Commit 00f07b86 by Richard Henderson Committed by Richard Henderson

Move vector highpart emulation to the optabs layer

        * expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
        (expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
        * optabs.c (can_mult_highpart_p): New.
        (expand_mult_highpart): New.
        * expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
        * tree-vect-generic.c (expand_vector_operations_1): Don't expand
        by pieces if can_mult_highpart_p.
        (expand_vector_divmod): Use can_mult_highpart_p and always
        generate MULT_HIGHPART_EXPR.
        * tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
        * tree-vect-stmts.c (vectorizable_operation): Likewise.

From-SVN: r189407
parent 9283726f
2012-07-10 Richard Henderson <rth@redhat.com> 2012-07-10 Richard Henderson <rth@redhat.com>
* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
(expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
* optabs.c (can_mult_highpart_p): New.
(expand_mult_highpart): New.
* expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
* tree-vect-generic.c (expand_vector_operations_1): Don't expand
by pieces if can_mult_highpart_p.
(expand_vector_divmod): Use can_mult_highpart_p and always
generate MULT_HIGHPART_EXPR.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
* tree-vect-stmts.c (vectorizable_operation): Likewise.
* config/spu/spu-builtins.md (spu_mpy): Move to spu.md. * config/spu/spu-builtins.md (spu_mpy): Move to spu.md.
(spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise. (spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise.
* config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy. * config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy.
......
...@@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, ...@@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant); const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx); static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
int, int); int, int);
/* Compute and return the best algorithm for multiplying by T. /* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit The algorithm must cost less than cost_limit
...@@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, ...@@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target; return target;
} }
/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
static rtx static rtx
extract_high_half (enum machine_mode mode, rtx op) extract_high_half (enum machine_mode mode, rtx op)
...@@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op) ...@@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op)
return convert_modes (mode, wider_mode, op, 0); return convert_modes (mode, wider_mode, op, 0);
} }
/* Like expand_mult_highpart, but only consider using a multiplication /* Like expmed_mult_highpart, but only consider using a multiplication
optab. OP1 is an rtx for the constant operand. */ optab. OP1 is an rtx for the constant operand. */
static rtx static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost) rtx target, int unsignedp, int max_cost)
{ {
rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
...@@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
MAX_COST is the total allowed cost for the expanded RTL. */ MAX_COST is the total allowed cost for the expanded RTL. */
static rtx static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost) rtx target, int unsignedp, int max_cost)
{ {
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
...@@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
mode == word_mode, however all the cost calculations in mode == word_mode, however all the cost calculations in
synth_mult etc. assume single-word operations. */ synth_mult etc. assume single-word operations. */
if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
return expand_mult_highpart_optab (mode, op0, op1, target, return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost); unsignedp, max_cost);
extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
...@@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
{ {
/* See whether the specialized multiplication optabs are /* See whether the specialized multiplication optabs are
cheaper than the shift/add version. */ cheaper than the shift/add version. */
tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
alg.cost.cost + extra_cost); alg.cost.cost + extra_cost);
if (tem) if (tem)
return tem; return tem;
...@@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
return tem; return tem;
} }
return expand_mult_highpart_optab (mode, op0, op1, target, return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost); unsignedp, max_cost);
} }
...@@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
In all cases but EXACT_DIV_EXPR, this multiplication requires the upper In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
half of the product. Different strategies for generating the product are half of the product. Different strategies for generating the product are
implemented in expand_mult_highpart. implemented in expmed_mult_highpart.
If what we actually want is the remainder, we generate that by another If what we actually want is the remainder, we generate that by another
by-constant multiplication and a subtraction. */ by-constant multiplication and a subtraction. */
...@@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
mode for which we can do the operation with a library call. */ mode for which we can do the operation with a library call. */
/* We might want to refine this now that we have division-by-constant /* We might want to refine this now that we have division-by-constant
optimization. Since expand_mult_highpart tries so many variants, it is optimization. Since expmed_mult_highpart tries so many variants, it is
not straightforward to generalize this. Maybe we should make an array not straightforward to generalize this. Maybe we should make an array
of possible modes in init_expmed? Save this for GCC 2.7. */ of possible modes in init_expmed? Save this for GCC 2.7. */
...@@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= (shift_cost[speed][compute_mode][post_shift - 1] = (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1] + shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), GEN_INT (ml),
NULL_RTX, 1, NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
...@@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost extra_cost
= (shift_cost[speed][compute_mode][pre_shift] = (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]); + shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, t2 = expmed_mult_highpart (compute_mode, t1,
GEN_INT (ml), GEN_INT (ml),
NULL_RTX, 1, NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
...@@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]); + add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), NULL_RTX, 0, GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost); max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
...@@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr, t1 = expmed_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0, NULL_RTX, 0,
max_cost - extra_cost); max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
...@@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, t3 = expmed_mult_highpart (compute_mode, t2,
GEN_INT (ml), NULL_RTX, 1, GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
if (t3 != 0) if (t3 != 0)
......
...@@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, ...@@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
return expand_divmod (0, code, mode, op0, op1, target, unsignedp); return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
case RDIV_EXPR: case RDIV_EXPR:
case MULT_HIGHPART_EXPR:
goto binop; goto binop;
case MULT_HIGHPART_EXPR:
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
temp = expand_mult_highpart (mode, op0, op1, target, unsignedp);
gcc_assert (temp);
return temp;
case TRUNC_MOD_EXPR: case TRUNC_MOD_EXPR:
case FLOOR_MOD_EXPR: case FLOOR_MOD_EXPR:
case CEIL_MOD_EXPR: case CEIL_MOD_EXPR:
......
...@@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, ...@@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
return ops[0].value; return ops[0].value;
} }
/* Return non-zero if a highpart multiply is supported or can be synthesized.
For the benefit of expand_mult_highpart, the return value is 1 for direct,
2 for even/odd widening, and 3 for hi/lo widening. */
int
can_mult_highpart_p (enum machine_mode mode, bool uns_p)
{
optab op;
unsigned char *sel;
unsigned i, nunits;
/* First preference: a direct highpart-multiply pattern.  */
op = uns_p ? umul_highpart_optab : smul_highpart_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
return 1;
/* If the mode is an integral vector, synth from widening operations.  */
if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
return 0;
nunits = GET_MODE_NUNITS (mode);
/* Scratch permutation selector, filled in below and checked against the
   target's permute capabilities.  */
sel = XALLOCAVEC (unsigned char, nunits);
/* Second preference: even/odd widening multiplies.  Both the even and the
   odd pattern must exist, and the target must be able to permute the two
   double-width results back into element order.  */
op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
/* Selector picking the high halves out of the interleaved
   even/odd products; the !BYTES_BIG_ENDIAN term chooses which
   half of each double-width element is the high part.  */
for (i = 0; i < nunits; ++i)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (can_vec_perm_p (mode, false, sel))
return 2;
}
}
/* Third preference: hi/lo widening multiplies, again requiring both
   patterns plus a supporting permute to extract the high halves.  */
op = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
/* Selector extracting the high half of each double-width product
   from the concatenated lo/hi results.  */
for (i = 0; i < nunits; ++i)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (can_vec_perm_p (mode, false, sel))
return 3;
}
}
return 0;
}
/* Expand a highpart multiply: compute the high half of the (signed if
   UNS_P is false, unsigned otherwise) double-width product of OP0 and OP1
   in MODE, placing the result in TARGET if convenient.  Returns the result
   rtx, or NULL_RTX if the operation is not supported (see
   can_mult_highpart_p, whose return value selects the strategy here).  */
rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, bool uns_p)
{
struct expand_operand eops[3];
enum insn_code icode;
int method, i, nunits;
enum machine_mode wmode;
rtx m1, m2, perm;
optab tab1, tab2;
rtvec v;
method = can_mult_highpart_p (mode, uns_p);
switch (method)
{
case 0:
/* Not supported at all.  */
return NULL_RTX;
case 1:
/* Direct highpart-multiply pattern.  */
tab1 = uns_p ? umul_highpart_optab : smul_highpart_optab;
return expand_binop (mode, tab1, op0, op1, target, uns_p,
OPTAB_LIB_WIDEN);
case 2:
/* Even/odd widening multiplies plus a permute.  */
tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
break;
case 3:
/* Hi/lo widening multiplies plus a permute.  On big-endian targets
   swap the two so that the permutation selector computed below
   (which assumes tab1 produces the lower-indexed elements) still
   extracts the correct halves.  */
tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
if (BYTES_BIG_ENDIAN)
{
optab t = tab1;
tab1 = tab2;
tab2 = t;
}
break;
default:
gcc_unreachable ();
}
icode = optab_handler (tab1, mode);
nunits = GET_MODE_NUNITS (mode);
/* The widening patterns produce WMODE: same total size as MODE but half
   as many, double-width, elements.  */
wmode = insn_data[icode].operand[0].mode;
gcc_checking_assert (2 * GET_MODE_NUNITS (wmode) == nunits);
gcc_checking_assert (GET_MODE_SIZE (wmode) == GET_MODE_SIZE (mode));
/* Emit the first widening multiply and view the double-width result as
   a MODE vector so it can feed the permute.  */
create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
create_input_operand (&eops[1], op0, mode);
create_input_operand (&eops[2], op1, mode);
expand_insn (icode, 3, eops);
m1 = gen_lowpart (mode, eops[0].value);
/* Likewise for the second widening multiply (odd, or the other of
   hi/lo).  The eops array is refilled from scratch.  */
create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
create_input_operand (&eops[1], op0, mode);
create_input_operand (&eops[2], op1, mode);
expand_insn (optab_handler (tab2, mode), 3, eops);
m2 = gen_lowpart (mode, eops[0].value);
/* Build the permutation selector as a constant vector.  The element
   formulas mirror the ones checked in can_mult_highpart_p, so the
   permute is known to be supported.  */
v = rtvec_alloc (nunits);
if (method == 2)
{
for (i = 0; i < nunits; ++i)
RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits : 0));
}
else
{
for (i = 0; i < nunits; ++i)
RTVEC_ELT (v, i) = GEN_INT (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
}
perm = gen_rtx_CONST_VECTOR (mode, v);
/* Select the high halves out of M1/M2 into TARGET.  */
return expand_vec_perm (mode, m1, m2, perm, target);
}
/* Return true if there is a compare_and_swap pattern. */ /* Return true if there is a compare_and_swap pattern. */
......
...@@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *); ...@@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
/* Generate code for VEC_PERM_EXPR. */ /* Generate code for VEC_PERM_EXPR. */
extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx); extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);
/* Return non-zero if target supports a given highpart multiplication. */
extern int can_mult_highpart_p (enum machine_mode, bool);
/* Generate code for MULT_HIGHPART_EXPR. */
extern rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, bool);
/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
if the target does not have such an insn. */ if the target does not have such an insn. */
......
...@@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, ...@@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
int dummy_int; int dummy_int;
unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type)); unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
optab op;
tree *vec; tree *vec;
unsigned char *sel = NULL; tree cur_op, mulcst, tem;
tree cur_op, m1, m2, mulcst, perm_mask, wider_type, tem, decl_e, decl_o; optab op;
if (prec > HOST_BITS_PER_WIDE_INT) if (prec > HOST_BITS_PER_WIDE_INT)
return NULL_TREE; return NULL_TREE;
...@@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, ...@@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return NULL_TREE; return NULL_TREE;
op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default); if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) return NULL_TREE;
wider_type = decl_e = decl_o = NULL_TREE;
else
{
wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
wider_type = build_vector_type (wider_type, nunits / 2);
if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
|| GET_MODE_BITSIZE (TYPE_MODE (wider_type))
!= GET_MODE_BITSIZE (TYPE_MODE (type)))
return NULL_TREE;
sel = XALLOCAVEC (unsigned char, nunits);
if (targetm.vectorize.builtin_mul_widen_even
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
&& (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
&& (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
== TYPE_MODE (wider_type)))
{
for (i = 0; i < nunits; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
decl_e = decl_o = NULL_TREE;
}
else
decl_e = decl_o = NULL_TREE;
if (decl_e == NULL_TREE)
{
op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
for (i = 0; i < nunits; i++)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
return NULL_TREE;
}
}
cur_op = op0; cur_op = op0;
...@@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, ...@@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
for (i = 0; i < nunits; i++) for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]); vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
mulcst = build_vector (type, vec); mulcst = build_vector (type, vec);
if (wider_type == NULL_TREE)
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
else
{
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
perm_mask = build_vector (type, vec);
if (decl_e != NULL_TREE)
{
gimple call;
call = gimple_build_call (decl_e, 2, cur_op, mulcst);
m1 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m1);
m1 = make_ssa_name (m1, call);
gimple_call_set_lhs (call, m1);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
call = gimple_build_call (decl_o, 2, cur_op, mulcst);
m2 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m2);
m2 = make_ssa_name (m2, call);
gimple_call_set_lhs (call, m2);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
}
else
{
m1 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
wider_type, cur_op, mulcst);
m2 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
wider_type, cur_op, mulcst);
}
m1 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m1); cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
m2 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m2);
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, m1, m2, perm_mask);
}
switch (mode) switch (mode)
{ {
...@@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) ...@@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
if (compute_type == type) if (compute_type == type)
{ {
compute_mode = TYPE_MODE (compute_type); compute_mode = TYPE_MODE (compute_type);
if (VECTOR_MODE_P (compute_mode) if (VECTOR_MODE_P (compute_mode))
&& op != NULL {
&& optab_handler (op, compute_mode) != CODE_FOR_nothing) if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
return; return;
else if (code == MULT_HIGHPART_EXPR
/* There is no operation in hardware, so fall back to scalars. */ && can_mult_highpart_p (compute_mode,
compute_type = TREE_TYPE (type); TYPE_UNSIGNED (compute_type)))
return;
}
/* There is no operation in hardware, so fall back to scalars. */
compute_type = TREE_TYPE (type);
} }
gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
......
...@@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, ...@@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
optab optab; optab optab;
tree dummy, q; tree q;
enum tree_code dummy_code;
int dummy_int, prec; int dummy_int, prec;
VEC (tree, heap) *dummy_vec;
stmt_vec_info def_stmt_vinfo; stmt_vec_info def_stmt_vinfo;
if (!is_gimple_assign (last_stmt)) if (!is_gimple_assign (last_stmt))
...@@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, ...@@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
|| prec > HOST_BITS_PER_WIDE_INT) || prec > HOST_BITS_PER_WIDE_INT)
return NULL; return NULL;
optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default); if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
if (optab == NULL return NULL;
|| optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
{
tree witype = build_nonstandard_integer_type (prec * 2,
TYPE_UNSIGNED (itype));
tree vecwtype = get_vectype_for_scalar_type (witype);
if (vecwtype == NULL_TREE)
return NULL;
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
vecwtype, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int,
&dummy_vec))
return NULL;
}
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
......
...@@ -3304,18 +3304,17 @@ static bool ...@@ -3304,18 +3304,17 @@ static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
gimple *vec_stmt, slp_tree slp_node) gimple *vec_stmt, slp_tree slp_node)
{ {
tree vec_dest, vec_dest2 = NULL_TREE; tree vec_dest;
tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
tree scalar_dest; tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE; tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype, wide_vectype = NULL_TREE; tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code; enum tree_code code;
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree new_temp; tree new_temp;
int op_type; int op_type;
optab optab, optab2 = NULL; optab optab;
int icode; int icode;
tree def; tree def;
gimple def_stmt; gimple def_stmt;
...@@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vop0, vop1, vop2; tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf; int vf;
unsigned char *sel = NULL;
tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false; return false;
...@@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
|| code == RROTATE_EXPR) || code == RROTATE_EXPR)
return false; return false;
optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */ /* Supportable by target? */
if (!optab && code != MULT_HIGHPART_EXPR)
vec_mode = TYPE_MODE (vectype);
if (code == MULT_HIGHPART_EXPR)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
fprintf (vect_dump, "no optab."); icode = 0;
return false; else
icode = CODE_FOR_nothing;
} }
vec_mode = TYPE_MODE (vectype); else
icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing; {
optab = optab_for_tree_code (code, vectype, optab_default);
if (icode == CODE_FOR_nothing if (!optab)
&& code == MULT_HIGHPART_EXPR
&& VECTOR_MODE_P (vec_mode)
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
{
/* If MULT_HIGHPART_EXPR isn't supported by the backend, see
if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
tree wide_type
= build_nonstandard_integer_type (prec * 2, unsignedp);
wide_vectype
= get_same_sized_vectype (wide_type, vectype);
sel = XALLOCAVEC (unsigned char, nunits_in);
if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
&& GET_MODE_SIZE (TYPE_MODE (wide_vectype))
== GET_MODE_SIZE (vec_mode))
{
if (targetm.vectorize.builtin_mul_widen_even
&& (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
&& TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits_in : 0);
if (can_vec_perm_p (vec_mode, false, sel))
icode = 0;
}
if (icode == CODE_FOR_nothing)
{
decl1 = NULL_TREE;
decl2 = NULL_TREE;
optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
vectype, optab_default);
optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
vectype, optab_default);
if (optab != NULL
&& optab2 != NULL
&& optab_handler (optab, vec_mode) != CODE_FOR_nothing
&& optab_handler (optab2, vec_mode) != CODE_FOR_nothing
&& insn_data[optab_handler (optab, vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype)
&& insn_data[optab_handler (optab2,
vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
if (can_vec_perm_p (vec_mode, false, sel))
icode = optab_handler (optab, vec_mode);
}
}
}
if (icode == CODE_FOR_nothing)
{ {
if (optab_for_tree_code (code, vectype, optab_default) == NULL) if (vect_print_dump_info (REPORT_DETAILS))
{ fprintf (vect_dump, "no optab.");
if (vect_print_dump_info (REPORT_DETAILS)) return false;
fprintf (vect_dump, "no optab.");
return false;
}
wide_vectype = NULL_TREE;
optab2 = NULL;
} }
icode = (int) optab_handler (optab, vec_mode);
} }
if (icode == CODE_FOR_nothing) if (icode == CODE_FOR_nothing)
...@@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform binary/unary operation."); fprintf (vect_dump, "transform binary/unary operation.");
/* Handle def. */ /* Handle def. */
if (wide_vectype) vec_dest = vect_create_destination_var (scalar_dest, vectype);
{
vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
perm_mask = vect_gen_perm_mask (vectype, sel);
}
else
vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Allocate VECs for vector operands. In case of SLP, vector operands are /* Allocate VECs for vector operands. In case of SLP, vector operands are
created in the previous stages of the recursion, so no allocation is created in the previous stages of the recursion, so no allocation is
...@@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op) vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
if (wide_vectype)
{
tree new_temp2, vce;
gcc_assert (code == MULT_HIGHPART_EXPR);
if (decl1 != NULL_TREE)
{
new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
new_temp2 = make_ssa_name (vec_dest2, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
new_temp, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp2 = make_ssa_name (vec_dest2, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
new_temp2, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest3, vce,
NULL_TREE);
new_temp = make_ssa_name (vec_dest3, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest4, vce,
NULL_TREE);
new_temp2 = make_ssa_name (vec_dest4, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp = permute_vec_elements (new_temp, new_temp2,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp_node)
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
new_stmt);
continue;
}
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2); vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt); new_temp = make_ssa_name (vec_dest, new_stmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment