Commit 8a67ca92 by Richard Henderson Committed by Richard Henderson

i386.c (avx_vpermilp_parallel): New function.

	* i386.c (avx_vpermilp_parallel): New function.
	* i386-protos.h: Declare it.
	* predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand,
	avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New.
	* sse.md (AVXMODEFDP, AVXMODEFSP): New iterators.
	(ssescalarnum, ssedoublesizemode): Add AVX modes.
	(vpermilbits): Remove.
	(avx_vpermil<mode>): Change insns to expanders.
	(*avx_vpermil<mode>): New.  Use vec_select.

From-SVN: r154427
parent d90a2c59
2009-11-22 Richard Henderson <rth@redhat.com>
* i386.c (avx_vpermilp_parallel): New function.
* i386-protos.h: Declare it.
* predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand,
avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New.
* sse.md (AVXMODEFDP, AVXMODEFSP): New iterators.
(ssescalarnum, ssedoublesizemode): Add AVX modes.
(vpermilbits): Remove.
(avx_vpermil<mode>): Change insns to expanders.
(*avx_vpermil<mode>): New. Use vec_select.
2009-11-22 Richard Earnshaw <rearnsha@arm.com>
* opts.c (decode_options): Don't enable flag_schedule_insns
......@@ -47,6 +47,8 @@ extern bool x86_extended_QIreg_mentioned_p (rtx);
extern bool x86_extended_reg_mentioned_p (rtx);
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_strlen (rtx, rtx, rtx, rtx);
......@@ -275,3 +277,4 @@ extern int asm_preferred_eh_data_format (int, int);
#ifdef HAVE_ATTR_cpu
extern enum attr_cpu ix86_schedule;
#endif
......@@ -24527,6 +24527,82 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
return NULL_TREE;
}
}
/* Helper for the avx_vpermilp_{v8sf,v4df,v4sf,v2df}_operand predicates.
   This is also used when emitting the instruction to turn the parallel
   back into a mask.

   PAR is the selector PARALLEL of a VEC_SELECT and MODE is the vector
   mode being permuted.  Only V8SFmode, V4DFmode, V4SFmode and V2DFmode
   are handled; any other mode reaches gcc_unreachable.

   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  /* The vector length is a signed int; cast NELT so that we do not
     compare signed against unsigned (-Wsign-compare).  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      /* Storing into an unsigned type makes negative selectors look
         huge, so the single range check below rejects them too.  */
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  Each result element contributes one mask bit
         at position I: its source index relative to its own lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  Since every selector is below
         8, passing this check also bounds the low-lane selectors to
         0..3.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      /* Only the low lane is encoded in the mask.  */
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  Each selector occupies
         NELT / 2 bits: two bits per element for SFmode, one bit per
         element for V2DFmode.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Store OPERAND to the memory after reload is completed. This means
that we can't easily use assign_stack_local. */
......
......@@ -1148,3 +1148,24 @@
return 1;
})
;; Return 1 if OP is a parallel for a vpermilp[ds] permute.
;; Each predicate below accepts a PARALLEL for which
;; avx_vpermilp_parallel returns nonzero for the named vector mode
;; (that function returns 0 for no match and imm8+1 for a match).
;; ??? It would be much easier if the PARALLEL for a VEC_SELECT
;; had a mode, but it doesn't. So we have 4 copies and install
;; the mode by hand.
;; Selector check for V8SFmode.
(define_predicate "avx_vpermilp_v8sf_operand"
(and (match_code "parallel")
(match_test "avx_vpermilp_parallel (op, V8SFmode)")))
;; Selector check for V4DFmode.
(define_predicate "avx_vpermilp_v4df_operand"
(and (match_code "parallel")
(match_test "avx_vpermilp_parallel (op, V4DFmode)")))
;; Selector check for V4SFmode.
(define_predicate "avx_vpermilp_v4sf_operand"
(and (match_code "parallel")
(match_test "avx_vpermilp_parallel (op, V4SFmode)")))
;; Selector check for V2DFmode.
(define_predicate "avx_vpermilp_v2df_operand"
(and (match_code "parallel")
(match_test "avx_vpermilp_parallel (op, V2DFmode)")))
......@@ -58,6 +58,8 @@
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
(define_mode_iterator AVXMODEFDP [V2DF V4DF])
(define_mode_iterator AVXMODEFSP [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
......@@ -95,13 +97,16 @@
(V4SI "SI") (V2DI "DI")])
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
(V4SF "V8SF") (V4SI "V8SI")])
(define_mode_attr ssedoublesizemode
[(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
(V8HI "V16HI") (V16QI "V32QI")
(V4DF "V8DF") (V8SF "V16SF")
(V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
(V16QI "16") (V8HI "8")
(V4SI "4") (V2DI "2")])
(define_mode_attr ssescalarnum
[(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
(V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mapping for AVX
(define_mode_attr avxvecmode
......@@ -134,10 +139,6 @@
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Mapping of immediate bits for vpermil instructions
(define_mode_attr vpermilbits
[(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
;; Mapping of immediate bits for pinsr instructions
(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
......@@ -12088,14 +12089,66 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(define_insn "avx_vpermil<mode>"
;; Expander for the vpermilp permute on the double-precision vector
;; modes (V2DF, V4DF).  Operand 2 arrives as an immediate in 0..255.
;; The preparation statements replace it with a PARALLEL of
;; <ssescalarnum> element indices for the vec_select: bit I of the
;; immediate becomes the index for result element I, and for V4DF the
;; indices for elements 2 and 3 have 2 added to them.
(define_expand "avx_vpermil<mode>"
[(set (match_operand:AVXMODEFDP 0 "register_operand" "")
(vec_select:AVXMODEFDP
(match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
(match_operand:SI 2 "const_0_to_255_operand" "")))]
"TARGET_AVX"
{
int mask = INTVAL (operands[2]);
rtx perm[<ssescalarnum>];
perm[0] = GEN_INT (mask & 1);
perm[1] = GEN_INT ((mask >> 1) & 1);
if (<MODE>mode == V4DFmode)
{
perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
}
operands[2]
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
;; Expander for the vpermilp permute on the single-precision vector
;; modes (V4SF, V8SF).  Operand 2 arrives as an immediate in 0..255.
;; The preparation statements replace it with a PARALLEL of
;; <ssescalarnum> element indices for the vec_select: each 2-bit field
;; of the immediate (bits 2*I and 2*I+1) becomes the index for result
;; element I.  For V8SF the same four fields are reused for elements
;; 4..7 with 4 added to each index.
(define_expand "avx_vpermil<mode>"
[(set (match_operand:AVXMODEFSP 0 "register_operand" "")
(vec_select:AVXMODEFSP
(match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
(match_operand:SI 2 "const_0_to_255_operand" "")))]
"TARGET_AVX"
{
int mask = INTVAL (operands[2]);
rtx perm[<ssescalarnum>];
perm[0] = GEN_INT (mask & 3);
perm[1] = GEN_INT ((mask >> 2) & 3);
perm[2] = GEN_INT ((mask >> 4) & 3);
perm[3] = GEN_INT ((mask >> 6) & 3);
if (<MODE>mode == V8SFmode)
{
perm[4] = GEN_INT ((mask & 3) + 4);
perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
}
operands[2]
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
(define_insn "*avx_vpermilp<mode>"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "xm")
(match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
UNSPEC_VPERMIL))]
(vec_select:AVXMODEF2P
(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
(match_parallel 2 "avx_vpermilp_<mode>_operand"
[(match_operand 3 "const_int_operand" "")])))]
"TARGET_AVX"
"vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
{
int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
operands[2] = GEN_INT (mask);
return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment