Commit c0b0ee6f by Jakub Jelinek Committed by Jakub Jelinek

i386.c (ix86_expand_reduc_v4sf): Rename to ...

	* config/i386/i386.c (ix86_expand_reduc_v4sf): Rename to ...
	(ix86_expand_reduc): ... this.  Handle also V8SFmode and V4DFmode.
	* config/i386/sse.md (reduc_splus_v4sf, reduc_smax_v4sf,
	reduc_smin_v4sf): Adjust callers.
	(reduc_smax_v8sf, reduc_smin_v8sf, reduc_smax_v4df, reduc_smin_v4df):
	New expanders.

	* gcc.dg/vect/vect-reduc-10.c: New test.
	* gcc.target/i386/avx-reduc-1.c: New test.

From-SVN: r178916
parent 6e2cb391
2011-09-16 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (ix86_expand_reduc_v4sf): Rename to ...
(ix86_expand_reduc): ... this. Handle also V8SFmode and V4DFmode.
* config/i386/sse.md (reduc_splus_v4sf, reduc_smax_v4sf,
reduc_smin_v4sf): Adjust callers.
(reduc_smax_v8sf, reduc_smin_v8sf, reduc_smax_v4df, reduc_smin_v4df):
New expanders.
* config/i386/sse.md (vec_extract_hi_<mode>,
vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use
vextracti128 instead of vextractf128 for -mavx2 and
......@@ -211,7 +211,7 @@ extern rtx ix86_tls_module_base (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
extern bool ix86_expand_pinsr (rtx *);
......
......@@ -32696,24 +32696,45 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
}
}
/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
pattern to reduce; DEST is the destination; IN is the input vector. */
/* Expand a vector reduction. FN is the binary pattern to reduce;
DEST is the destination; IN is the input vector.

The mode of IN selects the instruction sequence: V4SFmode, V8SFmode
and V4DFmode are handled, any other mode reaches gcc_unreachable ().
Whatever the mode, the last instruction emitted applies FN to tmp2
and tmp3 with DEST as its first operand.

NOTE(review): this listing is a diff shown without +/- markers, so the
superseded lines (the ix86_expand_reduc_v4sf signature, the declaration
with three temporaries, the gen_reg_rtx (V4SFmode) calls and the
unconditional movhlps/shufps sequence) sit directly in front of the
lines that replace them.  */
void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
rtx tmp1, tmp2, tmp3;
rtx tmp1, tmp2, tmp3, tmp4, tmp5;
/* The temporaries are created in the mode of IN.  */
enum machine_mode mode = GET_MODE (in);
tmp1 = gen_reg_rtx (V4SFmode);
tmp2 = gen_reg_rtx (V4SFmode);
tmp3 = gen_reg_rtx (V4SFmode);
tmp1 = gen_reg_rtx (mode);
tmp2 = gen_reg_rtx (mode);
tmp3 = gen_reg_rtx (mode);
emit_insn (gen_sse_movhlps (tmp1, in, in));
emit_insn (fn (tmp2, tmp1, in));
emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
const1_rtx, const1_rtx,
GEN_INT (1+4), GEN_INT (1+4)));
/* Each case must leave its last two intermediate values in tmp2 and
tmp3, which the final FN below consumes.  */
switch (mode)
{
case V4SFmode:
/* One FN step inside the case: movhlps, FN, shufps.  */
emit_insn (gen_sse_movhlps (tmp1, in, in));
emit_insn (fn (tmp2, tmp1, in));
emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
const1_rtx, const1_rtx,
GEN_INT (1+4), GEN_INT (1+4)));
break;
case V8SFmode:
/* Two FN steps inside the case, hence the extra temporaries tmp4 and
tmp5 that only this case allocates.
NOTE(review): the const1_rtx selector of vperm2f128 presumably pairs
the two 128-bit halves of IN -- confirm against the insn pattern.  */
tmp4 = gen_reg_rtx (mode);
tmp5 = gen_reg_rtx (mode);
emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx));
emit_insn (fn (tmp5, tmp4, in));
emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12)));
emit_insn (fn (tmp2, tmp1, tmp5));
emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx));
break;
case V4DFmode:
/* One FN step inside the case: vperm2f128, FN, shufpd.  */
emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx));
emit_insn (fn (tmp2, tmp1, in));
emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx));
break;
default:
/* Only the three modes above are supported.  */
gcc_unreachable ();
}
/* Final FN application, shared by every mode.  */
emit_insn (fn (dest, tmp2, tmp3));
}
......@@ -1253,7 +1253,7 @@
emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
}
else
ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
DONE;
})
......@@ -1263,7 +1263,7 @@
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
ix86_expand_reduc (gen_smaxv4sf3, operands[0], operands[1]);
DONE;
})
......@@ -1272,7 +1272,43 @@
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
ix86_expand_reduc (gen_sminv4sf3, operands[0], operands[1]);
DONE;
})
;; Expander reduc_smax_v8sf, enabled under TARGET_AVX.  Both operands
;; are V8SF registers.  The expansion is delegated to ix86_expand_reduc
;; with gen_smaxv8sf3 as the binary pattern, operand 0 as the
;; destination and operand 1 as the input vector.
(define_expand "reduc_smax_v8sf"
[(match_operand:V8SF 0 "register_operand" "")
(match_operand:V8SF 1 "register_operand" "")]
"TARGET_AVX"
{
ix86_expand_reduc (gen_smaxv8sf3, operands[0], operands[1]);
DONE;
})
;; Expander reduc_smin_v8sf, enabled under TARGET_AVX.  Both operands
;; are V8SF registers.  The expansion is delegated to ix86_expand_reduc
;; with gen_sminv8sf3 as the binary pattern, operand 0 as the
;; destination and operand 1 as the input vector.
(define_expand "reduc_smin_v8sf"
[(match_operand:V8SF 0 "register_operand" "")
(match_operand:V8SF 1 "register_operand" "")]
"TARGET_AVX"
{
ix86_expand_reduc (gen_sminv8sf3, operands[0], operands[1]);
DONE;
})
;; Expander reduc_smax_v4df, enabled under TARGET_AVX.  Both operands
;; are V4DF registers.  The expansion is delegated to ix86_expand_reduc
;; with gen_smaxv4df3 as the binary pattern, operand 0 as the
;; destination and operand 1 as the input vector.
(define_expand "reduc_smax_v4df"
[(match_operand:V4DF 0 "register_operand" "")
(match_operand:V4DF 1 "register_operand" "")]
"TARGET_AVX"
{
ix86_expand_reduc (gen_smaxv4df3, operands[0], operands[1]);
DONE;
})
;; Expander reduc_smin_v4df, enabled under TARGET_AVX.  Both operands
;; are V4DF registers.  The expansion is delegated to ix86_expand_reduc
;; with gen_sminv4df3 as the binary pattern, operand 0 as the
;; destination and operand 1 as the input vector.
(define_expand "reduc_smin_v4df"
[(match_operand:V4DF 0 "register_operand" "")
(match_operand:V4DF 1 "register_operand" "")]
"TARGET_AVX"
{
ix86_expand_reduc (gen_sminv4df3, operands[0], operands[1]);
DONE;
})
......
2011-09-16 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-reduc-10.c: New test.
* gcc.target/i386/avx-reduc-1.c: New test.
* gcc.target/i386/sse2-extract-1.c: New test.
* gcc.target/i386/avx-extract-1.c: New test.
......
#include "tree-vect.h"
extern void abort (void);
/* Input arrays, one per element type under test (floating point, signed
integer and unsigned integer), each holding 1024 elements.  */
double ad[1024];
float af[1024];
short as[1024];
int ai[1024];
long long all[1024];
unsigned short aus[1024];
unsigned int au[1024];
unsigned long long aull[1024];
/* F (var) defines f<var> (), a noinline/noclone function whose return
type is the element type of VAR.  It scans all 1024 elements and
returns the largest one; the running maximum starts at 0, so 0 is
returned when no element is greater than 0.  */
#define F(var) \
__attribute__((noinline, noclone)) __typeof (var[0]) \
f##var (void) \
{ \
int i; \
__typeof (var[0]) r = 0; \
for (i = 0; i < 1024; i++) \
r = r > var[i] ? r : var[i]; \
return r; \
}
/* TESTS applies the current definition of F to every array above; the
expansion right below emits the eight reduction functions.  */
#define TESTS \
F (ad) F (af) F (as) F (ai) F (all) F (aus) F (au) F (aull)
TESTS
/* Driver: fill every array with its own index, then repeatedly store a
larger value into one slot and check that each f<var> () returns that
value.  Calls abort () on the first mismatch and returns 0 otherwise.  */
int
main ()
{
int i;
/* check_vect () comes from tree-vect.h, which is not shown here.  */
check_vect ();
for (i = 0; i < 1024; i++)
{
/* Redefine F so that TESTS expands to "var[i] = i;" for each array.  */
#undef F
#define F(var) var[i] = i;
TESTS
}
/* I starts at 1023, grows by 1024 + 271 per iteration and stays below
32 * 1024.  Every value stored so far is smaller than or equal to the
current I, so after the store I is the expected maximum.
NOTE(review): the upper bound presumably keeps I representable in
short -- confirm.  */
for (i = 1023; i < 32 * 1024; i += 1024 + 271)
{
/* Redefine F so that TESTS stores I at index I & 1023 of each array
and aborts unless the matching reduction function returns I.  */
#undef F
#define F(var) var[i & 1023] = i; if (f##var () != i) abort ();
TESTS
}
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do run } */
/* { dg-options "-O3 -mavx" } */
/* { dg-require-effective-target avx_runtime } */
/* abort () is declared by hand; this file includes no headers.  */
extern void abort (void);
/* Input arrays, one per element type under test (floating point, signed
integer and unsigned integer), each holding 1024 elements.  */
double ad[1024];
float af[1024];
short as[1024];
int ai[1024];
long long all[1024];
unsigned short aus[1024];
unsigned int au[1024];
unsigned long long aull[1024];
/* F (var) defines f<var> (), a noinline/noclone function whose return
type is the element type of VAR.  It scans all 1024 elements and
returns the largest one; the running maximum starts at 0, so 0 is
returned when no element is greater than 0.  */
#define F(var) \
__attribute__((noinline, noclone)) __typeof (var[0]) \
f##var (void) \
{ \
int i; \
__typeof (var[0]) r = 0; \
for (i = 0; i < 1024; i++) \
r = r > var[i] ? r : var[i]; \
return r; \
}
/* TESTS applies the current definition of F to every array above; the
expansion right below emits the eight reduction functions.  */
#define TESTS \
F (ad) F (af) F (as) F (ai) F (all) F (aus) F (au) F (aull)
TESTS
/* Driver: fill every array with its own index, then repeatedly store a
larger value into one slot and check that each f<var> () returns that
value.  Calls abort () on the first mismatch and returns 0 otherwise.  */
int
main ()
{
int i;
for (i = 0; i < 1024; i++)
{
/* Redefine F so that TESTS expands to "var[i] = i;" for each array.  */
#undef F
#define F(var) var[i] = i;
TESTS
}
/* I starts at 1023, grows by 1024 + 271 per iteration and stays below
32 * 1024.  Every value stored so far is smaller than or equal to the
current I, so after the store I is the expected maximum.
NOTE(review): the upper bound presumably keeps I representable in
short -- confirm.  */
for (i = 1023; i < 32 * 1024; i += 1024 + 271)
{
/* Redefine F so that TESTS stores I at index I & 1023 of each array
and aborts unless the matching reduction function returns I.  */
#undef F
#define F(var) var[i & 1023] = i; if (f##var () != i) abort ();
TESTS
}
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment