Commit 22554cf9 by H.J. Lu Committed by H.J. Lu

Don't convert GTU to GT for V4SI and V2DI

gcc/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
	GTU to GT for V4SI and V2DI.

	* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
	(umin<mode>3): Removed.
	(uminv8hi3): New.
	(uminv4si3): Likewise.

gcc/testsuite/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

From-SVN: r155618
parent 7152f51e
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
GTU to GT for V4SI and V2DI.
* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
(umin<mode>3): Removed.
(uminv8hi3): New.
(uminv4si3): Likewise.
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR lto/42581
* collect2.c (main): Turn on trace in collect2 if -v is passed
to gcc with LTO.
......
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GCC.
......@@ -16252,37 +16252,6 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
case V4SImode:
case V2DImode:
{
rtx t1, t2, mask;
/* Perform a parallel modulo subtraction. */
t1 = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_subv4si3
: gen_subv2di3) (t1, cop0, cop1));
/* Extract the original sign bit of op0. */
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
true, false);
t2 = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_andv4si3
: gen_andv2di3) (t2, cop0, mask));
/* XOR it back into the result of the subtraction.
This results in the sign bit set iff we saw
unsigned underflow. */
x = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_xorv4si3
: gen_xorv2di3) (x, t1, t2));
code = GT;
}
break;
case V16QImode:
case V8HImode:
/* Perform a parallel unsigned saturating subtraction. */
......
;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008, 2009
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
......@@ -6138,7 +6138,7 @@
[(set (match_operand:V4SI 0 "register_operand" "")
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
"TARGET_SSE4_1 || TARGET_XOP"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
......@@ -6195,14 +6195,39 @@
}
})
(define_expand "umin<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "")
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
(match_operand:SSEMODE24 2 "register_operand" "")))]
(define_expand "uminv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "")
(umin:V8HI (match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
ix86_fixup_binary_operands_no_copy (UMIN, V8HImode, operands);
else
{
rtx xops[6];
bool ok;
xops[0] = operands[0];
xops[1] = operands[2];
xops[2] = operands[1];
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
}
})
(define_expand "uminv4si3"
[(set (match_operand:V4SI 0 "register_operand" "")
(umin:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE4_1 || TARGET_XOP"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMIN, V4SImode, operands);
else
{
rtx xops[6];
......
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* gcc.target/i386/pr42542-1.c: New.
* gcc.target/i386/pr42542-1a.c: Likewise.
* gcc.target/i386/pr42542-1b.c: Likewise.
* gcc.target/i386/pr42542-2.c: Likewise.
* gcc.target/i386/pr42542-2a.c: Likewise.
* gcc.target/i386/pr42542-2b.c: Likewise.
* gcc.target/i386/pr42542-3.c: Likewise.
* gcc.target/i386/pr42542-3a.c: Likewise.
2009-01-04 Tobias Burnus <burnus@net-b.de>
PR fortran/41872
......
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* First input vector.  In every position exactly one of v1[i] / v2[i]
   has its most significant bit set and the other is a small value, so a
   signed comparison would select the wrong element.  */
unsigned int v1[] __attribute__ ((aligned(16))) =
{
0x80000000, 1, 0xa0000000, 2,
3, 0xd0000000, 0xf0000000, 0xe0000000
};
/* Second input vector; see the note on v1 for how the values pair up.  */
unsigned int v2[] __attribute__ ((aligned(16))) =
{
4, 0xb0000000, 5, 0xc0000000,
0xd0000000, 6, 7, 8
};
/* Expected element-wise unsigned maximum of v1 and v2.  */
unsigned int max[] =
{
0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
};
/* Expected element-wise unsigned minimum of v1 and v2.  */
unsigned int min[] =
{
4, 1, 5, 2,
3, 6, 7, 8
};
/* Output buffer shared by find_max and find_min.  It holds 16 elements,
   of which only the first 8 are written and checked.  */
unsigned int res[16] __attribute__ ((aligned(16)));
/* Called by the test driver to report a failure.  */
extern void abort (void);
/* Store the element-wise unsigned maximum of v1 and v2 into res[0..7].

   NOTE(review): a companion compile-only test includes this file and
   scans the generated assembly for "pmaxud", so the loop and the
   conditional expression should presumably be kept in exactly this
   form.  */
void
find_max (void)
{
int i;
for (i = 0; i < 8; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store the element-wise unsigned minimum of v1 and v2 into res[0..7].

   NOTE(review): a companion compile-only test includes this file and
   scans the generated assembly for "pminud", so the loop and the
   conditional expression should presumably be kept in exactly this
   form.  */
void
find_min (void)
{
int i;
for (i = 0; i < 8; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body; TEST expands to the entry-point name expected by the
   header included through CHECK_H (presumably that header calls it).

   Runs find_max and then find_min, compares the 8 results of each
   against the expected max[] and min[] tables, and aborts if any
   element differs.  */
static void
TEST (void)
{
  int mismatches = 0;
  int idx;

  /* Check the unsigned maximum.  */
  find_max ();
  idx = 0;
  while (idx < 8)
    {
      mismatches += (res[idx] != max[idx]);
      idx++;
    }

  /* Check the unsigned minimum; res is overwritten by find_min.  */
  find_min ();
  idx = 0;
  while (idx < 8)
    {
      mismatches += (res[idx] != min[idx]);
      idx++;
    }

  if (mismatches != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-1.c"
/* { dg-do compile } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-1.c"
/* { dg-final { scan-assembler "pmaxud" } } */
/* { dg-final { scan-assembler "pminud" } } */
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* First input vector.  In every position exactly one of v1[i] / v2[i]
   has its most significant bit set and the other is a small value, so a
   signed comparison would select the wrong element.  */
unsigned short v1[] __attribute__ ((aligned(16))) =
{
0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
};
/* Second input vector; see the note on v1 for how the values pair up.  */
unsigned short v2[] __attribute__ ((aligned(16))) =
{
4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
};
/* Expected element-wise unsigned maximum of v1 and v2.  */
unsigned short max[] =
{
0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
};
/* Expected element-wise unsigned minimum of v1 and v2.  */
unsigned short min[] =
{
4, 40, 1, 10, 5, 50, 2, 20,
3, 30, 6, 65, 7, 75, 25, 30
};
/* Output buffer shared by find_max and find_min (16 elements, all of
   which are written and checked).  */
unsigned short res[16] __attribute__ ((aligned(16)));
/* Called by the test driver to report a failure.  */
extern void abort (void);
/* Store the element-wise unsigned maximum of v1 and v2 into res[0..15].

   NOTE(review): a companion compile-only test includes this file and
   scans the generated assembly for "pmaxuw", so the loop and the
   conditional expression should presumably be kept in exactly this
   form.  */
void
find_max (void)
{
int i;
for (i = 0; i < 16; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store the element-wise unsigned minimum of v1 and v2 into res[0..15].

   NOTE(review): a companion compile-only test includes this file and
   scans the generated assembly for "pminuw", so the loop and the
   conditional expression should presumably be kept in exactly this
   form.  */
void
find_min (void)
{
int i;
for (i = 0; i < 16; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body; TEST expands to the entry-point name expected by the
   header included through CHECK_H (presumably that header calls it).

   Runs find_max and then find_min, compares the 16 results of each
   against the expected max[] and min[] tables, and aborts if any
   element differs.  */
static void
TEST (void)
{
  int mismatches = 0;
  int idx;

  /* Check the unsigned maximum.  */
  find_max ();
  idx = 0;
  while (idx < 16)
    {
      mismatches += (res[idx] != max[idx]);
      idx++;
    }

  /* Check the unsigned minimum; res is overwritten by find_min.  */
  find_min ();
  idx = 0;
  while (idx < 16)
    {
      mismatches += (res[idx] != min[idx]);
      idx++;
    }

  if (mismatches != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-2.c"
/* { dg-do compile } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-2.c"
/* { dg-final { scan-assembler "pmaxuw" } } */
/* { dg-final { scan-assembler "pminuw" } } */
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* First input vector.  In every position exactly one of v1[i] / v2[i]
   has its most significant bit set and the other is a small value, so a
   signed comparison would select the wrong element.  */
unsigned char v1[] __attribute__ ((aligned(16))) =
{
0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
};
/* Second input vector; see the note on v1 for how the values pair up.  */
unsigned char v2[] __attribute__ ((aligned(16))) =
{
4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
};
/* Expected element-wise unsigned maximum of v1 and v2.  */
unsigned char max[] =
{
0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
};
/* Expected element-wise unsigned minimum of v1 and v2.  */
unsigned char min[] =
{
4, 44, 40, 48, 1, 15, 10, 15,
5, 55, 50, 51, 2, 25, 20, 35,
3, 34, 30, 36, 6, 61, 65, 68,
7, 76, 75, 81, 25, 34, 30, 40
};
/* Output buffer shared by find_max and find_min (32 elements, all of
   which are written and checked).  */
unsigned char res[32] __attribute__ ((aligned(16)));
/* Called by the test driver to report a failure.  */
extern void abort (void);
/* Store the element-wise unsigned maximum of v1 and v2 into res[0..31].

   NOTE(review): a companion compile-only test includes this file and
   scans the generated assembly for "pmaxub", so the loop and the
   conditional expression should presumably be kept in exactly this
   form.  */
void
find_max (void)
{
int i;
for (i = 0; i < 32; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store the element-wise unsigned minimum of v1 and v2 into res[0..31].

   NOTE(review): a companion compile-only test includes this file and
   scans the generated assembly for "pminub", so the loop and the
   conditional expression should presumably be kept in exactly this
   form.  */
void
find_min (void)
{
int i;
for (i = 0; i < 32; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body; TEST expands to the entry-point name expected by the
   header included through CHECK_H (presumably that header calls it).

   Runs find_max and then find_min, compares the 32 results of each
   against the expected max[] and min[] tables, and aborts if any
   element differs.  */
static void
TEST (void)
{
  int mismatches = 0;
  int idx;

  /* Check the unsigned maximum.  */
  find_max ();
  idx = 0;
  while (idx < 32)
    {
      mismatches += (res[idx] != max[idx]);
      idx++;
    }

  /* Check the unsigned minimum; res is overwritten by find_min.  */
  find_min ();
  idx = 0;
  while (idx < 32)
    {
      mismatches += (res[idx] != min[idx]);
      idx++;
    }

  if (mismatches != 0)
    abort ();
}
/* { dg-do compile } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#include "pr42542-3.c"
/* { dg-final { scan-assembler "pmaxub" } } */
/* { dg-final { scan-assembler "pminub" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment