Commit 3b84d61f by Uros Bizjak

re PR target/48678 (unable to find a register to spill in class ‘GENERAL_REGS’)

	PR target/48678
	* config/i386/i386.md (insv): Change operand 0 predicate to
	"register_operand".  Change operand 1 and 2 predicates to
	"const_int_operand".  Expand to pinsr{b,w,d,q} when appropriate.
	* config/i386/sse.md (sse4_1_pinsrb): Export.
	(sse2_pinsrw): Ditto.
	(sse4_1_pinsrd): Ditto.
	(sse4_1_pinsrq): Ditto.
	* config/i386/i386-protos.h (ix86_expand_pinsr): Add prototype.
	* config/i386/i386.c (ix86_expand_pinsr): New.

testsuite/ChangeLog:

	PR target/48678
	* gcc.target/i386/sse2-pinsrw.c: New test.
	* gcc.target/i386/avx-vpinsrw.c: Ditto.
	* gcc.target/i386/sse4_1-insvqi.c: Ditto.
	* gcc.target/i386/sse2-insvhi.c: Ditto.
	* gcc.target/i386/sse4_1-insvsi.c: Ditto.
	* gcc.target/i386/sse4_1-insvdi.c: Ditto.

From-SVN: r172792
parent 8efcbeca
2011-04-20 Uros Bizjak <ubizjak@gmail.com>
PR target/48678
* config/i386/i386.md (insv): Change operand 0 predicate to
"register_operand". Change operand 1 and 2 predicates to
"const_int_operand". Expand to pinsr{b,w,d,q} when appropriate.
* config/i386/sse.md (sse4_1_pinsrb): Export.
(sse2_pinsrw): Ditto.
(sse4_1_pinsrd): Ditto.
(sse4_1_pinsrq): Ditto.
* config/i386/i386-protos.h (ix86_expand_pinsr): Add prototype.
* config/i386/i386.c (ix86_expand_pinsr): New.
2011-04-20 Easwaran Raman <eraman@google.com>
* cfgexpand.c (add_alias_set_conflicts): Add conflicts with a variable
......@@ -72,8 +85,7 @@
2011-04-20 Kai Tietz <ktietz@redhat.com>
* fold-const.c (fold_binary_loc): Add handling for
(X & ~Y) | (~X & Y) and (X && !Y) | (!X && Y) optimization
to (X ^ Y).
(X & ~Y) | (~X & Y) and (X && !Y) | (!X && Y) optimization to (X ^ Y).
2011-04-20 Andrew Stubbs <ams@codesourcery.com>
......@@ -246,11 +258,10 @@
2011-04-19 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* doc/install.texi (Configuration, --enable-threads): Remove mach.
Add lynx, mipssde.
Sort table.
Add lynx, mipssde. Sort table.
2011-04-19 Xinliang David Li <davidxl@google.com>
* ipa-cp.c (ipcp_update_profiling): Assert that scale_completement is
not negative.
......
......@@ -203,6 +203,7 @@ extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
/* Expand an insert into a vector register through a pinsr insn;
   returns true if successful.  */
extern bool ix86_expand_pinsr (rtx *);
/* In i386-c.c */
extern void ix86_target_macros (void);
......
......@@ -34105,6 +34105,88 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
/* ... or we use the special-case patterns. */
expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.

   OPERANDS is read the way the "insv" expander supplies it:
   operands[0] is the destination, operands[1] the width of the inserted
   field in bits, operands[2] the starting bit position of the field and
   operands[3] the value to insert.

   False is returned, before anything is emitted, when the destination
   is not one of the vector modes handled below, when no pinsr variant
   is available for the field width on the current target, or when the
   field does not start on an element boundary.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  /* Look through a SUBREG destination, folding its byte offset into the
     bit position so that POS is relative to the inner register.  */
  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        /* The field width selects the element mode, and with it the
           pinsr variant and the vector mode to view DST in.  */
        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        /* Reject insertions to misaligned positions.  A pinsr insn
           replaces one whole element, and the division below would
           otherwise silently round POS down to the containing element
           and overwrite the wrong bits.  SIZE is nonzero here because
           SRCMODE matched one of the integer modes above.  */
        if (pos % size != 0)
          return false;

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        /* Turn the bit position into an element index.  */
        pos /= size;

        /* The element is selected by a mask with only bit POS set.  */
        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
......@@ -10393,14 +10393,17 @@
})
(define_expand "insv"
[(set (zero_extract (match_operand 0 "ext_register_operand" "")
(match_operand 1 "const8_operand" "")
(match_operand 2 "const8_operand" ""))
[(set (zero_extract (match_operand 0 "register_operand" "")
(match_operand 1 "const_int_operand" "")
(match_operand 2 "const_int_operand" ""))
(match_operand 3 "register_operand" ""))]
""
{
rtx (*gen_mov_insv_1) (rtx, rtx);
if (ix86_expand_pinsr (operands))
DONE;
/* Handle insertions to %ah et al. */
if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
FAIL;
......
......@@ -6051,7 +6051,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_pinsrb"
(define_insn "sse4_1_pinsrb"
[(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
(vec_merge:V16QI
(vec_duplicate:V16QI
......@@ -6083,7 +6083,7 @@
(set_attr "prefix" "orig,orig,vex,vex")
(set_attr "mode" "TI")])
(define_insn "*sse2_pinsrw"
(define_insn "sse2_pinsrw"
[(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
(vec_merge:V8HI
(vec_duplicate:V8HI
......@@ -6117,7 +6117,7 @@
(set_attr "mode" "TI")])
;; It must come before sse2_loadld since it is preferred.
(define_insn "*sse4_1_pinsrd"
(define_insn "sse4_1_pinsrd"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_merge:V4SI
(vec_duplicate:V4SI
......@@ -6145,7 +6145,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_pinsrq"
(define_insn "sse4_1_pinsrq"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(vec_merge:V2DI
(vec_duplicate:V2DI
......
2011-04-20 Uros Bizjak <ubizjak@gmail.com>
PR target/48678
* gcc.target/i386/sse2-pinsrw.c: New test.
* gcc.target/i386/avx-vpinsrw.c: Ditto.
* gcc.target/i386/sse4_1-insvqi.c: Ditto.
* gcc.target/i386/sse2-insvhi.c: Ditto.
* gcc.target/i386/sse4_1-insvsi.c: Ditto.
* gcc.target/i386/sse4_1-insvdi.c: Ditto.
2011-04-20 Jason Merrill <jason@redhat.com>
* g++.dg/template/const4.C: New.
......
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
/* Override CHECK_H and TEST before including sse2-pinsrw.c so that its
   test body is built against the AVX check header and entry point.  */
#define CHECK_H "avx-check.h"
#define TEST avx_test
#include "sse2-pinsrw.c"
/* { dg-do run } */
/* { dg-require-effective-target sse2 } */
/* { dg-options "-O2 -msse2" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
#include <emmintrin.h>
#include <string.h>
/* 16-bit element type; may_alias lets it be used to access the bytes of
   an __m128i object.  */
typedef short T __attribute__((may_alias));
/* Aggregate holding the vector so that its address can be taken.  */
struct S { __m128i d; };
/* Return a copy of Y in which the 16-bit element at index 1 has been
   replaced by X.  The replacement is written as a store through a T
   pointer into a local copy of the vector; presumably this source shape
   is what the test wants the compiler to expand as a vector insert, so
   it should not be rewritten.  noinline keeps the function out of line
   from its caller.  */
__m128i
__attribute__((noinline))
foo (__m128i y, short x)
{
struct S s;
s.d = y;
/* Overwrite element 1 of the local copy in place.  */
((T *) &s.d)[1] = x;
return s.d;
}
/* Fill a vector with a fixed bit pattern, have foo replace its 16-bit
   element 1, and abort unless the result is byte-for-byte equal to the
   same replacement done directly on a union copy.  */
static void
TEST (void)
{
  typedef union
  {
    __m128i x;
    unsigned int i[4];
    unsigned short s[8];
  } vec_t;
  static const unsigned int pattern[4]
    = { 0x35251505, 0x75655545, 0xB5A59585, 0xF5E5D5C5 };
  unsigned short ins[4] = { 3, 4, 5, 6 };
  vec_t input, expected, actual;
  int k;

  for (k = 0; k < 4; k++)
    input.i[k] = pattern[k];

  /* Result produced by the function under test.  */
  actual.x = foo (input.x, ins[3]);

  /* Reference result: overwrite element 1 through the union.  */
  expected.x = input.x;
  expected.s[1] = ins[3];

  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target sse2 } */
/* { dg-options "-O2 -msse2" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
#include <emmintrin.h>
#include <string.h>
/* Element indices 0 through 7, used below as the constant last argument
   of _mm_insert_epi16 and as indices into the expected-result array.  */
#define msk0 0x00
#define msk1 0x01
#define msk2 0x02
#define msk3 0x03
#define msk4 0x04
#define msk5 0x05
#define msk6 0x06
#define msk7 0x07
/* Check _mm_insert_epi16 against a reference computed through a union.
   First the same value is inserted at each of the eight element indices;
   then values picked by a run-time index are inserted at element 0.
   Any byte difference from the reference aborts the test.  */
static void
TEST (void)
{
  typedef union
  {
    __m128i x;
    unsigned int i[4];
    unsigned short s[8];
  } vec_t;
  static const unsigned int pattern[4]
    = { 0x35251505, 0x75655545, 0xB5A59585, 0xF5E5D5C5 };
  vec_t got[8], base, want;
  int slot[8] = { msk0, msk1, msk2, msk3, msk4, msk5, msk6, msk7 };
  unsigned short ins[4] = { 3, 4, 5, 6 };
  int k;

  for (k = 0; k < 4; k++)
    base.i[k] = pattern[k];

  /* Check pinsrw imm8, r32, xmm.  */
  got[0].x = _mm_insert_epi16 (base.x, ins[0], msk0);
  got[1].x = _mm_insert_epi16 (base.x, ins[0], msk1);
  got[2].x = _mm_insert_epi16 (base.x, ins[0], msk2);
  got[3].x = _mm_insert_epi16 (base.x, ins[0], msk3);
  got[4].x = _mm_insert_epi16 (base.x, ins[0], msk4);
  got[5].x = _mm_insert_epi16 (base.x, ins[0], msk5);
  got[6].x = _mm_insert_epi16 (base.x, ins[0], msk6);
  got[7].x = _mm_insert_epi16 (base.x, ins[0], msk7);

  for (k = 0; k < 8; k++)
    {
      want.x = base.x;
      want.s[slot[k]] = ins[0];
      if (memcmp (&want, &got[k], sizeof (want)) != 0)
        abort ();
    }

  /* Check pinsrw imm8, m16, xmm.  */
  for (k = 0; k < 8; k++)
    {
      slot[k] = msk0;
      got[k].x = _mm_insert_epi16 (base.x, ins[k % 2], msk0);
    }

  for (k = 0; k < 8; k++)
    {
      want.x = base.x;
      want.s[slot[k]] = ins[k % 2];
      if (memcmp (&want, &got[k], sizeof (want)) != 0)
        abort ();
    }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
#include <smmintrin.h>
#include <string.h>
/* Element type for the insertion; may_alias lets it be used to access
   the bytes of an __m128i object.  The file requires lp64, so long is
   presumably meant to be the 64-bit element here.  */
typedef long T __attribute__((may_alias));
/* Aggregate holding the vector so that its address can be taken.  */
struct S { __m128i d; };
/* Return a copy of Y in which the long-sized element at index 1 has been
   replaced by X.  The replacement is written as a store through a T
   pointer into a local copy of the vector; presumably this source shape
   is what the test wants the compiler to expand as a vector insert, so
   it should not be rewritten.  noinline keeps the function out of line
   from its caller.  */
__m128i
__attribute__((noinline))
foo (__m128i y, long x)
{
struct S s;
s.d = y;
/* Overwrite element 1 of the local copy in place.  */
((T *) &s.d)[1] = x;
return s.d;
}
/* Fill a vector with a fixed bit pattern, have foo replace its
   long-sized element 1, and abort unless the result is byte-for-byte
   equal to the same replacement done directly on a union copy.  */
static void
TEST (void)
{
  typedef union
  {
    __m128i x;
    unsigned int i[4];
    unsigned long l[2];
  } vec_t;
  static const unsigned int pattern[4]
    = { 0x35251505, 0x75655545, 0xB5A59585, 0xF5E5D5C5 };
  unsigned long ins[4] = { 3, 4, 5, 6 };
  vec_t input, expected, actual;
  int k;

  for (k = 0; k < 4; k++)
    input.i[k] = pattern[k];

  /* Result produced by the function under test.  */
  actual.x = foo (input.x, ins[3]);

  /* Reference result: overwrite element 1 through the union.  */
  expected.x = input.x;
  expected.l[1] = ins[3];

  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
#include <smmintrin.h>
#include <string.h>
/* Byte element type; may_alias lets it be used to access the bytes of
   an __m128i object.  */
typedef char T __attribute__((may_alias));
/* Aggregate holding the vector so that its address can be taken.  */
struct S { __m128i d; };
/* Return a copy of Y in which the byte at index 1 has been replaced by
   X.  The replacement is written as a store through a T pointer into a
   local copy of the vector; presumably this source shape is what the
   test wants the compiler to expand as a vector insert, so it should
   not be rewritten.  noinline keeps the function out of line from its
   caller.  */
__m128i
__attribute__((noinline))
foo (__m128i y, char x)
{
struct S s;
s.d = y;
/* Overwrite element 1 of the local copy in place.  */
((T *) &s.d)[1] = x;
return s.d;
}
/* Fill a vector with a fixed bit pattern, have foo replace its byte at
   index 1, and abort unless the result is byte-for-byte equal to the
   same replacement done directly on a union copy.  */
static void
TEST (void)
{
  typedef union
  {
    __m128i x;
    unsigned int i[4];
    unsigned char c[16];
  } vec_t;
  static const unsigned int pattern[4]
    = { 0x35251505, 0x75655545, 0xB5A59585, 0xF5E5D5C5 };
  unsigned char ins[4] = { 3, 4, 5, 6 };
  vec_t input, expected, actual;
  int k;

  for (k = 0; k < 4; k++)
    input.i[k] = pattern[k];

  /* Result produced by the function under test.  */
  actual.x = foo (input.x, ins[3]);

  /* Reference result: overwrite byte 1 through the union.  */
  expected.x = input.x;
  expected.c[1] = ins[3];

  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
#include <smmintrin.h>
#include <string.h>
/* int-sized element type; may_alias lets it be used to access the bytes
   of an __m128i object.  */
typedef int T __attribute__((may_alias));
/* Aggregate holding the vector so that its address can be taken.  */
struct S { __m128i d; };
/* Return a copy of Y in which the int-sized element at index 1 has been
   replaced by X.  The replacement is written as a store through a T
   pointer into a local copy of the vector; presumably this source shape
   is what the test wants the compiler to expand as a vector insert, so
   it should not be rewritten.  noinline keeps the function out of line
   from its caller.  */
__m128i
__attribute__((noinline))
foo (__m128i y, int x)
{
struct S s;
s.d = y;
/* Overwrite element 1 of the local copy in place.  */
((T *) &s.d)[1] = x;
return s.d;
}
/* Fill a vector with a fixed bit pattern, have foo replace its int-sized
   element 1, and abort unless the result is byte-for-byte equal to the
   same replacement done directly on a union copy.  */
static void
TEST (void)
{
  typedef union
  {
    __m128i x;
    unsigned int i[4];
  } vec_t;
  static const unsigned int pattern[4]
    = { 0x35251505, 0x75655545, 0xB5A59585, 0xF5E5D5C5 };
  unsigned int ins[4] = { 3, 4, 5, 6 };
  vec_t input, expected, actual;
  int k;

  for (k = 0; k < 4; k++)
    input.i[k] = pattern[k];

  /* Result produced by the function under test.  */
  actual.x = foo (input.x, ins[3]);

  /* Reference result: overwrite element 1 through the union.  */
  expected.x = input.x;
  expected.i[1] = ins[3];

  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
    abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment