Commit 09db7afe by Jakub Jelinek Committed by Jakub Jelinek

re PR target/48605 (gcc.target/i386/sse4_1-insertps-2.c FAILs with -mtune=geode…

re PR target/48605 (gcc.target/i386/sse4_1-insertps-2.c FAILs with -mtune=geode - instruction insertps with memory operands behaves differently)

	PR target/48605
	* config/i386/sse.md (sse4_1_insertps): If operands[2] is a MEM,
	offset it as needed based on top 2 bits in operands[3], change
	MEM mode to SFmode and mask those 2 bits away from operands[3].

	* gcc.target/i386/sse4_1-insertps-3.c: New test.
	* gcc.target/i386/sse4_1-insertps-4.c: New test.
	* gcc.target/i386/avx-insertps-3.c: New test.
	* gcc.target/i386/avx-insertps-4.c: New test.

From-SVN: r172458
parent c59633d9
2011-04-14 Jakub Jelinek <jakub@redhat.com>
PR target/48605
* config/i386/sse.md (sse4_1_insertps): If operands[2] is a MEM,
offset it as needed based on top 2 bits in operands[3], change
MEM mode to SFmode and mask those 2 bits away from operands[3].
2011-04-14 Nicola Pero <nicola.pero@meta-innovation.com>
* c-parser.c (c_parser_objc_protocol_definition): Updated for
......
......@@ -3572,9 +3572,24 @@
(match_operand:SI 3 "const_0_to_255_operand" "n,n")]
UNSPEC_INSERTPS))]
"TARGET_SSE4_1"
"@
insertps\t{%3, %2, %0|%0, %2, %3}
vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
{
if (MEM_P (operands[2]))
{
unsigned count_s = INTVAL (operands[3]) >> 6;
if (count_s)
operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
}
switch (which_alternative)
{
case 0:
return "insertps\t{%3, %2, %0|%0, %2, %3}";
case 1:
return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
......
2011-04-14 Jakub Jelinek <jakub@redhat.com>
PR target/48605
* gcc.target/i386/sse4_1-insertps-3.c: New test.
* gcc.target/i386/sse4_1-insertps-4.c: New test.
* gcc.target/i386/avx-insertps-3.c: New test.
* gcc.target/i386/avx-insertps-4.c: New test.
2011-04-14 Georg-Johann Lay <avr@gjlay.de>
* gcc.target/avr/torture/pr41885.c (dg-options): Add
......
/* { dg-do run { target ilp32 } } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mfpmath=sse -mavx -mtune=geode" } */
#define CHECK_H "avx-check.h"
#define TEST avx_test
#include "sse4_1-insertps-3.c"
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mfpmath=sse -mavx" } */
#define CHECK_H "avx-check.h"
#define TEST avx_test
#include "sse4_1-insertps-4.c"
/* { dg-do run { target ilp32 } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1 -mtune=geode" } */
#include "sse4_1-insertps-2.c"
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
#include <smmintrin.h>
#include <string.h>
#define msk0 0x41
#define msk1 0x90
#define msk2 0xe9
#define msk3 0x70
#define msk4 0xFC
#define msk5 0x05
#define msk6 0x0A
#define msk7 0x0F
union
{
__m128 x;
float f[4];
} val1;
static void
TEST (void)
{
union
{
__m128 x;
float f[4];
} res[8], val2, tmp;
int masks[8];
int i, j;
val2.f[0] = 55.0;
val2.f[1] = 55.0;
val2.f[2] = 55.0;
val2.f[3] = 55.0;
val1.f[0] = 1.;
val1.f[1] = 2.;
val1.f[2] = 3.;
val1.f[3] = 4.;
asm volatile ("" : "+m" (val1));
res[0].x = _mm_insert_ps (val2.x, val1.x, msk0);
asm volatile ("" : "+m" (val1));
res[1].x = _mm_insert_ps (val2.x, val1.x, msk1);
asm volatile ("" : "+m" (val1));
res[2].x = _mm_insert_ps (val2.x, val1.x, msk2);
asm volatile ("" : "+m" (val1));
res[3].x = _mm_insert_ps (val2.x, val1.x, msk3);
masks[0] = msk0;
masks[1] = msk1;
masks[2] = msk2;
masks[3] = msk3;
for (i = 0; i < 4; i++)
{
asm volatile ("" : "+m" (val1));
res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4);
}
masks[4] = msk4;
masks[5] = msk4;
masks[6] = msk4;
masks[7] = msk4;
for (i=0; i < 8; i++)
{
tmp = val2;
tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6];
for (j = 0; j < 4; j++)
if (masks[i] & (0x1 << j))
tmp.f[j] = 0.f;
if (memcmp (&res[i], &tmp, sizeof (tmp)))
abort ();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment