Commit 54e86f6b by Uros Bizjak Committed by Uros Bizjak

re PR target/40811 (unsigned int to float isn't vectorized)

	PR target/40811
	* config/i386/sse.md (sse2_cvtudq2ps): New expander.
	* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_CVTUDQ2PS.
	(bdesc_args): Add __builtin_ia32_cvtudq2ps.
	(ix86_vectorize_builtin_conversion): Handle IX86_BUILTIN_CVTUDQ2PS.

testsuite/ChangeLog:

	PR target/40811
	* lib/target-supports.exp (check_effective_target_vect_uintfloat_cvt):
	Add i?86 and x86_64 targets.
	* gcc.target/i386/vectorize7.c: New test.

	PR target/40809
	* gcc.target/i386/pr40809.c: New test.

From-SVN: r149861
parent 4a2b7f24
2009-07-21 Uros Bizjak <ubizjak@gmail.com>
PR target/40811
* config/i386/sse.md (sse2_cvtudq2ps): New expander.
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_CVTUDQ2PS.
(bdesc_args): Add __builtin_ia32_cvtudq2ps.
(ix86_vectorize_builtin_conversion): Handle IX86_BUILTIN_CVTUDQ2PS.
2009-07-21 Jakub Jelinek <jakub@redhat.com> 2009-07-21 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/40813 PR tree-optimization/40813
......
...@@ -20908,6 +20908,8 @@ enum ix86_builtins ...@@ -20908,6 +20908,8 @@ enum ix86_builtins
IX86_BUILTIN_CPYSGNPS, IX86_BUILTIN_CPYSGNPS,
IX86_BUILTIN_CPYSGNPD, IX86_BUILTIN_CPYSGNPD,
IX86_BUILTIN_CVTUDQ2PS,
/* SSE5 instructions */ /* SSE5 instructions */
IX86_BUILTIN_FMADDSS, IX86_BUILTIN_FMADDSS,
IX86_BUILTIN_FMADDSD, IX86_BUILTIN_FMADDSD,
...@@ -21785,6 +21787,7 @@ static const struct builtin_description bdesc_args[] = ...@@ -21785,6 +21787,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
...@@ -25962,9 +25965,7 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) ...@@ -25962,9 +25965,7 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
static tree static tree
ix86_vectorize_builtin_conversion (unsigned int code, tree type) ix86_vectorize_builtin_conversion (unsigned int code, tree type)
{ {
if (TREE_CODE (type) != VECTOR_TYPE if (TREE_CODE (type) != VECTOR_TYPE)
/* There are only conversions from/to signed integers. */
|| TYPE_UNSIGNED (TREE_TYPE (type)))
return NULL_TREE; return NULL_TREE;
switch (code) switch (code)
...@@ -25973,7 +25974,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type) ...@@ -25973,7 +25974,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type)
switch (TYPE_MODE (type)) switch (TYPE_MODE (type))
{ {
case V4SImode: case V4SImode:
return ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; return TYPE_UNSIGNED (type)
? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
: ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
default: default:
return NULL_TREE; return NULL_TREE;
} }
...@@ -25982,7 +25985,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type) ...@@ -25982,7 +25985,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type)
switch (TYPE_MODE (type)) switch (TYPE_MODE (type))
{ {
case V4SImode: case V4SImode:
return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; return TYPE_UNSIGNED (type)
? NULL_TREE
: ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
default: default:
return NULL_TREE; return NULL_TREE;
} }
......
...@@ -2420,6 +2420,31 @@ ...@@ -2420,6 +2420,31 @@
[(set_attr "type" "ssecvt") [(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")]) (set_attr "mode" "V4SF")])
;; Expand a V4SI -> V4SF conversion that treats the source lanes as
;; unsigned, using only V4SF operations available under TARGET_SSE2.
;;
;; The sequence emitted is:
;;   op5 = (float) op1          ; convert, interpreting lanes as signed
;;   op6 = op5 < op3            ; op3 is the all-zero V4SF vector
;;   op7 = op6 & op4            ; op4 holds 2^32 in SFmode
;;   op0 = op5 + op7
;; i.e. lanes whose signed conversion came out negative get 2^32 added
;; back.  This presumably relies on the comparison yielding an all-ones
;; bit mask in the lanes where it holds -- TODO confirm against the
;; matching compare pattern.
;;
;; NOTE(review): both the conversion and the final addition round to
;; SFmode, so inputs >= 2^31 may be rounded twice -- confirm that the
;; result is acceptable for all inputs.
(define_expand "sse2_cvtudq2ps"
;; Step 1: plain int -> float conversion of the input into temporary 5.
[(set (match_dup 5)
(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
;; Step 2: compare the converted value against zero (operand 3).
(set (match_dup 6)
(lt:V4SF (match_dup 5) (match_dup 3)))
;; Step 3: AND the comparison result with the 2^32 constant (operand 4).
(set (match_dup 7)
(and:V4SF (match_dup 6) (match_dup 4)))
;; Step 4: add the masked correction to the converted value.
(set (match_operand:V4SF 0 "register_operand" "")
(plus:V4SF (match_dup 5) (match_dup 7)))]
"TARGET_SSE2"
{
REAL_VALUE_TYPE TWO32r;
rtx x;
int i;
/* Build the SFmode constant from dconst1 scaled by an exponent of 32
   (presumably 1.0 * 2^32 -- verify real_ldexp semantics).  */
real_ldexp (&TWO32r, &dconst1, 32);
x = const_double_from_real_value (TWO32r, SFmode);
/* Operand 3: V4SF zero vector, forced into a register.  */
operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
/* Operand 4: V4SF vector built from x, forced into a register.  The
   second argument (1) presumably requests x in every lane -- confirm
   against ix86_build_const_vector.  */
operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
/* Operands 5..7 are the intermediate results referenced by the
   match_dup expressions above; give each a new V4SF register.  */
for (i = 5; i < 8; i++)
operands[i] = gen_reg_rtx (V4SFmode);
})
(define_insn "avx_cvtps2dq<avxmodesuffix>" (define_insn "avx_cvtps2dq<avxmodesuffix>"
[(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x") [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
(unspec:AVXMODEDCVTPS2DQ (unspec:AVXMODEDCVTPS2DQ
......
2009-07-21 Uros Bizjak <ubizjak@gmail.com>
PR target/40811
* lib/target-supports.exp (check_effective_target_vect_uintfloat_cvt):
Add i?86 and x86_64 targets.
* gcc.target/i386/vectorize7.c: New test.
PR target/40809
* gcc.target/i386/pr40809.c: New test.
2009-07-21 Jakub Jelinek <jakub@redhat.com> 2009-07-21 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/40813 PR tree-optimization/40813
......
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
#include "sse2-check.h"
#define N 8

/* Every input is 4000000000, which is larger than INT_MAX, so the
   conversion below must treat the elements as unsigned.  */
unsigned int u4[N] = {
  4000000000u, 4000000000u, 4000000000u, 4000000000u,
  4000000000u, 4000000000u, 4000000000u, 4000000000u
};
float f4[N];

/* Convert each element of u4 to float into f4, then abort unless every
   result equals 4000000000.0.  */
static void
sse2_test (void)
{
  int idx;

  /* Conversion loop under test.  */
  for (idx = 0; idx < N; idx++)
    f4[idx] = u4[idx];

  /* Verify the converted values.  */
  for (idx = 0; idx < N; idx++)
    {
      if (f4[idx] == 4000000000.0)
        continue;
      abort ();
    }
}
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
/* Source (unsigned) and destination (float) arrays for the conversion
   loop in foo.  */
unsigned int a[256];
float b[256];

/* Store each element of a, converted to float, into the matching
   element of b.  */
void foo(void)
{
  int idx;

  for (idx = 0; idx < 256; ++idx)
    {
      b[idx] = a[idx];
    }
}
/* { dg-final { scan-assembler "cvtdq2ps" } } */
...@@ -1399,8 +1399,10 @@ proc check_effective_target_vect_uintfloat_cvt { } { ...@@ -1399,8 +1399,10 @@ proc check_effective_target_vect_uintfloat_cvt { } {
verbose "check_effective_target_vect_uintfloat_cvt: using cached result" 2 verbose "check_effective_target_vect_uintfloat_cvt: using cached result" 2
} else { } else {
set et_vect_uintfloat_cvt_saved 0 set et_vect_uintfloat_cvt_saved 0
if { ([istarget powerpc*-*-*] if { [istarget i?86-*-*]
&& ![istarget powerpc-*-linux*paired*]) } { || ([istarget powerpc*-*-*]
&& ![istarget powerpc-*-linux*paired*])
|| [istarget x86_64-*-*] } {
set et_vect_uintfloat_cvt_saved 1 set et_vect_uintfloat_cvt_saved 1
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment