Commit 2cdb3148 by Richard Henderson

re PR target/12902 (Invalid assembly generated when using SSE / xmmintrin.h)

        PR target/12902
        * config/i386/i386.md (sse_movhps, sse_movlps): Remove.
        (sse_shufps): Change operand 3 to const_int_operand.
        (sse2_storelps): Fix typo in template.
        (sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New.
        * config/i386/i386.c (ix86_expand_vector_move_misalign): Use them.
        (ix86_expand_builtin): Likewise.

From-SVN: r92967
parent a8182d37
2004-01-05 Julian Brown <julian@codesourcery.com>
2005-01-05 Richard Henderson <rth@redhat.com>
PR target/12902
* config/i386/i386.md (sse_movhps, sse_movlps): Remove.
(sse_shufps): Change operand 3 to const_int_operand.
(sse2_storelps): Fix typo in template.
(sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New.
* config/i386/i386.c (ix86_expand_vector_move_misalign): Use them.
(ix86_expand_builtin): Likewise.
2005-01-05 Julian Brown <julian@codesourcery.com>
* config/arm/arm.c (arm_return_in_memory): Treat complex types
as aggregates for AAPCS ABIs.
......
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004 Free Software Foundation, Inc.
2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of GCC.
......@@ -7645,11 +7645,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
else
emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
op0 = gen_lowpart (V4SFmode, op0);
m = adjust_address (op1, V4SFmode, 0);
emit_insn (gen_sse_movlps (op0, op0, m));
m = adjust_address (op1, V4SFmode, 8);
emit_insn (gen_sse_movhps (op0, op0, m));
m = adjust_address (op1, V2SFmode, 0);
emit_insn (gen_sse_loadlps (op0, op0, m));
m = adjust_address (op1, V2SFmode, 8);
emit_insn (gen_sse_loadhps (op0, op0, m));
}
}
else if (MEM_P (op0))
......@@ -7684,11 +7683,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
}
else
{
op1 = gen_lowpart (V4SFmode, op1);
m = adjust_address (op0, V4SFmode, 0);
emit_insn (gen_sse_movlps (m, m, op1));
m = adjust_address (op0, V4SFmode, 8);
emit_insn (gen_sse_movhps (m, m, op1));
m = adjust_address (op0, V2SFmode, 0);
emit_insn (gen_sse_storelps (m, op1));
m = adjust_address (op0, V2SFmode, 8);
emit_insn (gen_sse_storehps (m, op1));
return;
}
}
......@@ -13508,8 +13506,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_LOADLPS:
case IX86_BUILTIN_LOADHPD:
case IX86_BUILTIN_LOADLPD:
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
: fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
: CODE_FOR_sse2_loadlpd);
arg0 = TREE_VALUE (arglist);
......@@ -13535,28 +13533,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_STOREHPS:
case IX86_BUILTIN_STORELPS:
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
mode0 = insn_data[icode].operand[1].mode;
mode1 = insn_data[icode].operand[2].mode;
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (op0, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return const0_rtx;
case IX86_BUILTIN_STOREHPD:
case IX86_BUILTIN_STORELPD:
icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
: fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
: fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
: CODE_FOR_sse2_storelpd);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
......
;; GCC machine description for IA-32 and x86-64.
;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
;; 2001, 2002, 2003, 2004
;; 2001, 2002, 2003, 2004, 2005
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
......@@ -20335,29 +20335,98 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(define_insn "sse_movhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(vec_merge:V4SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
;; Store the high V2SF of the source vector to the destination.
;; Alternatives, in constraint order:
;;   0: SSE register source, memory destination -- movhps.
;;   1: SSE register source, SSE register destination -- movhlps.
;;   2: offsettable memory source, SSE register destination -- emitted
;;      as "#", i.e. it has no single-instruction template and relies on
;;      being split (see the define_split that follows this pattern).
(define_insn "sse_storehps"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE"
"@
movhps\t{%1, %0|%0, %1}
movhlps\t{%1, %0|%0, %1}
#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(set_attr "mode" "V2SF")])
(define_insn "sse_movlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(vec_merge:V4SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
;; After reload, turn "select elements 2 and 3 of a V4SF in memory into a
;; register" into an ordinary V2SF move from the upper eight bytes of
;; that memory operand.
(define_split
  [(set (match_operand:V2SF 0 "register_operand" "")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "memory_operand" "")
	  (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* Elements 2 and 3 live at byte offset 8 of the V4SF memory.  */
  rtx high_half = adjust_address (operands[1], V2SFmode, 8);

  emit_move_insn (operands[0], high_half);
  DONE;
})
;; Load the high V2SF of the target vector from the source vector.
;; Operand 1 (always tied to operand 0) supplies the low half that is
;; kept; operand 2 supplies the new high half.
;; Alternatives, in constraint order:
;;   0: memory source, SSE register destination -- movhps.
;;   1: SSE register source, SSE register destination -- movlhps.
;;   2: SSE register source, offsettable memory destination -- emitted
;;      as "#", i.e. it has no single-instruction template and relies on
;;      being split (see the define_split that follows this pattern).
(define_insn "sse_loadhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
(vec_concat:V4SF
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
(parallel [(const_int 0) (const_int 1)]))
(match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
"TARGET_SSE"
"@
movhps\t{%2, %0|%0, %2}
movlhps\t{%2, %0|%0, %2}
#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(set_attr "mode" "V2SF")])
;; After reload, split the memory-destination alternative of sse_loadhps
;; (the one whose template is "#") into a plain V2SF store to the upper
;; eight bytes of the V4SF memory operand.  The low half needs no code:
;; the pattern requires it to be the destination's own low half
;; (match_dup 0), so it is already in place.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
	(vec_concat:V4SF
	  (vec_select:V2SF
	    (match_dup 0)
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:V2SF 2 "register_operand" "")))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* The register source is operand 2; this pattern binds no operand 1,
     so operands[1] must not be used here.  */
  emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[2]);
  DONE;
})
;; Store the low V2SF of the source vector to the destination.
;; No dedicated insn is needed: the low two elements are simply the
;; V2SF low part of the V4SF source, so this expands to a normal move.
(define_expand "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
{
  rtx low_half = gen_lowpart (V2SFmode, operands[1]);

  emit_move_insn (operands[0], low_half);
  DONE;
})
;; Load the low V2SF of the target vector from the source vector.
;; Operand 2 supplies the new low half; operand 1 supplies the high half
;; that is kept.
;; Alternatives, in constraint order:
;;   0: memory low half, destination register tied to operand 1 -- movlps.
;;   1: operand 2 tied to the destination register, operand 1 in an SSE
;;      register -- the two are merged with shufps.
;;   2: SSE register low half, memory destination tied to operand 1 --
;;      movlps used as a store.
(define_insn "sse_loadlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
(vec_concat:V4SF
(match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_SSE"
{
/* One output template per constraint alternative, indexed by
   which_alternative.  */
static const char * const alt[] = {
"movlps\t{%2, %0|%0, %2}",
"shufps\t{%2, %1, %0|%0, %1, %2}",
"movlps\t{%2, %0|%0, %2}"
};
/* In alternative 1 operand 2 is tied to operand 0, so its slot is free
   to be reused for printing the shufps immediate.  0xe4 is the identity
   selector (elements 0,1,2,3): shufps takes the low two result elements
   from the destination and the high two from operand 1.  */
if (which_alternative == 1)
operands[2] = GEN_INT (0xe4);
return alt[which_alternative];
}
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2SF")])
(define_expand "sse_loadss"
[(match_operand:V4SF 0 "register_operand" "")
......@@ -20405,10 +20474,9 @@
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "immediate_operand" "i")]
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_SHUFFLE))]
"TARGET_SSE"
;; @@@ check operand order for intel/nonintel syntax
"shufps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
......@@ -23902,7 +23970,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" "")
(parallel [(const_int 1)])))]
(parallel [(const_int 0)])))]
"TARGET_SSE2"
{
operands[1] = gen_lowpart (DFmode, operands[1]);
......@@ -23910,7 +23978,7 @@
DONE;
})
;; Load the load double of the target vector from the source scalar.
;; Load the low double of the target vector from the source scalar.
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
(vec_concat:V2DF
......
/* PR 12902 */
/* { dg-do compile } */
/* { dg-options "-O1 -msse" } */
#include <xmmintrin.h>
/* A __m128 SSE vector overlaid with four-element int and float array
   views of the same storage.  Only the V member is used by this test.  */
typedef union
{
int i[4];
float f[4];
__m128 v;
} vector4_t;
/* Replace the low half of B->v and of C->v with the two consecutive
   __m64 values found at A and A + 1 respectively.  Each vector is passed
   to _mm_loadl_pi as its own first argument and assigned the result, so
   the load happens in place on memory operands; this is the shape that
   the scan-assembler directives in this file check.  */
void
swizzle (const void *a, vector4_t * b, vector4_t * c)
{
b->v = _mm_loadl_pi (b->v, (__m64 *) a);
c->v = _mm_loadl_pi (c->v, ((__m64 *) a) + 1);
}
/* While one legal rendering of each statement would be movaps;movlps;movaps,
we can implement this with just movlps;movlps. Since we do now, anything
less would be a regression. */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler "movlps" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment