Commit 7650b83e by Jan Hubicka Committed by Jan Hubicka

i386.h (CONDITIONAL_CALL_USAGE): SSE regs are not used for w64 ABI.


	* i386.h (CONDITIONAL_CALL_USAGE): SSE regs are not used for w64 ABI.
	* i386.c (struct ix86_frame): Add padding0 and nsseregs.
	(ix86_nsaved_regs): Count only general purpose regs.
	(ix86_nsaved_sseregs): New.
	(ix86_compute_frame_layout): Update nsseregs; set preferred alignment
	to 16 for w64; compute padding and size of sse reg save area.
	(ix86_emit_save_regs, ix86_emit_save_regs_using_mov): Save only general
	purpose regs.
	(ix86_emit_save_sse_regs_using_mov): New.
	(ix86_expand_prologue): Save SSE regs if needed.
	(ix86_emit_restore_regs_using_mov): Use only general purpose regs.
	(ix86_emit_restore_sse_regs_using_mov): New.
	(ix86_expand_epilogue): Restore SSE regs if needed.

Co-Authored-By: Kai Tietz <kai.tietz@onevision.com>

From-SVN: r143119
parent 9aa5c1b2
2009-01-06 Jan Hubicka <jh@suse.cz> 2009-01-06 Jan Hubicka <jh@suse.cz>
Kai Tietz <kai.tietz@onevision.com> Kai Tietz <kai.tietz@onevision.com>
* i386.h (CONDITIONAL_CALL_USAGE): SSE regs are not used for w64 ABI.
* i386.c (struct ix86_frame): Add padding0 and nsseregs.
(ix86_nsaved_regs): Count only general purpose regs.
(ix86_nsaved_sseregs): New.
(ix86_compute_frame_layout): Update nsseregs; set preferred alignment
to 16 for w64; compute padding and size of sse reg save area.
(ix86_emit_save_regs, ix86_emit_save_regs_using_mov): Save only general
purpose regs.
(ix86_emit_save_sse_regs_using_mov): New.
(ix86_expand_prologue): Save SSE regs if needed.
(ix86_emit_restore_regs_using_mov): Use only general purpose regs.
(ix86_emit_restore_sse_regs_using_mov): New.
(ix86_expand_epilogue): Restore SSE regs if needed.
2009-01-06 Jan Hubicka <jh@suse.cz>
Kai Tietz <kai.tietz@onevision.com>
* i386.h (ACCUMULATE_OUTGOING_ARGS): Enable for MSABI * i386.h (ACCUMULATE_OUTGOING_ARGS): Enable for MSABI
* i386.c (init_cumulative_args): Disallow calls of MSABI functions when * i386.c (init_cumulative_args): Disallow calls of MSABI functions when
accumulate outgoing args is off. accumulate outgoing args is off.
......
...@@ -1655,6 +1655,10 @@ struct stack_local_entry GTY(()) ...@@ -1655,6 +1655,10 @@ struct stack_local_entry GTY(())
<- HARD_FRAME_POINTER <- HARD_FRAME_POINTER
[saved regs] [saved regs]
[padding0]
[saved SSE regs]
[padding1] \ [padding1] \
) )
[va_arg registers] ( [va_arg registers] (
...@@ -1665,6 +1669,8 @@ struct stack_local_entry GTY(()) ...@@ -1665,6 +1669,8 @@ struct stack_local_entry GTY(())
*/ */
struct ix86_frame struct ix86_frame
{ {
int padding0;
int nsseregs;
int nregs; int nregs;
int padding1; int padding1;
int va_arg_size; int va_arg_size;
...@@ -7417,7 +7423,7 @@ ix86_save_reg (unsigned int regno, int maybe_eh_return) ...@@ -7417,7 +7423,7 @@ ix86_save_reg (unsigned int regno, int maybe_eh_return)
&& (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
} }
/* Return number of registers to be saved on the stack. */ /* Return number of saved general purpose registers. */
static int static int
ix86_nsaved_regs (void) ix86_nsaved_regs (void)
...@@ -7425,9 +7431,25 @@ ix86_nsaved_regs (void) ...@@ -7425,9 +7431,25 @@ ix86_nsaved_regs (void)
int nregs = 0; int nregs = 0;
int regno; int regno;
for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (ix86_save_reg (regno, true)) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
nregs++; nregs ++;
return nregs;
}
/* Return number of saved SSE registers. */
static int
ix86_nsaved_sseregs (void)
{
int nregs = 0;
int regno;
if (ix86_cfun_abi () != MS_ABI)
return 0;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
nregs ++;
return nregs; return nregs;
} }
...@@ -7487,11 +7509,22 @@ ix86_compute_frame_layout (struct ix86_frame *frame) ...@@ -7487,11 +7509,22 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
HOST_WIDE_INT size = get_frame_size (); HOST_WIDE_INT size = get_frame_size ();
frame->nregs = ix86_nsaved_regs (); frame->nregs = ix86_nsaved_regs ();
frame->nsseregs = ix86_nsaved_sseregs ();
total_size = size; total_size = size;
stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* The MS ABI seems to require stack alignment to always be 16, except in
function prologues. */
if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
{
preferred_alignment = 16;
stack_alignment_needed = 16;
crtl->preferred_stack_boundary = 128;
crtl->stack_alignment_needed = 128;
}
gcc_assert (!size || stack_alignment_needed); gcc_assert (!size || stack_alignment_needed);
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed); gcc_assert (preferred_alignment <= stack_alignment_needed);
...@@ -7546,6 +7579,15 @@ ix86_compute_frame_layout (struct ix86_frame *frame) ...@@ -7546,6 +7579,15 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
/* Register save area */ /* Register save area */
offset += frame->nregs * UNITS_PER_WORD; offset += frame->nregs * UNITS_PER_WORD;
/* Align SSE reg save area. */
if (frame->nsseregs)
frame->padding0 = ((offset + 16 - 1) & -16) - offset;
else
frame->padding0 = 0;
/* SSE register save area. */
offset += frame->padding0 + frame->nsseregs * 16;
/* Va-arg area */ /* Va-arg area */
frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
offset += frame->va_arg_size; offset += frame->va_arg_size;
...@@ -7615,8 +7657,10 @@ ix86_compute_frame_layout (struct ix86_frame *frame) ...@@ -7615,8 +7657,10 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
frame->stack_pointer_offset -= frame->red_zone_size; frame->stack_pointer_offset -= frame->red_zone_size;
#if 0 #if 0
fprintf (stderr, "\n"); fprintf (stderr, "\n");
fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
fprintf (stderr, "size: %ld\n", (long)size); fprintf (stderr, "size: %ld\n", (long)size);
fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed); fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
fprintf (stderr, "padding1: %ld\n", (long)frame->padding1); fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size); fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
...@@ -7641,8 +7685,8 @@ ix86_emit_save_regs (void) ...@@ -7641,8 +7685,8 @@ ix86_emit_save_regs (void)
unsigned int regno; unsigned int regno;
rtx insn; rtx insn;
for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
if (ix86_save_reg (regno, true)) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{ {
insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
...@@ -7658,7 +7702,7 @@ ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) ...@@ -7658,7 +7702,7 @@ ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
rtx insn; rtx insn;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (ix86_save_reg (regno, true)) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{ {
insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
Pmode, offset), Pmode, offset),
...@@ -7668,6 +7712,26 @@ ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) ...@@ -7668,6 +7712,26 @@ ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
} }
} }
/* Emit code to save SSE registers using MOV insns. The first saved
register is stored at POINTER + OFFSET; each following saved register
is stored 16 bytes above the previous one. Only registers for which
both SSE_REGNO_P and ix86_save_reg (regno, true) hold are saved. */
static void
ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
unsigned int regno;
rtx insn;
rtx mem;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
/* Each register is saved as a full 16-byte TImode value. */
mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
/* Record the slot as 128-bit aligned; NOTE(review): this relies on the
caller passing a 16-byte aligned POINTER + OFFSET -- confirm. */
set_mem_align (mem, 128);
insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
/* Mark the store as frame related, like the other prologue saves. */
RTX_FRAME_RELATED_P (insn) = 1;
offset += 16;
}
}
/* Expand prologue or epilogue stack adjustment. /* Expand prologue or epilogue stack adjustment.
The pattern exist to put a dependency on all ebp-based memory accesses. The pattern exist to put a dependency on all ebp-based memory accesses.
STYLE should be negative if instructions should be marked as frame related, STYLE should be negative if instructions should be marked as frame related,
...@@ -7972,7 +8036,7 @@ ix86_expand_prologue (void) ...@@ -7972,7 +8036,7 @@ ix86_expand_prologue (void)
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
} }
allocate = frame.to_allocate; allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
if (!frame.save_regs_using_mov) if (!frame.save_regs_using_mov)
ix86_emit_save_regs (); ix86_emit_save_regs ();
...@@ -8051,11 +8115,22 @@ ix86_expand_prologue (void) ...@@ -8051,11 +8115,22 @@ ix86_expand_prologue (void)
|| !frame.to_allocate || !frame.to_allocate
|| crtl->stack_realign_needed) || crtl->stack_realign_needed)
ix86_emit_save_regs_using_mov (stack_pointer_rtx, ix86_emit_save_regs_using_mov (stack_pointer_rtx,
frame.to_allocate); frame.to_allocate
+ frame.nsseregs * 16 + frame.padding0);
else else
ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
-frame.nregs * UNITS_PER_WORD); -frame.nregs * UNITS_PER_WORD);
} }
if (!frame_pointer_needed
|| !frame.to_allocate
|| crtl->stack_realign_needed)
ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
frame.to_allocate);
else
ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
- frame.nregs * UNITS_PER_WORD
- frame.nsseregs * 16
- frame.padding0);
pic_reg_used = false; pic_reg_used = false;
if (pic_offset_table_rtx if (pic_offset_table_rtx
...@@ -8127,7 +8202,7 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, ...@@ -8127,7 +8202,7 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
rtx base_address = gen_rtx_MEM (Pmode, pointer); rtx base_address = gen_rtx_MEM (Pmode, pointer);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (ix86_save_reg (regno, maybe_eh_return)) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
{ {
/* Ensure that adjust_address won't be forced to produce pointer /* Ensure that adjust_address won't be forced to produce pointer
out of range allowed by x86-64 instruction set. */ out of range allowed by x86-64 instruction set. */
...@@ -8142,11 +8217,43 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, ...@@ -8142,11 +8217,43 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
offset = 0; offset = 0;
} }
emit_move_insn (gen_rtx_REG (Pmode, regno), emit_move_insn (gen_rtx_REG (Pmode, regno),
adjust_address (base_address, Pmode, offset)); adjust_address (base_address, Pmode, offset));
offset += UNITS_PER_WORD; offset += UNITS_PER_WORD;
} }
} }
/* Emit code to restore saved SSE registers using MOV insns. The first
register is restored from POINTER + OFFSET; each following saved
register is loaded from 16 bytes above the previous one. Only registers
for which both SSE_REGNO_P and ix86_save_reg (regno, MAYBE_EH_RETURN)
hold are restored. */
static void
ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
int maybe_eh_return)
{
int regno;
rtx base_address = gen_rtx_MEM (TImode, pointer);
rtx mem;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
{
/* Ensure that adjust_address won't be forced to produce pointer
out of range allowed by x86-64 instruction set. */
if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
{
rtx r11;
/* Compute POINTER + OFFSET into r11 and address the remaining
slots relative to r11, restarting the running offset at 0. */
r11 = gen_rtx_REG (DImode, R11_REG);
emit_move_insn (r11, GEN_INT (offset));
emit_insn (gen_adddi3 (r11, r11, pointer));
base_address = gen_rtx_MEM (TImode, r11);
offset = 0;
}
/* Each register is reloaded as a full 16-byte TImode value from a
slot recorded as 128-bit aligned. */
mem = adjust_address (base_address, TImode, offset);
set_mem_align (mem, 128);
emit_move_insn (gen_rtx_REG (TImode, regno), mem);
offset += 16;
}
}
/* Restore function stack, frame, and registers. */ /* Restore function stack, frame, and registers. */
void void
...@@ -8174,6 +8281,7 @@ ix86_expand_epilogue (int style) ...@@ -8174,6 +8281,7 @@ ix86_expand_epilogue (int style)
if (crtl->calls_eh_return && style != 2) if (crtl->calls_eh_return && style != 2)
offset -= 2; offset -= 2;
offset *= -UNITS_PER_WORD; offset *= -UNITS_PER_WORD;
offset -= frame.nsseregs * 16 + frame.padding0;
/* If we're only restoring one register and sp is not valid then /* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's using a move instruction to restore the register since it's
...@@ -8207,11 +8315,23 @@ ix86_expand_epilogue (int style) ...@@ -8207,11 +8315,23 @@ ix86_expand_epilogue (int style)
if (!frame_pointer_needed if (!frame_pointer_needed
|| (sp_valid && !frame.to_allocate) || (sp_valid && !frame.to_allocate)
|| stack_realign_fp) || stack_realign_fp)
ix86_emit_restore_regs_using_mov (stack_pointer_rtx, {
frame.to_allocate, style == 2); ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
frame.to_allocate, style == 2);
ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
frame.to_allocate
+ frame.nsseregs * 16
+ frame.padding0, style == 2);
}
else else
ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, {
offset, style == 2); ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
offset, style == 2);
ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
offset
+ frame.nsseregs * 16
+ frame.padding0, style == 2);
}
/* eh_return epilogues need %ecx added to the stack pointer. */ /* eh_return epilogues need %ecx added to the stack pointer. */
if (style == 2) if (style == 2)
...@@ -8237,14 +8357,18 @@ ix86_expand_epilogue (int style) ...@@ -8237,14 +8357,18 @@ ix86_expand_epilogue (int style)
{ {
tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
tmp = plus_constant (tmp, (frame.to_allocate tmp = plus_constant (tmp, (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD)); + frame.nregs * UNITS_PER_WORD
+ frame.nsseregs * 16
+ frame.padding0));
emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
} }
} }
else if (!frame_pointer_needed) else if (!frame_pointer_needed)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (frame.to_allocate GEN_INT (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD), + frame.nregs * UNITS_PER_WORD
+ frame.nsseregs * 16
+ frame.padding0),
style); style);
/* If not an i386, mov & pop is faster than "leave". */ /* If not an i386, mov & pop is faster than "leave". */
else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun) else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
...@@ -8275,13 +8399,24 @@ ix86_expand_epilogue (int style) ...@@ -8275,13 +8399,24 @@ ix86_expand_epilogue (int style)
pro_epilogue_adjust_stack (stack_pointer_rtx, pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx, hard_frame_pointer_rtx,
GEN_INT (offset), style); GEN_INT (offset), style);
ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
frame.to_allocate, style == 2);
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (frame.nsseregs * 16), style);
}
else if (frame.to_allocate || frame.nsseregs)
{
ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
frame.to_allocate,
style == 2);
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (frame.to_allocate
+ frame.nsseregs * 16
+ frame.padding0), style);
} }
else if (frame.to_allocate)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (frame.to_allocate), style);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (ix86_save_reg (regno, false)) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno))); emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
if (frame_pointer_needed) if (frame_pointer_needed)
{ {
......
...@@ -968,8 +968,12 @@ do { \ ...@@ -968,8 +968,12 @@ do { \
&& ((cfun && cfun->machine->call_abi == MS_ABI) \ && ((cfun && cfun->machine->call_abi == MS_ABI) \
|| (!cfun && DEFAULT_ABI == MS_ABI))) \ || (!cfun && DEFAULT_ABI == MS_ABI))) \
{ \ { \
int i; \
call_used_regs[4 /*RSI*/] = 0; \ call_used_regs[4 /*RSI*/] = 0; \
call_used_regs[5 /*RDI*/] = 0; \ call_used_regs[5 /*RDI*/] = 0; \
for (i = 0; i < 8; i++) \
call_used_regs[45+i] = 0; \
call_used_regs[27] = call_used_regs[28] = 0; \
} \ } \
} while (0) } while (0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment