Commit 650b5329 by Richard Henderson, committed by Richard Henderson

Eliminate the computed goto in x64 varargs.

Use a simple test and branch against zero instead, which also
avoids some awkward code emission problems.

From-SVN: r162384
parent 5634254c
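
For context, the x86-64 SysV calling convention has the caller pass (an upper bound on) the number of vector registers used by a variadic call in %al; the rewritten prologue only needs to know whether that value is zero.  A minimal sketch of such a call, purely illustrative and not part of this patch:

#include <stdarg.h>
#include <stdio.h>

/* Variadic callee: its prologue spills the incoming argument registers
   into the register save area so that va_arg can read them back.  */
static double
sum (int n, ...)
{
  va_list ap;
  double total = 0.0;
  int i;

  va_start (ap, n);
  for (i = 0; i < n; i++)
    total += va_arg (ap, double);	/* reads the saved xmm slots */
  va_end (ap);
  return total;
}

int
main (void)
{
  /* The caller sets %al to the number of vector registers it used
     (three here); the new prologue merely tests it against zero.  */
  printf ("%f\n", sum (3, 1.0, 2.0, 3.0));
  return 0;
}
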
2010-07-21 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (setup_incoming_varargs_64): Emit a simple
comparison for avoiding xmm register saves. Emit the xmm register
saves explicitly.
* config/i386/i386.md (UNSPEC_SSE_PROLOGUE_SAVE): Remove.
(UNSPEC_SSE_PROLOGUE_SAVE_LOW): Remove.
(sse_prologue_save, sse_prologue_save_insn1, sse_prologue_save_insn):
Remove patterns and the associated splitters.
2010-07-21 Changpeng Fang <changpeng.fang@amd.com>
* tree-ssa-loop-prefetch.c (analyze_ref): Strip off the real
......
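
The i386.c hunks below rework setup_incoming_varargs_64, which fills the register save area that va_arg later walks.  As a reminder of what that area backs, the SysV x86-64 va_list looks roughly like this (a sketch based on the psABI, not code from this patch):

/* SysV x86-64 va_list: reg_save_area holds the six general-purpose
   argument registers (48 bytes) followed by up to eight SSE registers
   in 16-byte slots; the prologue changed below stores the xmm
   registers into that second part.  */
typedef struct
{
  unsigned int gp_offset;	/* offset of next unread GP register slot */
  unsigned int fp_offset;	/* offset of next unread SSE register slot */
  void *overflow_arg_area;	/* arguments that were passed on the stack */
  void *reg_save_area;		/* the save area the prologue fills */
} sysv_va_list_sketch[1];
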
config/i386/i386.c
@@ -7062,11 +7062,8 @@ static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
rtx save_area, mem;
rtx label;
rtx tmp_reg;
rtx nsse_reg;
alias_set_type set;
int i;
int i, max;
/* GPR size of varargs save area. */
if (cfun->va_list_gpr_size)
@@ -7087,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
for (i = cum->regno;
i < X86_64_REGPARM_MAX
&& i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
i++)
max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
if (max > X86_64_REGPARM_MAX)
max = X86_64_REGPARM_MAX;
for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (Pmode,
plus_constant (save_area, i * UNITS_PER_WORD));
@@ -7102,33 +7100,41 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
if (ix86_varargs_fpr_size)
{
enum machine_mode smode;
rtx label, test;
/* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function. We use
sse_prologue_save insn template that produces computed jump across
SSE saves. We need some preparation work to get this working. */
of SSE parameter registers used to call this function, though all we
actually check here is the zero/non-zero status.  */
label = gen_label_rtx ();
test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
label));
/* If we've determined that we're only loading scalars (and not
vector data) then we can store doubles instead. */
if (crtl->stack_alignment_needed < 128)
smode = DFmode;
else
smode = V4SFmode;
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
instruction size limited by 4 bytes (5 bytes for AVX) with one
byte displacement. */
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
ix86_varargs_gpr_size + 127)));
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
set_mem_align (mem, 64);
max = cum->sse_regno + cfun->va_list_fpr_size / 16;
if (max > X86_64_SSE_REGPARM_MAX)
max = X86_64_SSE_REGPARM_MAX;
/* And finally do the dirty job! */
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
GEN_INT (cum->sse_regno), label,
gen_reg_rtx (Pmode)));
for (i = cum->sse_regno; i < max; ++i)
{
mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
mem = gen_rtx_MEM (smode, mem);
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
}
emit_label (label);
}
}
......
config/i386/i386.md
@@ -79,13 +79,11 @@
;; Prologue support
UNSPEC_STACK_ALLOC
UNSPEC_SET_GOT
UNSPEC_SSE_PROLOGUE_SAVE
UNSPEC_REG_SAVE
UNSPEC_DEF_CFA
UNSPEC_SET_RIP
UNSPEC_SET_GOT_OFFSET
UNSPEC_MEMORY_BLOCKAGE
UNSPEC_SSE_PROLOGUE_SAVE_LOW
;; TLS support
UNSPEC_TP
@@ -17825,179 +17823,6 @@
{ return ASM_SHORT "0x0b0f"; }
[(set_attr "length" "2")])
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(clobber (reg:CC FLAGS_REG))
(clobber (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
(use (label_ref:DI (match_operand 3 "" "")))
(clobber (match_operand:DI 4 "register_operand" ""))
(use (match_dup 1))])]
"TARGET_64BIT"
"")
;; Pre-reload version of prologue save. Until after prologue generation we don't know
;; what the size of save instruction will be.
;; Operand 0+operand 6 is the memory save area
;; Operand 1 is number of registers to save (will get overwritten to operand 5)
;; Operand 2 is number of non-vaargs SSE arguments
;; Operand 3 is label starting the save block
;; Operand 4 is used for temporary computation of jump address
(define_insn "*sse_prologue_save_insn1"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
(match_operand:DI 6 "const_int_operand" "n")))
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(clobber (reg:CC FLAGS_REG))
(clobber (match_operand:DI 1 "register_operand" "=r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))
(clobber (match_operand:DI 4 "register_operand" "=&r"))
(use (match_operand:DI 5 "register_operand" "1"))]
"TARGET_64BIT
&& INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
&& INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
"#"
[(set_attr "type" "other")
(set_attr "memory" "store")
(set_attr "mode" "DI")])
;; We know size of save instruction; expand the computation of jump address
;; in the jumptable.
(define_split
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(clobber (reg:CC FLAGS_REG))
(clobber (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "const_int_operand" ""))
(use (match_operand 3 "" ""))
(clobber (match_operand:DI 4 "register_operand" ""))
(use (match_operand:DI 5 "register_operand" ""))])]
"reload_completed"
[(parallel [(set (match_dup 0)
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)]
UNSPEC_SSE_PROLOGUE_SAVE_LOW))
(use (match_dup 1))
(use (match_dup 2))
(use (match_dup 3))
(use (match_dup 5))])]
{
/* Movaps is 4 bytes, AVX and movsd is 5 bytes. */
int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
/* Compute address to jump to:
label - eax*size + nnamed_sse_arguments*size.  */
if (size == 5)
emit_insn (gen_rtx_SET (VOIDmode, operands[4],
gen_rtx_PLUS
(Pmode,
gen_rtx_MULT (Pmode, operands[1],
GEN_INT (4)),
operands[1])));
else if (size == 4)
emit_insn (gen_rtx_SET (VOIDmode, operands[4],
gen_rtx_MULT (Pmode, operands[1],
GEN_INT (4))));
else
gcc_unreachable ();
if (INTVAL (operands[2]))
emit_move_insn
(operands[1],
gen_rtx_CONST (DImode,
gen_rtx_PLUS (DImode,
operands[3],
GEN_INT (INTVAL (operands[2])
* size))));
else
emit_move_insn (operands[1], operands[3]);
emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
operands[5] = GEN_INT (size);
})
(define_insn "sse_prologue_save_insn"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
(match_operand:DI 4 "const_int_operand" "n")))
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))
(use (match_operand:DI 5 "const_int_operand" "i"))]
"TARGET_64BIT
&& INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
&& INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
{
int i;
operands[0] = gen_rtx_MEM (Pmode,
gen_rtx_PLUS (Pmode, operands[0], operands[4]));
/* VEX instruction with a REX prefix will #UD. */
if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
gcc_unreachable ();
output_asm_insn ("jmp\t%A1", operands);
for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
{
operands[4] = adjust_address (operands[0], DImode, i*16);
operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
PUT_MODE (operands[4], TImode);
if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
output_asm_insn ("rex", operands);
if (crtl->stack_alignment_needed < 128)
output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
else
output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
}
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (operands[3]));
return "";
}
[(set_attr "type" "other")
(set_attr "length_immediate" "0")
(set_attr "length_address" "0")
;; 2 bytes for jump and operands[5] bytes for each save.
(set (attr "length")
(plus (const_int 2)
(mult (symbol_ref ("INTVAL (operands[5])"))
(symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
(set_attr "memory" "store")
(set_attr "modrm" "0")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_expand "prefetch"
[(prefetch (match_operand 0 "address_operand" "")
(match_operand:SI 1 "const_int_operand" "")
......