Commit 650b5329 authored and committed by Richard Henderson

Eliminate the computed goto in x64 varargs.

Use a simple test and branch against zero instead, which also lets us
avoid some awkward code emission problems.

From-SVN: r162384
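
The gist of the change, as a minimal C analogue (not the GCC code itself; the function names, array sizes, and the ax parameter below are invented for illustration, with ax standing for the count of SSE registers the x86-64 ABI reports in AL):

/* Old scheme: a computed goto dispatches into a run of save
   instructions; entering later in the run skips the saves for
   registers the caller never loaded.  Requires GNU C (labels as
   values), which is fitting for a GCC sketch.  */
#include <stdio.h>

static void
save_xmm_old (int ax, double area[3], const double xmm[3])
{
  static void *entry[] = { &&done, &&s1, &&s2, &&s3 };
  goto *entry[ax & 3];          /* jump over the saves we don't need */
 s3: area[2] = xmm[2];          /* fall through: entering here saves all */
 s2: area[1] = xmm[1];
 s1: area[0] = xmm[0];
 done: ;
}

/* New scheme: one compare-and-branch against zero; when any SSE
   register was used, simply store the whole (clamped) range.  */
static void
save_xmm_new (int ax, double area[3], const double xmm[3])
{
  int i;
  if (ax == 0)
    return;                     /* branch over the entire save block */
  for (i = 0; i < 3; i++)
    area[i] = xmm[i];
}

int
main (void)
{
  const double xmm[3] = { 1.0, 2.0, 3.0 };
  double a[3] = { 0.0, 0.0, 0.0 };
  double b[3] = { 0.0, 0.0, 0.0 };

  save_xmm_old (2, a, xmm);     /* saves only xmm[0..1] */
  save_xmm_new (2, b, xmm);     /* saves the whole range once ax != 0 */
  printf ("old: %g %g %g   new: %g %g %g\n",
          a[0], a[1], a[2], b[0], b[1], b[2]);
  return 0;
}

Note the deliberate semantic simplification: once AL is non-zero, the new code saves every register in the varargs range instead of counting, which is what lets the jump table (and its fragile instruction-size bookkeeping) disappear.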
parent 5634254c
2010-07-21  Richard Henderson  <rth@redhat.com>

	* config/i386/i386.c (setup_incoming_varargs_64): Emit a simple
	comparison for avoiding xmm register saves.  Emit the xmm register
	saves explicitly.
	* config/i386/i386.md (UNSPEC_SSE_PROLOGUE_SAVE): Remove.
	(UNSPEC_SSE_PROLOGUE_SAVE_LOW): Remove.
	(sse_prologue_save, sse_prologue_save_insn1, sse_prologue_save_insn):
	Remove patterns and the associated splitters.
2010-07-21  Changpeng Fang  <changpeng.fang@amd.com>

	* tree-ssa-loop-prefetch.c (analyze_ref): Strip off the real
	...
gcc/config/i386/i386.c
@@ -7062,11 +7062,8 @@ static void
 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
 {
   rtx save_area, mem;
-  rtx label;
-  rtx tmp_reg;
-  rtx nsse_reg;
   alias_set_type set;
-  int i;
+  int i, max;

   /* GPR size of varargs save area.  */
   if (cfun->va_list_gpr_size)
@@ -7087,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
   save_area = frame_pointer_rtx;
   set = get_varargs_alias_set ();

-  for (i = cum->regno;
-       i < X86_64_REGPARM_MAX
-       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
-       i++)
+  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+  if (max > X86_64_REGPARM_MAX)
+    max = X86_64_REGPARM_MAX;
+
+  for (i = cum->regno; i < max; i++)
     {
       mem = gen_rtx_MEM (Pmode,
                          plus_constant (save_area, i * UNITS_PER_WORD));
@@ -7102,33 +7100,41 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)

   if (ix86_varargs_fpr_size)
     {
+      enum machine_mode smode;
+      rtx label, test;
+
       /* Now emit code to save SSE registers.  The AX parameter contains number
-	 of SSE parameter registers used to call this function.  We use
-	 sse_prologue_save insn template that produces computed jump across
-	 SSE saves.  We need some preparation work to get this working.  */
+	 of SSE parameter registers used to call this function, though all we
+	 actually check here is the zero/non-zero status.  */

       label = gen_label_rtx ();
+      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+				      label));
+
+      /* If we've determined that we're only loading scalars (and not
+	 vector data) then we can store doubles instead.  */
+      if (crtl->stack_alignment_needed < 128)
+	smode = DFmode;
+      else
+	smode = V4SFmode;

-      nsse_reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
+      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+      if (max > X86_64_SSE_REGPARM_MAX)
+	max = X86_64_SSE_REGPARM_MAX;

-      /* Compute address of memory block we save into.  We always use pointer
-	 pointing 127 bytes after first byte to store - this is needed to keep
-	 instruction size limited by 4 bytes (5 bytes for AVX) with one
-	 byte displacement.  */
-      tmp_reg = gen_reg_rtx (Pmode);
-      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
-			      plus_constant (save_area,
-					     ix86_varargs_gpr_size + 127)));
-      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
-      MEM_NOTRAP_P (mem) = 1;
-      set_mem_alias_set (mem, set);
-      set_mem_align (mem, 64);
+      for (i = cum->sse_regno; i < max; ++i)
+	{
+	  mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+	  mem = gen_rtx_MEM (smode, mem);
+	  MEM_NOTRAP_P (mem) = 1;
+	  set_mem_alias_set (mem, set);
+	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

-      /* And finally do the dirty job!  */
-      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
-					GEN_INT (cum->sse_regno), label,
-					gen_reg_rtx (Pmode)));
+	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+	}
+
+      emit_label (label);
     }
 }
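
An aside on the removed +127 bias (a standalone check of the claim in the deleted comment above, not GCC code): parking the base register 127 bytes past the first save slot keeps every 16-byte slot for xmm0..xmm7 reachable with a signed 8-bit displacement, so each store stays at 4 bytes (5 with AVX).

#include <stdio.h>

int
main (void)
{
  int i;
  for (i = 0; i < 8; i++)
    {
      /* Slot i lives at first_byte + i*16; with the base register at
         first_byte + 127 the encoded displacement is i*16 - 127.  */
      int disp = i * 16 - 127;
      printf ("xmm%d: disp %4d %s\n", i, disp,
              disp >= -128 && disp <= 127 ? "(one-byte)" : "(four-byte)");
    }
  return 0;
}

All eight displacements land in [-127, -15], inside the one-byte range.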

gcc/config/i386/i386.md
@@ -79,13 +79,11 @@
   ;; Prologue support
   UNSPEC_STACK_ALLOC
   UNSPEC_SET_GOT
-  UNSPEC_SSE_PROLOGUE_SAVE
   UNSPEC_REG_SAVE
   UNSPEC_DEF_CFA
   UNSPEC_SET_RIP
   UNSPEC_SET_GOT_OFFSET
   UNSPEC_MEMORY_BLOCKAGE
-  UNSPEC_SSE_PROLOGUE_SAVE_LOW

   ;; TLS support
   UNSPEC_TP
@@ -17825,179 +17823,6 @@
 { return ASM_SHORT "0x0b0f"; }
 [(set_attr "length" "2")])

(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(clobber (reg:CC FLAGS_REG))
(clobber (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
(use (label_ref:DI (match_operand 3 "" "")))
(clobber (match_operand:DI 4 "register_operand" ""))
(use (match_dup 1))])]
"TARGET_64BIT"
"")
;; Pre-reload version of prologue save. Until after prologue generation we don't know
;; what the size of save instruction will be.
;; Operand 0+operand 6 is the memory save area
;; Operand 1 is number of registers to save (will get overwritten to operand 5)
;; Operand 2 is number of non-vaargs SSE arguments
;; Operand 3 is label starting the save block
;; Operand 4 is used for temporary computation of jump address
(define_insn "*sse_prologue_save_insn1"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
(match_operand:DI 6 "const_int_operand" "n")))
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(clobber (reg:CC FLAGS_REG))
(clobber (match_operand:DI 1 "register_operand" "=r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))
(clobber (match_operand:DI 4 "register_operand" "=&r"))
(use (match_operand:DI 5 "register_operand" "1"))]
"TARGET_64BIT
&& INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
&& INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
"#"
[(set_attr "type" "other")
(set_attr "memory" "store")
(set_attr "mode" "DI")])
;; We know size of save instruction; expand the computation of jump address
;; in the jumptable.
(define_split
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(clobber (reg:CC FLAGS_REG))
(clobber (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "const_int_operand" ""))
(use (match_operand 3 "" ""))
(clobber (match_operand:DI 4 "register_operand" ""))
(use (match_operand:DI 5 "register_operand" ""))])]
"reload_completed"
[(parallel [(set (match_dup 0)
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)]
UNSPEC_SSE_PROLOGUE_SAVE_LOW))
(use (match_dup 1))
(use (match_dup 2))
(use (match_dup 3))
(use (match_dup 5))])]
{
/* Movaps is 4 bytes, AVX and movsd is 5 bytes. */
int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
/* Compute address to jump to:
label - eax*size + nnamed_sse_arguments*size. */
if (size == 5)
emit_insn (gen_rtx_SET (VOIDmode, operands[4],
gen_rtx_PLUS
(Pmode,
gen_rtx_MULT (Pmode, operands[1],
GEN_INT (4)),
operands[1])));
else if (size == 4)
emit_insn (gen_rtx_SET (VOIDmode, operands[4],
gen_rtx_MULT (Pmode, operands[1],
GEN_INT (4))));
else
gcc_unreachable ();
if (INTVAL (operands[2]))
emit_move_insn
(operands[1],
gen_rtx_CONST (DImode,
gen_rtx_PLUS (DImode,
operands[3],
GEN_INT (INTVAL (operands[2])
* size))));
else
emit_move_insn (operands[1], operands[3]);
emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
operands[5] = GEN_INT (size);
})
(define_insn "sse_prologue_save_insn"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
(match_operand:DI 4 "const_int_operand" "n")))
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM3_REG)
(reg:DI XMM4_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))
(use (match_operand:DI 5 "const_int_operand" "i"))]
"TARGET_64BIT
&& INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
&& INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
{
int i;
operands[0] = gen_rtx_MEM (Pmode,
gen_rtx_PLUS (Pmode, operands[0], operands[4]));
/* VEX instruction with a REX prefix will #UD. */
if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
gcc_unreachable ();
output_asm_insn ("jmp\t%A1", operands);
for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
{
operands[4] = adjust_address (operands[0], DImode, i*16);
operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
PUT_MODE (operands[4], TImode);
if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
output_asm_insn ("rex", operands);
if (crtl->stack_alignment_needed < 128)
output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
else
output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
}
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (operands[3]));
return "";
}
[(set_attr "type" "other")
(set_attr "length_immediate" "0")
(set_attr "length_address" "0")
;; 2 bytes for jump and opernds[4] bytes for each save.
(set (attr "length")
(plus (const_int 2)
(mult (symbol_ref ("INTVAL (operands[5])"))
(symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
(set_attr "memory" "store")
(set_attr "modrm" "0")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_expand "prefetch" (define_expand "prefetch"
[(prefetch (match_operand 0 "address_operand" "") [(prefetch (match_operand 0 "address_operand" "")
(match_operand:SI 1 "const_int_operand" "") (match_operand:SI 1 "const_int_operand" "")
......
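
For reference, the jump-address arithmetic performed by the removed splitter (label - eax*size + nnamed_sse_arguments*size) can be checked with a small standalone program; the concrete values below are illustrative, not taken from a real compilation.

#include <stdio.h>

int
main (void)
{
  int size  = 4;  /* movaps is 4 bytes; movsd and VEX-encoded moves are 5 */
  int max   = 8;  /* X86_64_SSE_REGPARM_MAX */
  int named = 2;  /* SSE registers consumed by named arguments */
  int eax   = 5;  /* SSE registers the caller actually loaded */

  /* The save block stored registers max-1 down to named, one size-byte
     instruction each, with the label at its end.  Jumping to
     label - (eax - named)*size therefore executes exactly the last
     eax - named saves, i.e. the varargs registers named..eax-1.  */
  int offset = -eax * size + named * size;
  printf ("jump to label%+d: executes %d saves, skips %d\n",
          offset, eax - named, (max - named) - (eax - named));
  return 0;
}

Any change to the per-save instruction size (movaps vs. movsd vs. VEX encodings) had to be mirrored in this arithmetic and in the pattern's length attribute, which is exactly the kind of fragile code emission the commit message refers to.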