Commit 617e6634 by H.J. Lu Committed by H.J. Lu

Improve vzeroupper optimization.

gcc/

2010-11-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* config/i386/i386.c (upper_128bits_state): New.
	(block_info_def): Remove upper_128bits_set and done.  Add state,
	referenced, count, processed and rescanned. 
	(check_avx256_stores): Updated.
	(move_or_delete_vzeroupper_2): Updated. Handle deleted BB_END.
	Call note_stores only if needed.  Set referenced and count.
	(move_or_delete_vzeroupper_1): Updated.  Set rescan_vzeroupper_p.
	(rescan_move_or_delete_vzeroupper): New.
	(move_or_delete_vzeroupper):  Process and rescan all all basic
	blocks instead of predecessor blocks of all exit points.
	(ix86_option_override_internal): Enable vzeroupper optimization
	only for -fexpensive-optimizations and not optimizing for size.
	(use_avx256_p): Removed.
	(init_cumulative_args): Don't set use_avx256_p.
	(ix86_function_arg): Likewise.
	(ix86_expand_move): Likewise.
	(ix86_expand_vector_move_misalign): Likewise.
	(ix86_local_alignment): Likewise.
	(ix86_minimum_alignment): Likewise.
	(ix86_expand_epilogue): Don't check use_avx256_p when generating
	vzeroupper.
	(ix86_expand_call): Likewise.

	* config/i386/i386.h (machine_function): Remove use_vzeroupper_p
	and use_avx256_p.  Add rescan_vzeroupper_p.

gcc/testsuite/

2010-11-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper.
	* gcc.target/i386/avx-vzeroupper-11.c: Likewise.

	* gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2.
	* gcc.target/i386/avx-vzeroupper-15.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-17.c: Likewise.

	* gcc.target/i386/avx-vzeroupper-20.c: New.
	* gcc.target/i386/avx-vzeroupper-21.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-22.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-23.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-24.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-25.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-26.c: Likewise.

From-SVN: r167124
parent a19ff177
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* config/i386/i386.c (upper_128bits_state): New.
(block_info_def): Remove upper_128bits_set and done. Add state,
referenced, count, processed and rescanned.
(check_avx256_stores): Updated.
(move_or_delete_vzeroupper_2): Updated. Handle deleted BB_END.
Call note_stores only if needed. Set referenced and count.
(move_or_delete_vzeroupper_1): Updated. Set rescan_vzeroupper_p.
(rescan_move_or_delete_vzeroupper): New.
(move_or_delete_vzeroupper): Process and rescan all all basic
blocks instead of predecessor blocks of all exit points.
(ix86_option_override_internal): Enable vzeroupper optimization
only for -fexpensive-optimizations and not optimizing for size.
(use_avx256_p): Removed.
(init_cumulative_args): Don't set use_avx256_p.
(ix86_function_arg): Likewise.
(ix86_expand_move): Likewise.
(ix86_expand_vector_move_misalign): Likewise.
(ix86_local_alignment): Likewise.
(ix86_minimum_alignment): Likewise.
(ix86_expand_epilogue): Don't check use_avx256_p when generating
vzeroupper.
(ix86_expand_call): Likewise.
* config/i386/i386.h (machine_function): Remove use_vzeroupper_p
and use_avx256_p. Add rescan_vzeroupper_p.
2010-11-24 Joseph Myers <joseph@codesourcery.com> 2010-11-24 Joseph Myers <joseph@codesourcery.com>
* toplev.c: Include <signal.h>. * toplev.c: Include <signal.h>.
...@@ -57,12 +57,25 @@ along with GCC; see the file COPYING3. If not see ...@@ -57,12 +57,25 @@ along with GCC; see the file COPYING3. If not see
#include "dwarf2out.h" #include "dwarf2out.h"
#include "sched-int.h" #include "sched-int.h"
enum upper_128bits_state
{
unknown = 0, /* Unknown. */
unused, /* Not used or not referenced. */
used /* Used or referenced. */
};
typedef struct block_info_def typedef struct block_info_def
{ {
/* TRUE if the upper 128bits of any AVX registers are live at exit. */ /* State of the upper 128bits of any AVX registers at exit. */
bool upper_128bits_set; enum upper_128bits_state state;
/* If the upper 128bits of any AVX registers are referenced. */
enum upper_128bits_state referenced;
/* Number of vzerouppers in this block. */
unsigned int count;
/* TRUE if block has been processed. */ /* TRUE if block has been processed. */
bool done; bool processed;
/* TRUE if block has been rescanned. */
bool rescanned;
} *block_info; } *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux) #define BLOCK_INFO(B) ((block_info) (B)->aux)
...@@ -93,8 +106,9 @@ check_avx256_stores (rtx dest, const_rtx set, void *data) ...@@ -93,8 +106,9 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
&& REG_P (SET_SRC (set)) && REG_P (SET_SRC (set))
&& VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set))))) && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
{ {
bool *upper_128bits_set = (bool *) data; enum upper_128bits_state *state
*upper_128bits_set = true; = (enum upper_128bits_state *) data;
*state = used;
} }
} }
...@@ -106,19 +120,24 @@ check_avx256_stores (rtx dest, const_rtx set, void *data) ...@@ -106,19 +120,24 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
are live at entry. */ are live at entry. */
static void static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set) move_or_delete_vzeroupper_2 (basic_block bb,
enum upper_128bits_state state)
{ {
rtx insn; rtx insn, bb_end;
rtx vzeroupper_insn = NULL_RTX; rtx vzeroupper_insn = NULL_RTX;
rtx pat; rtx pat;
int avx256; int avx256;
enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
int count = BLOCK_INFO (bb)->count;
if (dump_file) if (dump_file)
fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n", fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
bb->index, upper_128bits_set); bb->index, state);
/* BB_END changes when it is deleted. */
bb_end = BB_END (bb);
insn = BB_HEAD (bb); insn = BB_HEAD (bb);
while (insn != BB_END (bb)) while (insn != bb_end)
{ {
insn = NEXT_INSN (insn); insn = NEXT_INSN (insn);
...@@ -167,67 +186,89 @@ move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set) ...@@ -167,67 +186,89 @@ move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
&& GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
&& XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL) && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
{ {
upper_128bits_set = false; state = unused;
/* Delete pending vzeroupper insertion. */ /* Delete pending vzeroupper insertion. */
if (vzeroupper_insn) if (vzeroupper_insn)
{ {
count--;
delete_insn (vzeroupper_insn); delete_insn (vzeroupper_insn);
vzeroupper_insn = NULL_RTX; vzeroupper_insn = NULL_RTX;
} }
} }
else if (!upper_128bits_set) else if (state != used && referenced != unused)
note_stores (pat, check_avx256_stores, &upper_128bits_set); {
/* No need to call note_stores if the upper 128bits of
AVX registers are never referenced. */
note_stores (pat, check_avx256_stores, &state);
if (state == used)
referenced = used;
}
continue; continue;
} }
/* Process vzeroupper intrinsic. */ /* Process vzeroupper intrinsic. */
count++;
avx256 = INTVAL (XVECEXP (pat, 0, 0)); avx256 = INTVAL (XVECEXP (pat, 0, 0));
if (!upper_128bits_set) if (state == unused)
{ {
/* Since the upper 128bits are cleared, callee must not pass /* Since the upper 128bits are cleared, callee must not pass
256bit AVX register. We only need to check if callee 256bit AVX register. We only need to check if callee
returns 256bit AVX register. */ returns 256bit AVX register. */
upper_128bits_set = (avx256 == callee_return_avx256); if (avx256 == callee_return_avx256)
state = used;
/* Remove unnecessary vzeroupper since /* Remove unnecessary vzeroupper since upper 128bits are
upper 128bits are cleared. */ cleared. */
if (dump_file) if (dump_file)
{ {
fprintf (dump_file, "Delete redundant vzeroupper:\n"); fprintf (dump_file, "Delete redundant vzeroupper:\n");
print_rtl_single (dump_file, insn); print_rtl_single (dump_file, insn);
} }
count--;
delete_insn (insn); delete_insn (insn);
} }
else if (avx256 == callee_return_pass_avx256 else
|| avx256 == callee_pass_avx256)
{ {
/* Callee passes 256bit AVX register. Check if callee /* Set state to UNUSED if callee doesn't return 256bit AVX
returns 256bit AVX register. */ register. */
upper_128bits_set = (avx256 == callee_return_pass_avx256); if (avx256 != callee_return_pass_avx256)
state = unused;
/* Must remove vzeroupper since if (avx256 == callee_return_pass_avx256
callee passes in 256bit AVX register. */ || avx256 == callee_pass_avx256)
{
/* Must remove vzeroupper since callee passes in 256bit
AVX register. */
if (dump_file) if (dump_file)
{ {
fprintf (dump_file, "Delete callee pass vzeroupper:\n"); fprintf (dump_file, "Delete callee pass vzeroupper:\n");
print_rtl_single (dump_file, insn); print_rtl_single (dump_file, insn);
} }
count--;
delete_insn (insn); delete_insn (insn);
} }
else else
{
upper_128bits_set = false;
vzeroupper_insn = insn; vzeroupper_insn = insn;
} }
} }
BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set; BLOCK_INFO (bb)->state = state;
if (BLOCK_INFO (bb)->referenced == unknown)
{
/* The upper 128bits of AVX registers are never referenced if
REFERENCED isn't updated. */
if (referenced == unknown)
referenced = unused;
BLOCK_INFO (bb)->referenced = referenced;
BLOCK_INFO (bb)->count = count;
}
if (dump_file) if (dump_file)
fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n", fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
bb->index, upper_128bits_set); bb->index, state);
} }
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
...@@ -238,18 +279,18 @@ move_or_delete_vzeroupper_1 (basic_block block) ...@@ -238,18 +279,18 @@ move_or_delete_vzeroupper_1 (basic_block block)
{ {
edge e; edge e;
edge_iterator ei; edge_iterator ei;
bool upper_128bits_set; enum upper_128bits_state state;
if (dump_file) if (dump_file)
fprintf (dump_file, " Process BB [%i]: status: %d\n", fprintf (dump_file, " Process BB [%i]: status: %d\n",
block->index, BLOCK_INFO (block)->done); block->index, BLOCK_INFO (block)->processed);
if (BLOCK_INFO (block)->done) if (BLOCK_INFO (block)->processed)
return; return;
BLOCK_INFO (block)->done = true; BLOCK_INFO (block)->processed = true;
upper_128bits_set = false; state = unknown;
/* Process all predecessor edges of this block. */ /* Process all predecessor edges of this block. */
FOR_EACH_EDGE (e, ei, block->preds) FOR_EACH_EDGE (e, ei, block->preds)
...@@ -257,12 +298,70 @@ move_or_delete_vzeroupper_1 (basic_block block) ...@@ -257,12 +298,70 @@ move_or_delete_vzeroupper_1 (basic_block block)
if (e->src == block) if (e->src == block)
continue; continue;
move_or_delete_vzeroupper_1 (e->src); move_or_delete_vzeroupper_1 (e->src);
if (BLOCK_INFO (e->src)->upper_128bits_set) switch (BLOCK_INFO (e->src)->state)
upper_128bits_set = true; {
case unknown:
if (state == unused)
state = unknown;
break;
case used:
state = used;
break;
case unused:
break;
} }
}
/* If state of any predecessor edges is unknown, we need to rescan. */
if (state == unknown)
cfun->machine->rescan_vzeroupper_p = 1;
/* Process this block. */ /* Process this block. */
move_or_delete_vzeroupper_2 (block, upper_128bits_set); move_or_delete_vzeroupper_2 (block, state);
}
/* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
in BLOCK and its predecessor blocks recursively. */
static void
rescan_move_or_delete_vzeroupper (basic_block block)
{
edge e;
edge_iterator ei;
enum upper_128bits_state state;
if (dump_file)
fprintf (dump_file, " Rescan BB [%i]: status: %d\n",
block->index, BLOCK_INFO (block)->rescanned);
if (BLOCK_INFO (block)->rescanned)
return;
BLOCK_INFO (block)->rescanned = true;
state = unused;
/* Rescan all predecessor edges of this block. */
FOR_EACH_EDGE (e, ei, block->preds)
{
if (e->src == block)
continue;
rescan_move_or_delete_vzeroupper (e->src);
/* For rescan, UKKNOWN state is treated as UNUSED. */
if (BLOCK_INFO (e->src)->state == used)
state = used;
}
/* Rescan this block only if there are vzerouppers or the upper
128bits of AVX registers are referenced. */
if (BLOCK_INFO (block)->count == 0
&& (state == used || BLOCK_INFO (block)->referenced != used))
{
if (state == used)
BLOCK_INFO (block)->state = state;
}
else
move_or_delete_vzeroupper_2 (block, state);
} }
/* Go through the instruction stream looking for vzeroupper. Delete /* Go through the instruction stream looking for vzeroupper. Delete
...@@ -274,6 +373,8 @@ move_or_delete_vzeroupper (void) ...@@ -274,6 +373,8 @@ move_or_delete_vzeroupper (void)
{ {
edge e; edge e;
edge_iterator ei; edge_iterator ei;
basic_block bb;
unsigned int count = 0;
/* Set up block info for each basic block. */ /* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def)); alloc_aux_for_blocks (sizeof (struct block_info_def));
...@@ -285,16 +386,31 @@ move_or_delete_vzeroupper (void) ...@@ -285,16 +386,31 @@ move_or_delete_vzeroupper (void)
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs) FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
{ {
move_or_delete_vzeroupper_2 (e->dest, move_or_delete_vzeroupper_2 (e->dest,
cfun->machine->caller_pass_avx256_p); cfun->machine->caller_pass_avx256_p
BLOCK_INFO (e->dest)->done = true; ? used : unused);
BLOCK_INFO (e->dest)->processed = true;
BLOCK_INFO (e->dest)->rescanned = true;
} }
/* Process predecessor blocks of all exit points. */ /* Process all basic blocks. */
if (dump_file) if (dump_file)
fprintf (dump_file, "Process all exit points\n"); fprintf (dump_file, "Process all basic blocks\n");
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) FOR_EACH_BB (bb)
move_or_delete_vzeroupper_1 (e->src); {
move_or_delete_vzeroupper_1 (bb);
count += BLOCK_INFO (bb)->count;
}
/* Rescan all basic blocks if needed. */
if (count && cfun->machine->rescan_vzeroupper_p)
{
if (dump_file)
fprintf (dump_file, "Rescan all basic blocks\n");
FOR_EACH_BB (bb)
rescan_move_or_delete_vzeroupper (bb);
}
free_aux_for_blocks (); free_aux_for_blocks ();
} }
...@@ -4051,8 +4167,11 @@ ix86_option_override_internal (bool main_args_p) ...@@ -4051,8 +4167,11 @@ ix86_option_override_internal (bool main_args_p)
if (TARGET_AVX) if (TARGET_AVX)
{ {
/* Enable vzeroupper pass by default for TARGET_AVX. */ /* When not optimize for size, enable vzeroupper optimization for
if (!(target_flags_explicit & MASK_VZEROUPPER)) TARGET_AVX with -fexpensive-optimizations. */
if (!optimize_size
&& flag_expensive_optimizations
&& !(target_flags_explicit & MASK_VZEROUPPER))
target_flags |= MASK_VZEROUPPER; target_flags |= MASK_VZEROUPPER;
} }
else else
...@@ -4062,17 +4181,6 @@ ix86_option_override_internal (bool main_args_p) ...@@ -4062,17 +4181,6 @@ ix86_option_override_internal (bool main_args_p)
} }
} }
/* Return TRUE if type TYPE and mode MODE use 256bit AVX modes. */
static bool
use_avx256_p (enum machine_mode mode, const_tree type)
{
return (VALID_AVX256_REG_MODE (mode)
|| (type
&& TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) == 32));
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes. */ /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
static bool static bool
...@@ -5687,7 +5795,6 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ ...@@ -5687,7 +5795,6 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
if (function_pass_avx256_p (fnret_value)) if (function_pass_avx256_p (fnret_value))
{ {
/* The return value of this function uses 256bit AVX modes. */ /* The return value of this function uses 256bit AVX modes. */
cfun->machine->use_avx256_p = true;
if (caller) if (caller)
cfun->machine->callee_return_avx256_p = true; cfun->machine->callee_return_avx256_p = true;
else else
...@@ -6956,7 +7063,6 @@ ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode, ...@@ -6956,7 +7063,6 @@ ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
if (TARGET_VZEROUPPER && function_pass_avx256_p (arg)) if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
{ {
/* This argument uses 256bit AVX modes. */ /* This argument uses 256bit AVX modes. */
cfun->machine->use_avx256_p = true;
if (cum->caller) if (cum->caller)
cfun->machine->callee_pass_avx256_p = true; cfun->machine->callee_pass_avx256_p = true;
else else
...@@ -10970,12 +11076,9 @@ ix86_expand_epilogue (int style) ...@@ -10970,12 +11076,9 @@ ix86_expand_epilogue (int style)
/* Emit vzeroupper if needed. */ /* Emit vzeroupper if needed. */
if (TARGET_VZEROUPPER if (TARGET_VZEROUPPER
&& cfun->machine->use_avx256_p && !TREE_THIS_VOLATILE (cfun->decl)
&& !cfun->machine->caller_return_avx256_p) && !cfun->machine->caller_return_avx256_p)
{
cfun->machine->use_vzeroupper_p = 1;
emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256))); emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
if (crtl->args.pops_args && crtl->args.size) if (crtl->args.pops_args && crtl->args.size)
{ {
...@@ -15130,9 +15233,6 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) ...@@ -15130,9 +15233,6 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
rtx op0, op1; rtx op0, op1;
enum tls_model model; enum tls_model model;
if (VALID_AVX256_REG_MODE (mode))
cfun->machine->use_avx256_p = true;
op0 = operands[0]; op0 = operands[0];
op1 = operands[1]; op1 = operands[1];
...@@ -15277,9 +15377,6 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) ...@@ -15277,9 +15377,6 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
rtx op0 = operands[0], op1 = operands[1]; rtx op0 = operands[0], op1 = operands[1];
unsigned int align = GET_MODE_ALIGNMENT (mode); unsigned int align = GET_MODE_ALIGNMENT (mode);
if (VALID_AVX256_REG_MODE (mode))
cfun->machine->use_avx256_p = true;
/* Force constants other than zero into memory. We do not know how /* Force constants other than zero into memory. We do not know how
the instructions used to build constants modify the upper 64 bits the instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able of the register, once we have that information we may be able
...@@ -15386,9 +15483,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) ...@@ -15386,9 +15483,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{ {
rtx op0, op1, m; rtx op0, op1, m;
if (VALID_AVX256_REG_MODE (mode))
cfun->machine->use_avx256_p = true;
op0 = operands[0]; op0 = operands[0];
op1 = operands[1]; op1 = operands[1];
...@@ -21661,12 +21755,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, ...@@ -21661,12 +21755,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
} }
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */ /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p) if (TARGET_VZEROUPPER && !TREE_THIS_VOLATILE (cfun->decl))
{ {
rtx unspec; rtx unspec;
int avx256; int avx256;
cfun->machine->use_vzeroupper_p = 1;
if (cfun->machine->callee_pass_avx256_p) if (cfun->machine->callee_pass_avx256_p)
{ {
if (cfun->machine->callee_return_avx256_p) if (cfun->machine->callee_return_avx256_p)
...@@ -22763,9 +22856,6 @@ ix86_local_alignment (tree exp, enum machine_mode mode, ...@@ -22763,9 +22856,6 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
decl = NULL; decl = NULL;
} }
if (use_avx256_p (mode, type))
cfun->machine->use_avx256_p = true;
/* Don't do dynamic stack realignment for long long objects with /* Don't do dynamic stack realignment for long long objects with
-mpreferred-stack-boundary=2. */ -mpreferred-stack-boundary=2. */
if (!TARGET_64BIT if (!TARGET_64BIT
...@@ -22872,9 +22962,6 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode, ...@@ -22872,9 +22962,6 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode,
decl = NULL; decl = NULL;
} }
if (use_avx256_p (mode, type))
cfun->machine->use_avx256_p = true;
if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
return align; return align;
...@@ -29782,7 +29869,7 @@ ix86_reorg (void) ...@@ -29782,7 +29869,7 @@ ix86_reorg (void)
} }
/* Run the vzeroupper optimization if needed. */ /* Run the vzeroupper optimization if needed. */
if (cfun->machine->use_vzeroupper_p) if (TARGET_VZEROUPPER)
move_or_delete_vzeroupper (); move_or_delete_vzeroupper ();
} }
...@@ -2294,12 +2294,6 @@ struct GTY(()) machine_function { ...@@ -2294,12 +2294,6 @@ struct GTY(()) machine_function {
stack below the return address. */ stack below the return address. */
BOOL_BITFIELD static_chain_on_stack : 1; BOOL_BITFIELD static_chain_on_stack : 1;
/* Nonzero if the current function uses vzeroupper. */
BOOL_BITFIELD use_vzeroupper_p : 1;
/* Nonzero if the current function uses 256bit AVX regisers. */
BOOL_BITFIELD use_avx256_p : 1;
/* Nonzero if caller passes 256bit AVX modes. */ /* Nonzero if caller passes 256bit AVX modes. */
BOOL_BITFIELD caller_pass_avx256_p : 1; BOOL_BITFIELD caller_pass_avx256_p : 1;
...@@ -2312,6 +2306,9 @@ struct GTY(()) machine_function { ...@@ -2312,6 +2306,9 @@ struct GTY(()) machine_function {
/* Nonzero if the current callee returns 256bit AVX modes. */ /* Nonzero if the current callee returns 256bit AVX modes. */
BOOL_BITFIELD callee_return_avx256_p : 1; BOOL_BITFIELD callee_return_avx256_p : 1;
/* Nonzero if rescan vzerouppers in the current function is needed. */
BOOL_BITFIELD rescan_vzeroupper_p : 1;
/* During prologue/epilogue generation, the current frame state. /* During prologue/epilogue generation, the current frame state.
Otherwise, the frame state at the end of the prologue. */ Otherwise, the frame state at the end of the prologue. */
struct machine_frame_state fs; struct machine_frame_state fs;
......
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper.
* gcc.target/i386/avx-vzeroupper-11.c: Likewise.
* gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2.
* gcc.target/i386/avx-vzeroupper-15.c: Likewise.
* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
* gcc.target/i386/avx-vzeroupper-20.c: New.
* gcc.target/i386/avx-vzeroupper-21.c: Likewise.
* gcc.target/i386/avx-vzeroupper-22.c: Likewise.
* gcc.target/i386/avx-vzeroupper-23.c: Likewise.
* gcc.target/i386/avx-vzeroupper-24.c: Likewise.
* gcc.target/i386/avx-vzeroupper-25.c: Likewise.
* gcc.target/i386/avx-vzeroupper-26.c: Likewise.
2010-11-24 Richard Guenther <rguenther@suse.de> 2010-11-24 Richard Guenther <rguenther@suse.de>
PR lto/43218 PR lto/43218
......
...@@ -14,4 +14,4 @@ foo () ...@@ -14,4 +14,4 @@ foo ()
_mm256_zeroupper (); _mm256_zeroupper ();
} }
/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */ /* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
...@@ -16,4 +16,4 @@ foo () ...@@ -16,4 +16,4 @@ foo ()
} }
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */ /* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */ /* { dg-options "-O2 -mavx -mtune=generic -dp" } */
#include <immintrin.h> #include <immintrin.h>
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */ /* { dg-options "-O2 -mavx -mtune=generic -dp" } */
#include <immintrin.h> #include <immintrin.h>
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */ /* { dg-options "-O2 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__)); typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-require-effective-target lp64 } */ /* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */ /* { dg-options "-O2 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__)); typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
......
/* { dg-do compile } */
/* { dg-options "-O3 -mavx -mtune=generic -dp" } */
extern void free (void *);
void
bar (void *ncstrp)
{
if(ncstrp==((void *)0))
return;
free(ncstrp);
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
extern void exit (int) __attribute__ ((__noreturn__));
int
foo (int i)
{
if (i == 0)
exit (1);
return 0;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
extern void exit (int) __attribute__ ((__noreturn__));
extern void bar (void);
int
foo (int i)
{
if (i == 0)
{
bar ();
exit (1);
}
return 0;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
extern void fatal (void) __attribute__ ((__noreturn__));
extern void exit (int) __attribute__ ((__noreturn__));
void
fatal (void)
{
exit (1);
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
typedef struct bitmap_element_def {
struct bitmap_element_def *next;
unsigned int indx;
} bitmap_element;
typedef struct bitmap_head_def {
bitmap_element *first;
bitmap_element *current;
unsigned int indx;
} bitmap_head;
typedef struct bitmap_head_def *bitmap;
typedef const struct bitmap_head_def *const_bitmap;
extern void bar (void) __attribute__ ((__noreturn__));
unsigned char
bitmap_and_compl_into (bitmap a, const_bitmap b)
{
bitmap_element *a_elt = a->first;
const bitmap_element *b_elt = b->first;
if (a == b)
{
if ((!(a)->first))
return 0;
else
return 1;
}
while (a_elt && b_elt)
{
if (a_elt->indx < b_elt->indx)
a_elt = a_elt->next;
}
if (a->indx == a->current->indx)
bar ();
return 0;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
#include <immintrin.h>
extern __m256 x, y;
void
foo ()
{
x = y;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-Os -mavx -mtune=generic -dp" } */
#include <immintrin.h>
extern __m256 x, y;
extern void (*bar) (void);
void
foo ()
{
x = y;
bar ();
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment