Commit 617e6634 by H.J. Lu Committed by H.J. Lu

Improve vzeroupper optimization.

gcc/

2010-11-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* config/i386/i386.c (upper_128bits_state): New.
	(block_info_def): Remove upper_128bits_set and done.  Add state,
	referenced, count, processed and rescanned. 
	(check_avx256_stores): Updated.
	(move_or_delete_vzeroupper_2): Updated. Handle deleted BB_END.
	Call note_stores only if needed.  Set referenced and count.
	(move_or_delete_vzeroupper_1): Updated.  Set rescan_vzeroupper_p.
	(rescan_move_or_delete_vzeroupper): New.
	(move_or_delete_vzeroupper):  Process and rescan all basic
	blocks instead of predecessor blocks of all exit points.
	(ix86_option_override_internal): Enable vzeroupper optimization
	only for -fexpensive-optimizations and not optimizing for size.
	(use_avx256_p): Removed.
	(init_cumulative_args): Don't set use_avx256_p.
	(ix86_function_arg): Likewise.
	(ix86_expand_move): Likewise.
	(ix86_expand_vector_move_misalign): Likewise.
	(ix86_local_alignment): Likewise.
	(ix86_minimum_alignment): Likewise.
	(ix86_expand_epilogue): Don't check use_avx256_p when generating
	vzeroupper.
	(ix86_expand_call): Likewise.

	* config/i386/i386.h (machine_function): Remove use_vzeroupper_p
	and use_avx256_p.  Add rescan_vzeroupper_p.

gcc/testsuite/

2010-11-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper.
	* gcc.target/i386/avx-vzeroupper-11.c: Likewise.

	* gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2.
	* gcc.target/i386/avx-vzeroupper-15.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-17.c: Likewise.

	* gcc.target/i386/avx-vzeroupper-20.c: New.
	* gcc.target/i386/avx-vzeroupper-21.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-22.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-23.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-24.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-25.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-26.c: Likewise.

From-SVN: r167124
parent a19ff177
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* config/i386/i386.c (upper_128bits_state): New.
(block_info_def): Remove upper_128bits_set and done. Add state,
referenced, count, processed and rescanned.
(check_avx256_stores): Updated.
(move_or_delete_vzeroupper_2): Updated. Handle deleted BB_END.
Call note_stores only if needed. Set referenced and count.
(move_or_delete_vzeroupper_1): Updated. Set rescan_vzeroupper_p.
(rescan_move_or_delete_vzeroupper): New.
(move_or_delete_vzeroupper): Process and rescan all basic
blocks instead of predecessor blocks of all exit points.
(ix86_option_override_internal): Enable vzeroupper optimization
only for -fexpensive-optimizations and not optimizing for size.
(use_avx256_p): Removed.
(init_cumulative_args): Don't set use_avx256_p.
(ix86_function_arg): Likewise.
(ix86_expand_move): Likewise.
(ix86_expand_vector_move_misalign): Likewise.
(ix86_local_alignment): Likewise.
(ix86_minimum_alignment): Likewise.
(ix86_expand_epilogue): Don't check use_avx256_p when generating
vzeroupper.
(ix86_expand_call): Likewise.
* config/i386/i386.h (machine_function): Remove use_vzeroupper_p
and use_avx256_p. Add rescan_vzeroupper_p.
2010-11-24 Joseph Myers <joseph@codesourcery.com>
* toplev.c: Include <signal.h>.
......@@ -2294,12 +2294,6 @@ struct GTY(()) machine_function {
stack below the return address. */
BOOL_BITFIELD static_chain_on_stack : 1;
/* Nonzero if the current function uses vzeroupper. */
BOOL_BITFIELD use_vzeroupper_p : 1;
/* Nonzero if the current function uses 256bit AVX regisers. */
BOOL_BITFIELD use_avx256_p : 1;
/* Nonzero if caller passes 256bit AVX modes. */
BOOL_BITFIELD caller_pass_avx256_p : 1;
......@@ -2312,6 +2306,9 @@ struct GTY(()) machine_function {
/* Nonzero if the current callee returns 256bit AVX modes. */
BOOL_BITFIELD callee_return_avx256_p : 1;
/* Nonzero if rescan vzerouppers in the current function is needed. */
BOOL_BITFIELD rescan_vzeroupper_p : 1;
/* During prologue/epilogue generation, the current frame state.
Otherwise, the frame state at the end of the prologue. */
struct machine_frame_state fs;
......
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper.
* gcc.target/i386/avx-vzeroupper-11.c: Likewise.
* gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2.
* gcc.target/i386/avx-vzeroupper-15.c: Likewise.
* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
* gcc.target/i386/avx-vzeroupper-20.c: New.
* gcc.target/i386/avx-vzeroupper-21.c: Likewise.
* gcc.target/i386/avx-vzeroupper-22.c: Likewise.
* gcc.target/i386/avx-vzeroupper-23.c: Likewise.
* gcc.target/i386/avx-vzeroupper-24.c: Likewise.
* gcc.target/i386/avx-vzeroupper-25.c: Likewise.
* gcc.target/i386/avx-vzeroupper-26.c: Likewise.
2010-11-24 Richard Guenther <rguenther@suse.de>
PR lto/43218
......
......@@ -14,4 +14,4 @@ foo ()
_mm256_zeroupper ();
}
/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
......@@ -16,4 +16,4 @@ foo ()
}
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
#include <immintrin.h>
......
/* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
#include <immintrin.h>
......
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
/* { dg-options "-O2 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
......
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
/* { dg-options "-O2 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
......
/* { dg-do compile } */
/* { dg-options "-O3 -mavx -mtune=generic -dp" } */
extern void free (void *);
/* Release NCSTRP through free; do nothing when NCSTRP is a null pointer.
   Returns no value.  */
void
bar (void *ncstrp)
{
/* Guard: leave early on a null pointer so free is only reached on the
   non-null path.  */
if(ncstrp==((void *)0))
return;
free(ncstrp);
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
extern void exit (int) __attribute__ ((__noreturn__));
/* Return 0 for any nonzero I.  When I is 0, call exit with status 1
   instead; exit is declared noreturn in this file, so that path does
   not reach the return statement.  */
int
foo (int i)
{
if (i == 0)
exit (1);
return 0;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
extern void exit (int) __attribute__ ((__noreturn__));
extern void bar (void);
/* Return 0 for any nonzero I.  When I is 0, call the external bar and
   then exit with status 1; exit is declared noreturn in this file, so
   that path does not reach the return statement.  */
int
foo (int i)
{
if (i == 0)
{
/* An ordinary call followed by a noreturn call on the same path.  */
bar ();
exit (1);
}
return 0;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
extern void fatal (void) __attribute__ ((__noreturn__));
extern void exit (int) __attribute__ ((__noreturn__));
/* Call exit with status 1.  Both fatal and exit are declared with the
   noreturn attribute in this file, so the function body consists solely
   of a noreturn call.  */
void
fatal (void)
{
exit (1);
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
/* One element of a singly linked list, also available under the typedef
   name bitmap_element.  */
typedef struct bitmap_element_def {
/* Link to another element of the same type.  */
struct bitmap_element_def *next;
/* Unsigned index value; bitmap_and_compl_into compares it between
   elements.  */
unsigned int indx;
} bitmap_element;
/* Head structure holding two element pointers and an index, also
   available under the typedef name bitmap_head.  */
typedef struct bitmap_head_def {
/* Pointer to a bitmap_element; bitmap_and_compl_into starts its walk
   here.  */
bitmap_element *first;
/* Second pointer to a bitmap_element; its indx is compared with the
   head's own indx in bitmap_and_compl_into.  */
bitmap_element *current;
/* Unsigned index value stored in the head itself.  */
unsigned int indx;
} bitmap_head;
/* Pointer-to-head aliases: bitmap points to a modifiable head,
   const_bitmap to a const-qualified one.  */
typedef struct bitmap_head_def *bitmap;
typedef const struct bitmap_head_def *const_bitmap;
/* External function taking no arguments, declared as never returning.  */
extern void bar (void) __attribute__ ((__noreturn__));
/* Test function taking a modifiable head A and a const head B.

   When A and B are the same object, return 1 if A->first is non-null
   and 0 if it is null.  Otherwise walk A's element list against B's
   first element, call the noreturn bar when A->indx equals
   A->current->indx, and return 0.

   NOTE(review): the body looks like a reduced compiler testcase rather
   than a complete bitmap operation -- confirm before reusing it.  */
unsigned char
bitmap_and_compl_into (bitmap a, const_bitmap b)
{
bitmap_element *a_elt = a->first;
const bitmap_element *b_elt = b->first;
/* Same-object case: the result depends only on whether A has a first
   element.  */
if (a == b)
{
if ((!(a)->first))
return 0;
else
return 1;
}
/* Only a_elt is ever advanced, and only while its indx is below
   b_elt's.  Once a_elt->indx >= b_elt->indx nothing changes inside the
   loop, so the loop does not terminate in that case.  b_elt is never
   advanced.  */
while (a_elt && b_elt)
{
if (a_elt->indx < b_elt->indx)
a_elt = a_elt->next;
}
/* a->current is dereferenced without a null check.  */
if (a->indx == a->current->indx)
bar ();
return 0;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
#include <immintrin.h>
extern __m256 x, y;
/* Copy the external __m256 object y into the external __m256 object x.
   Takes no arguments and returns no value.  */
void
foo ()
{
x = y;
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-do compile } */
/* { dg-options "-Os -mavx -mtune=generic -dp" } */
#include <immintrin.h>
extern __m256 x, y;
extern void (*bar) (void);
/* Copy the external __m256 object y into x, then make an indirect call
   through the external function pointer bar.  Takes no arguments and
   returns no value.  */
void
foo ()
{
x = y;
/* bar is a pointer to function here, so this is an indirect call.  */
bar ();
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment