Commit 69811448 by Richard Sandiford Committed by Richard Sandiford

[i386] Make the vzeroupper pattern describe its effects (PR91994)

The problem in this PR was that vzeroupper has an effect on register
contents, but those effects weren't modelled in the rtl pattern,
which was just an unspec_volatile.

This patch fixes that by running a subpass after vzeroupper insertion
to add SETs and CLOBBERs as appropriate.  See the comments in the patch
for more details.

2019-10-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR target/91994
	* config/i386/sse.md (avx_vzeroupper): Turn into a define_expand
	and wrap the unspec_volatile in a parallel.
	(*avx_vzeroupper): New define_insn.  Use a match_parallel around
	the unspec_volatile.
	* config/i386/predicates.md (vzeroupper_pattern): Expect the
	unspec_volatile to be wrapped in a parallel.
	* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper)
	(ix86_add_reg_usage_to_vzerouppers): New functions.
	(rest_of_handle_insert_vzeroupper): Use them to add register
	usage information to the vzeroupper instructions.

gcc/testsuite/
	PR target/91994
	* gcc.target/i386/pr91994.c: New test.

From-SVN: r276648
parent 806bdf4e
2019-10-07 Richard Sandiford <richard.sandiford@arm.com>
PR target/91994
* config/i386/sse.md (avx_vzeroupper): Turn into a define_expand
and wrap the unspec_volatile in a parallel.
(*avx_vzeroupper): New define_insn. Use a match_parallel around
the unspec_volatile.
* config/i386/predicates.md (vzeroupper_pattern): Expect the
unspec_volatile to be wrapped in a parallel.
* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper)
(ix86_add_reg_usage_to_vzerouppers): New functions.
(rest_of_handle_insert_vzeroupper): Use them to add register
usage information to the vzeroupper instructions.
2019-10-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/91975
......
...@@ -1757,6 +1757,68 @@ convert_scalars_to_vector (bool timode_p)
return 0;
}
/* Modify the vzeroupper pattern in INSN so that it describes the effect
that the instruction has on the SSE registers.  LIVE_REGS are the set
of registers that are live across the instruction.
For a live register R we use:
(set (reg:V2DI R) (reg:V2DI R))
which preserves the low 128 bits but clobbers the upper bits.
For a dead register we just use:
(clobber (reg:V2DI R))
which invalidates any previous contents of R and stops R from becoming
live across the vzeroupper in future. */
static void
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
{
rtx pattern = PATTERN (insn);
/* 16 SSE registers in 64-bit mode, 8 in 32-bit mode.  */
unsigned int nregs = TARGET_64BIT ? 16 : 8;
/* Slot 0 keeps the original UNSPECV_VZEROUPPER element of the
   PARALLEL; one extra slot per SSE register for a SET or CLOBBER.  */
rtvec vec = rtvec_alloc (nregs + 1);
RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
for (unsigned int i = 0; i < nregs; ++i)
{
unsigned int regno = GET_SSE_REGNO (i);
rtx reg = gen_rtx_REG (V2DImode, regno);
if (bitmap_bit_p (live_regs, regno))
/* Live across the insn: the V2DI self-set says the low 128 bits
   are preserved while anything wider is clobbered.  */
RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
else
/* Dead: previous contents of R are invalidated.  */
RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
}
XVEC (pattern, 0) = vec;
/* The pattern changed, so make DF rescan the insn's uses/defs.  */
df_insn_rescan (insn);
}
/* Walk the vzeroupper instructions in the function and annotate them
with the effect that they have on the SSE registers. */
static void
ix86_add_reg_usage_to_vzerouppers (void)
{
basic_block bb;
rtx_insn *insn;
auto_bitmap live_regs;
/* Refresh dataflow information before simulating liveness below.  */
df_analyze ();
FOR_EACH_BB_FN (bb, cfun)
{
/* Start from the registers live at the end of the block and
   simulate backwards, so that at each insn LIVE_REGS describes
   the registers live immediately after (i.e. across) it.  */
bitmap_copy (live_regs, df_get_live_out (bb));
df_simulate_initialize_backwards (bb, live_regs);
FOR_BB_INSNS_REVERSE (bb, insn)
{
if (!NONDEBUG_INSN_P (insn))
continue;
if (vzeroupper_pattern (PATTERN (insn), VOIDmode))
ix86_add_reg_usage_to_vzeroupper (insn, live_regs);
/* Step the simulation back past INSN; this must come after
   annotating INSN, which needs the live-after set.  */
df_simulate_one_insn_backwards (bb, insn, live_regs);
}
}
}
static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
...@@ -1773,6 +1835,7 @@ rest_of_handle_insert_vzeroupper (void)
/* Call optimize_mode_switching. */
g->get_passes ()->execute_pass_mode_switching ();
ix86_add_reg_usage_to_vzerouppers ();
return 0;
}
......
...@@ -1441,8 +1441,9 @@ ...@@ -1441,8 +1441,9 @@
;; Return true if OP is a vzeroupper pattern.
(define_predicate "vzeroupper_pattern"
  (and (match_code "parallel")
       (match_code "unspec_volatile" "a")
       (match_test "XINT (XVECEXP (op, 0, 0), 1) == UNSPECV_VZEROUPPER")))

;; Return true if OP is an addsub vec_merge operation
(define_predicate "addsub_vm_operator"
......
...@@ -19622,9 +19622,16 @@ ...@@ -19622,9 +19622,16 @@
   (set_attr "mode" "OI")])

;; Clear the upper 128bits of AVX registers, equivalent to a NOP
;; if the upper 128bits are unused.  Initially we expand the instructions
;; as though they had no effect on the SSE registers, but later add SETs and
;; CLOBBERs to the PARALLEL to model the real effect.
(define_expand "avx_vzeroupper"
  [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX")

(define_insn "*avx_vzeroupper"
  [(match_parallel 0 "vzeroupper_pattern"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
......
2019-10-07 Richard Sandiford <richard.sandiford@arm.com>
PR target/91994
* gcc.target/i386/pr91994.c: New test.
2019-10-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/91975
......
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx -mvzeroupper" } */
#include "avx-check.h"
#include <immintrin.h>
/* Global 256-bit vectors; accessing them requires full-ymm loads and
   stores around the calls below.  */
__m256i x1, x2, x3;

/* Kept out of line so the call in bar remains a real call; the
   vzeroupper inserted around it is what this test exercises.  */
__attribute__ ((noinline))
static void
foo (void)
{
x1 = x2;
}
void
bar (void)
{
/* X holds x1's original value and is live across the call to foo.
   If the vzeroupper emitted for the call is mis-modelled as having
   no effect on the SSE registers, X's upper 128 bits can be lost
   (PR target/91994).  */
__m256i x = x1;
foo ();
x3 = x;
}
/* Entry point invoked by the avx-check.h harness after it has verified
   at run time that AVX is available.  */
__attribute__ ((noinline))
void
avx_test (void)
{
__m256i x = _mm256_set1_epi8 (3);
x1 = x;
bar ();
/* bar saved x1's original value (X) into x3 before foo overwrote x1;
   any corruption of the value while live across the call shows up as
   a mismatch here.  */
if (__builtin_memcmp (&x3, &x, sizeof (x)))
__builtin_abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment