Commit beed3701 by Uros Bizjak Committed by Uros Bizjak

re PR target/71245 (std::atomic<double> load/store bounces the data to the stack using fild/fistp)

	PR target/71245
	* config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
	New peepholes to remove unneeded fild/fistp pairs.
	(define_peephole2 atomic_loaddi_fpu): Ditto.

testsuite/ChangeLog:

	PR target/71245
	* gcc.target/i386/pr71245-1.c: New test.
	* gcc.target/i386/pr71245-2.c: Ditto.

From-SVN: r236863
parent f0b03e94
2016-05-29 Uros Bizjak <ubizjak@gmail.com>
PR target/71245
* config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
New peepholes to remove unneeded fild/fistp pairs.
(define_peephole2 atomic_loaddi_fpu): Ditto.
2016-05-27 Jan Hubicka <hubicka@ucw.cz>
* predict.c (maybe_hot_frequency_p): Avoid division.
......
......@@ -210,6 +210,34 @@
DONE;
})
;; Peephole (32-bit only): collapse an x87 atomic DImode copy that is
;; immediately reloaded as a double.
;;
;; Matched sequence:
;;   1. fp reg 0 (DF)  <- UNSPEC_FILD_ATOMIC of DImode memory 1
;;   2. DImode memory 2 <- UNSPEC_FIST_ATOMIC of fp reg 0
;;   3. fp reg 3 (DF)  <- DFmode memory 4
;;
;; Replacement: a single DFmode load of fp reg 3 directly from the
;; location of operand 1 (operand 5 is operand 1 re-viewed in DFmode).
;;
;; NOTE(review): the replacement emits no store to operand 2, so this
;; relies on that memory not being read again afterwards -- confirm.
(define_peephole2
[(set (match_operand:DF 0 "fp_register_operand")
(unspec:DF [(match_operand:DI 1 "memory_operand")]
UNSPEC_FILD_ATOMIC))
(set (match_operand:DI 2 "memory_operand")
(unspec:DI [(match_dup 0)]
UNSPEC_FIST_ATOMIC))
(set (match_operand:DF 3 "fp_register_operand")
(match_operand:DF 4 "memory_operand"))]
;; Conditions: not a 64-bit target; the intermediate register (operand 0)
;; is dead at peephole position 2 (presumably: once the second insn is
;; done -- see peep2_reg_dead_p); and the DFmode reload (operand 4) is the
;; same rtx as the DImode store destination (operand 2) viewed in DFmode
;; at offset 0.
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
&& rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
;; Output template and preparation code that builds operand 5.
[(set (match_dup 3) (match_dup 5))]
"operands[5] = gen_lowpart (DFmode, operands[1]);")
;; Peephole (32-bit only): same idea as the fild/fistp load variant, but
;; for a DImode copy that goes through an SSE register with plain moves.
;;
;; Matched sequence:
;;   1. SSE reg 0 (DI)  <- DImode memory 1
;;   2. DImode memory 2 <- SSE reg 0
;;   3. fp reg 3 (DF)   <- DFmode memory 4
;;
;; Replacement: a single DFmode load of fp reg 3 directly from the
;; location of operand 1 (operand 5 is operand 1 re-viewed in DFmode).
;;
;; NOTE(review): the replacement emits no store to operand 2, so this
;; relies on that memory not being read again afterwards -- confirm.
(define_peephole2
[(set (match_operand:DI 0 "sse_reg_operand")
(match_operand:DI 1 "memory_operand"))
(set (match_operand:DI 2 "memory_operand")
(match_dup 0))
(set (match_operand:DF 3 "fp_register_operand")
(match_operand:DF 4 "memory_operand"))]
;; Conditions: not a 64-bit target; the intermediate SSE register
;; (operand 0) is dead at peephole position 2 (presumably: once the second
;; insn is done -- see peep2_reg_dead_p); and the DFmode reload (operand 4)
;; is the same rtx as the DImode store destination (operand 2) viewed in
;; DFmode at offset 0.
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
&& rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
;; Output template and preparation code that builds operand 5.
[(set (match_dup 3) (match_dup 5))]
"operands[5] = gen_lowpart (DFmode, operands[1]);")
(define_expand "atomic_store<mode>"
[(set (match_operand:ATOMIC 0 "memory_operand")
(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
......@@ -298,6 +326,34 @@
DONE;
})
;; Peephole (32-bit only): collapse a DFmode spill that is immediately
;; copied elsewhere by an x87 atomic DImode fild/fistp pair.
;;
;; Matched sequence:
;;   1. DFmode memory 0 <- fp reg 1
;;   2. fp reg 2 (DF)   <- UNSPEC_FILD_ATOMIC of DImode memory 3
;;   3. DImode memory 4 <- UNSPEC_FIST_ATOMIC of fp reg 2
;;
;; Replacement: a single DFmode store of fp reg 1 directly to the location
;; of operand 4 (operand 5 is operand 4 re-viewed in DFmode).
;;
;; NOTE(review): the replacement emits no store to operand 0, so this
;; relies on that memory not being read again afterwards -- confirm.
(define_peephole2
[(set (match_operand:DF 0 "memory_operand")
(match_operand:DF 1 "fp_register_operand"))
(set (match_operand:DF 2 "fp_register_operand")
(unspec:DF [(match_operand:DI 3 "memory_operand")]
UNSPEC_FILD_ATOMIC))
(set (match_operand:DI 4 "memory_operand")
(unspec:DI [(match_dup 2)]
UNSPEC_FIST_ATOMIC))]
;; Conditions: not a 64-bit target; the intermediate register (operand 2)
;; is dead at peephole position 3 (presumably: once the third insn is
;; done -- see peep2_reg_dead_p); and the DFmode spill slot (operand 0) is
;; the same rtx as the DImode source (operand 3) viewed in DFmode at
;; offset 0.
"!TARGET_64BIT
&& peep2_reg_dead_p (3, operands[2])
&& rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
;; Output template and preparation code that builds operand 5.
[(set (match_dup 5) (match_dup 1))]
"operands[5] = gen_lowpart (DFmode, operands[4]);")
;; Peephole (32-bit only): same idea as the fild/fistp store variant, but
;; for a DImode copy that goes through an SSE register with plain moves.
;;
;; Matched sequence:
;;   1. DFmode memory 0 <- fp reg 1
;;   2. SSE reg 2 (DI)  <- DImode memory 3
;;   3. DImode memory 4 <- SSE reg 2
;;
;; Replacement: a single DFmode store of fp reg 1 directly to the location
;; of operand 4 (operand 5 is operand 4 re-viewed in DFmode).
;;
;; NOTE(review): the replacement emits no store to operand 0, so this
;; relies on that memory not being read again afterwards -- confirm.
(define_peephole2
[(set (match_operand:DF 0 "memory_operand")
(match_operand:DF 1 "fp_register_operand"))
(set (match_operand:DI 2 "sse_reg_operand")
(match_operand:DI 3 "memory_operand"))
(set (match_operand:DI 4 "memory_operand")
(match_dup 2))]
;; Conditions: not a 64-bit target; the intermediate SSE register
;; (operand 2) is dead at peephole position 3 (presumably: once the third
;; insn is done -- see peep2_reg_dead_p); and the DFmode spill slot
;; (operand 0) is the same rtx as the DImode source (operand 3) viewed in
;; DFmode at offset 0.
"!TARGET_64BIT
&& peep2_reg_dead_p (3, operands[2])
&& rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
;; Output template and preparation code that builds operand 5.
[(set (match_dup 5) (match_dup 1))]
"operands[5] = gen_lowpart (DFmode, operands[4]);")
;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations. But the fix_trunc patterns want way more setup than we want
;; to provide. Note that the scratch is DFmode instead of XFmode in order
......
2016-05-29 Uros Bizjak <ubizjak@gmail.com>
PR target/71245
* gcc.target/i386/pr71245-1.c: New test.
* gcc.target/i386/pr71245-2.c: Ditto.
2016-05-29 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/71105
......
/* PR target/71245 */
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2 -march=pentium -mno-sse -mfpmath=387" } */
/* Overlays a 64-bit integer and a double so the same storage can be
   accessed atomically as an integer and arithmetically as a double.  */
typedef union
{
unsigned long long ll;
double d;
} u_t;
/* Global object under test, initialised through its double member.  */
u_t d = { .d = 5.0 };
/* Atomically loads the 64-bit image of the global `d`, adds 1.0 to it
   through the double view, and atomically stores the result back.
   The load and the store are each sequentially consistent, but they are
   two separate operations, not a single atomic read-modify-write.  */
void foo_d (void)
{
u_t tmp;
/* 64-bit seq_cst load through the integer member.  */
tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);
/* Floating-point update on the same bits via the double member.  */
tmp.d += 1.0;
/* 64-bit seq_cst store of the updated bits.  */
__atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
}
/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
/* PR target/71245 */
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2 -march=pentium -msse -mno-sse2 -mfpmath=387" } */
/* Overlays a 64-bit integer and a double so the same storage can be
   accessed atomically as an integer and arithmetically as a double.  */
typedef union
{
unsigned long long ll;
double d;
} u_t;
/* Global object under test, initialised through its double member.  */
u_t d = { .d = 5.0 };
/* Atomically loads the 64-bit image of the global `d`, adds 1.0 to it
   through the double view, and atomically stores the result back.
   The load and the store are each sequentially consistent, but they are
   two separate operations, not a single atomic read-modify-write.  */
void foo_d (void)
{
u_t tmp;
/* 64-bit seq_cst load through the integer member.  */
tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);
/* Floating-point update on the same bits via the double member.  */
tmp.d += 1.0;
/* 64-bit seq_cst store of the updated bits.  */
__atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
}
/* { dg-final { scan-assembler-not "movlps" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment