Commit 453773df by Uros Bizjak Committed by Uros Bizjak

re PR target/46091 (missed optimization: x86 bt/btc/bts instructions)

	PR target/46091
	* config/i386/i386.md (*btsq_imm): Rename from *btsq.
	(*btrq_imm): Rename from *btrq.
	(*btcq_imm): Rename from *btcq.
	(btsc): New code attribute.
	(*<btsc><mode>): New insn pattern.
	(*btr<mode>): Ditto.
	(*<btsc><mode>_mask): New insn_and_split pattern.
	(*btr<mode>_mask): Ditto.

testsuite/ChangeLog:

	PR target/46091
	* gcc.target/i386/pr46091-4.c: New test.
	* gcc.target/i386/pr46091-4a.c: Ditto.
	* gcc.target/i386/pr46091-5.c: Ditto.
	* gcc.target/i386/pr46091-5a.c: Ditto.

From-SVN: r251235
parent 2c0378f4
2017-08-21 Uros Bizjak <ubizjak@gmail.com>
PR target/46091
* config/i386/i386.md (*btsq_imm): Rename from *btsq.
(*btrq_imm): Rename from *btrq.
(*btcq_imm): Rename from *btcq.
(btsc): New code attribute.
(*<btsc><mode>): New insn pattern.
(*btr<mode>): Ditto.
(*<btsc><mode>_mask): New insn_and_split pattern.
(*btr<mode>_mask): Ditto.
2017-08-21 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
......
......@@ -1081,6 +1081,9 @@
;; Immediate operand constraint for shifts.
;; Per-mode constraint letter: "I" for QI, HI and SI, "J" for DI and
;; "O" for TI.
(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
;; Print register name in the specified mode.
;; Per-mode letter: QI -> b, HI -> w, SI -> k, DI -> q.  The bit-test
;; patterns splice it into their output templates as `%<k>1' to print
;; the bit-number operand (presumably as an operand-print modifier that
;; selects the register width -- confirm against the i386 back end).
(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
;; General operand predicate for integer modes.
(define_mode_attr general_operand
[(QI "general_operand")
......@@ -10998,20 +11001,103 @@
;; Bit set / bit test instructions
;; %%% bts, btr, btc, bt.
;; In general these instructions are *slow* with variable operand
;; when applied to memory. When applied to registers, it depends
;; on the cpu implementation. They're never faster than the
;; corresponding and/ior/xor operations, so with 32-bit there's
;; no point. But in 64-bit, we can't hold the relevant immediates
;; within the instruction itself, so operating on bits in the high
;; 32-bits of a register becomes easier.
;; %%% bts, btr, btc
;; These instructions are *slow* when applied to memory.
;; Mnemonic for each logical code used by the single-bit patterns:
;; ior is emitted as bts, xor as btc.
(define_code_attr btsc [(ior "bts") (xor "btc")])
;; Single-bit set (ior -> bts) or complement (xor -> btc) of a register,
;; with the bit number supplied in a QImode register (operand 1).
;; Operand 2 carries the "0" constraint, so it is tied to the output
;; register and the instruction updates it in place.  The pattern is
;; gated on TARGET_USE_BT and declares a clobber of the flags register.
(define_insn "*<btsc><mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(any_or:SWI48
;; The one-bit mask: constant 1 shifted left by the bit number.
(ashift:SWI48 (const_int 1)
(match_operand:QI 1 "register_operand" "r"))
(match_operand:SWI48 2 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT"
;; %<k>1 prints the bit-number register using the letter that the `k'
;; mode attribute selects for this pattern's mode.
"<btsc>{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
(set_attr "znver1_decode" "double")
(set_attr "mode" "<MODE>")])
;; Avoid useless masking of count operand.
;;
;; Matches the same single-bit ior/xor as *<btsc><mode>, but with the
;; bit number written as the low byte of (and:SI reg mask).  When the
;; mask has every bit of (bitsize - 1) set, it does not alter the part
;; of the bit number that a register-destination bts/btc looks at (the
;; bit offset is taken modulo the operand size), so the AND is dropped
;; by splitting to the plain pattern on the unmasked register.
;;
;; TARGET_USE_BT has to be part of the insn condition: the split result
;; is recognized only by *<btsc><mode>, which is itself gated on
;; TARGET_USE_BT.  Without the guard this pattern could match when bt
;; instructions are disabled and then split into an unrecognizable insn.
(define_insn_and_split "*<btsc><mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
        (any_or:SWI48
          (ashift:SWI48
            (const_int 1)
            (subreg:QI
              (and:SI
                (match_operand:SI 1 "register_operand")
                (match_operand:SI 2 "const_int_operand")) 0))
          (match_operand:SWI48 3 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_or:SWI48
             (ashift:SWI48 (const_int 1)
                           (match_dup 1))
             (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  ;; The plain pattern wants the bit number as a QImode register.
  "operands[1] = gen_lowpart (QImode, operands[1]);")
;; Single-bit clear of a register (btr), with the bit number supplied in
;; a QImode register (operand 1).  The mask is written as constant -2
;; (all ones except bit 0) rotated by the bit number, which leaves a
;; single zero at that bit position; and-ing it with operand 2 clears
;; that bit.  Operand 2 carries the "0" constraint, so it is tied to the
;; output register.  The pattern is gated on TARGET_USE_BT and declares
;; a clobber of the flags register.
(define_insn "*btr<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(and:SWI48
(rotate:SWI48 (const_int -2)
(match_operand:QI 1 "register_operand" "r"))
(match_operand:SWI48 2 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT"
;; %<k>1 prints the bit-number register using the letter that the `k'
;; mode attribute selects for this pattern's mode.
"btr{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
(set_attr "znver1_decode" "double")
(set_attr "mode" "<MODE>")])
;; Avoid useless masking of count operand.
;;
;; Matches the same single-bit clear as *btr<mode>, but with the bit
;; number written as the low byte of (and:SI reg mask).  When the mask
;; has every bit of (bitsize - 1) set, it does not alter the part of the
;; bit number that a register-destination btr looks at (the bit offset
;; is taken modulo the operand size), so the AND is dropped by splitting
;; to the plain pattern on the unmasked register.
;;
;; TARGET_USE_BT has to be part of the insn condition: the split result
;; is recognized only by *btr<mode>, which is itself gated on
;; TARGET_USE_BT.  Without the guard this pattern could match when bt
;; instructions are disabled and then split into an unrecognizable insn.
(define_insn_and_split "*btr<mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
        (and:SWI48
          (rotate:SWI48
            (const_int -2)
            (subreg:QI
              (and:SI
                (match_operand:SI 1 "register_operand")
                (match_operand:SI 2 "const_int_operand")) 0))
          (match_operand:SWI48 3 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (and:SWI48
             (rotate:SWI48 (const_int -2)
                           (match_dup 1))
             (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  ;; The plain pattern wants the bit number as a QImode register.
  "operands[1] = gen_lowpart (QImode, operands[1]);")
;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point. But in 64-bit, we can't hold the
;; relevant immediates within the instruction itself, so operating
;; on bits in the high 32-bits of a register becomes easier.
;;
;; These are slow on Nocona, but fast on Athlon64. We do require the use
;; of btrq and btcq for corner cases of post-reload expansion of absdf and
;; negdf respectively, so they can never be disabled entirely.
(define_insn "*btsq"
(define_insn "*btsq_imm"
[(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
(const_int 1)
(match_operand 1 "const_0_to_63_operand" "J"))
......@@ -11024,7 +11110,7 @@
(set_attr "znver1_decode" "double")
(set_attr "mode" "DI")])
(define_insn "*btrq"
(define_insn "*btrq_imm"
[(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
(const_int 1)
(match_operand 1 "const_0_to_63_operand" "J"))
......@@ -11037,7 +11123,7 @@
(set_attr "znver1_decode" "double")
(set_attr "mode" "DI")])
(define_insn "*btcq"
(define_insn "*btcq_imm"
[(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
(const_int 1)
(match_operand 1 "const_0_to_63_operand" "J"))
......@@ -11125,6 +11211,8 @@
}
})
;; %%% bt
(define_insn "*bt<mode>"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
......@@ -13148,8 +13236,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
(define_mode_attr k [(SI "k") (DI "q")])
(define_insn "*bmi2_bzhi_<mode>3_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(zero_extract:SWI48
......
2017-08-21 Uros Bizjak <ubizjak@gmail.com>
PR target/46091
* gcc.target/i386/pr46091-4.c: New test.
* gcc.target/i386/pr46091-4a.c: Ditto.
* gcc.target/i386/pr46091-5.c: Ditto.
* gcc.target/i386/pr46091-5a.c: Ditto.
2017-08-21 Nathan Sidwell <nathan@acm.org>
PR c++/81899
......
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Clear bit N of X by and-ing with an inverted one-bit mask.  The
   scan-assembler directive that follows expects "btr" in the output.  */
int test_1 (int x, int n)
{
x &= ~(0x01 << n);
return x;
}
/* { dg-final { scan-assembler "btr" } } */
/* Set bit N of X by or-ing in a one-bit mask.  The scan-assembler
   directive that follows expects "bts" in the output.  */
int test_2 (int x, int n)
{
x |= (0x01 << n);
return x;
}
/* { dg-final { scan-assembler "bts" } } */
/* Toggle bit N of X by xor-ing with a one-bit mask.  The scan-assembler
   directive that follows expects "btc" in the output.  */
int test_3 (int x, int n)
{
x ^= (0x01 << n);
return x;
}
/* { dg-final { scan-assembler "btc" } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Clear bit N of X after explicitly reducing N to its low five bits.
   The file's closing scan-assembler-not directive requires that no
   andl/andq instruction appears in the output.  */
int test_1 (int x, int n)
{
n &= 0x1f;
x &= ~(0x01 << n);
return x;
}
/* Set bit N of X after explicitly reducing N to its low five bits.
   The file's closing scan-assembler-not directive requires that no
   andl/andq instruction appears in the output.  */
int test_2 (int x, int n)
{
n &= 0x1f;
x |= (0x01 << n);
return x;
}
/* Toggle bit N of X after explicitly reducing N to its low five bits.
   The file's closing scan-assembler-not directive requires that no
   andl/andq instruction appears in the output.  */
int test_3 (int x, int n)
{
n &= 0x1f;
x ^= (0x01 << n);
return x;
}
/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2" } */
/* Clear bit N of the long X; the constant 1 is cast to long before the
   shift so the mask is built at the width of X.  The scan-assembler
   directive that follows expects "btr" in the output.  */
long test_1 (long x, int n)
{
x &= ~((long)0x01 << n);
return x;
}
/* { dg-final { scan-assembler "btr" } } */
/* Set bit N of the long X; the constant 1 is cast to long before the
   shift so the mask is built at the width of X.  The scan-assembler
   directive that follows expects "bts" in the output.  */
long test_2 (long x, int n)
{
x |= ((long)0x01 << n);
return x;
}
/* { dg-final { scan-assembler "bts" } } */
/* Toggle bit N of the long X; the constant 1 is cast to long before the
   shift so the mask is built at the width of X.  The scan-assembler
   directive that follows expects "btc" in the output.  */
long test_3 (long x, int n)
{
x ^= ((long)0x01 << n);
return x;
}
/* { dg-final { scan-assembler "btc" } } */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2" } */
/* Clear bit N of the long X after explicitly reducing N to its low six
   bits.  The file's closing scan-assembler-not directive requires that
   no andl/andq instruction appears in the output.  */
long test_1 (long x, int n)
{
n &= 0x3f;
x &= ~((long)0x01 << n);
return x;
}
/* Set bit N of the long X after explicitly reducing N to its low six
   bits.  The file's closing scan-assembler-not directive requires that
   no andl/andq instruction appears in the output.  */
long test_2 (long x, int n)
{
n &= 0x3f;
x |= ((long)0x01 << n);
return x;
}
/* Toggle bit N of the long X after explicitly reducing N to its low six
   bits.  The file's closing scan-assembler-not directive requires that
   no andl/andq instruction appears in the output.  */
long test_3 (long x, int n)
{
n &= 0x3f;
x ^= ((long)0x01 << n);
return x;
}
/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment