Commit 912a7ec3 by Jakub Jelinek Committed by Jakub Jelinek

re PR target/82498 (Missed optimization for x86 rotate instruction)

	PR target/82498
	* config/i386/i386.md (*ashl<mode>3_mask_1,
	*<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
	*<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
	patterns.

	* gcc.target/i386/pr82498-1.c: New test.
	* gcc.target/i386/pr82498-2.c: New test.

From-SVN: r253695
parent 1baafc8d
2017-10-12 Jakub Jelinek <jakub@redhat.com>
PR target/82498
* config/i386/i386.md (*ashl<mode>3_mask_1,
*<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
*<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
patterns.
2017-10-12 Jan Hubicka <hubicka@ucw.cz>
* profile-count.h (safe_scale_64bit): Fix GCC4.x path.
......@@ -10228,6 +10228,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
;; Left shift of an SWI48 value where the QImode count register
;; (operand 2) is explicitly ANDed with a constant (operand 3).
;; The insn condition only accepts masks in which every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 is set, i.e. masks that keep all
;; count bits below the mode's bit size.  The replacement pattern is the
;; same shift with the AND removed, using operand 2 directly.
;; The output template is "#", so this form is expected to be split
;; rather than emitted (see the GCC internals manual on
;; define_insn_and_split); the condition also requires
;; can_create_pseudo_p ().
;; NOTE(review): dropping the AND is presumably valid because the
;; hardware shift ignores the higher count bits itself -- confirm
;; against the ISA documentation.
(define_insn_and_split "*ashl<mode>3_mask_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand")
(ashift:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")
(and:QI
(match_operand:QI 2 "register_operand")
(match_operand:QI 3 "const_int_operand"))))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
&& can_create_pseudo_p ()"
"#"
"&& 1"
;; Replacement: identical shift and flags clobber, count taken
;; straight from operand 2.
[(parallel
[(set (match_dup 0)
(ashift:SWI48 (match_dup 1)
(match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn "*bmi2_ashl<mode>3_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
......@@ -10728,6 +10748,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
;; Right shift (any_shiftrt code iterator) of an SWI48 value where the
;; QImode count register (operand 2) is explicitly ANDed with a
;; constant (operand 3).  The insn condition only accepts masks in
;; which every bit of GET_MODE_BITSIZE (<MODE>mode)-1 is set.  The
;; replacement pattern is the same shift with the AND removed.
;; The output template is "#", so this form is expected to be split
;; rather than emitted; the condition also requires
;; can_create_pseudo_p ().
;; NOTE(review): dropping the AND is presumably valid because the
;; hardware shift ignores the higher count bits itself -- confirm
;; against the ISA documentation.
(define_insn_and_split "*<shift_insn><mode>3_mask_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand")
(any_shiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")
(and:QI
(match_operand:QI 2 "register_operand")
(match_operand:QI 3 "const_int_operand"))))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
&& can_create_pseudo_p ()"
"#"
"&& 1"
;; Replacement: identical shift and flags clobber, count taken
;; straight from operand 2.
[(parallel
[(set (match_dup 0)
(any_shiftrt:SWI48 (match_dup 1)
(match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn_and_split "*<shift_insn><mode>3_doubleword"
[(set (match_operand:DWI 0 "register_operand" "=&r")
(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
......@@ -11187,6 +11227,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
;; Rotate (any_rotate code iterator) of an SWI48 value where the QImode
;; count register (operand 2) is explicitly ANDed with a constant
;; (operand 3).  The insn condition only accepts masks in which every
;; bit of GET_MODE_BITSIZE (<MODE>mode)-1 is set.  The replacement
;; pattern is the same rotate with the AND removed.
;; The output template is "#", so this form is expected to be split
;; rather than emitted; the condition also requires
;; can_create_pseudo_p ().
;; NOTE(review): dropping the AND is presumably valid because the
;; hardware rotate ignores the higher count bits itself -- confirm
;; against the ISA documentation.
(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand")
(any_rotate:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")
(and:QI
(match_operand:QI 2 "register_operand")
(match_operand:QI 3 "const_int_operand"))))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
&& can_create_pseudo_p ()"
"#"
"&& 1"
;; Replacement: identical rotate and flags clobber, count taken
;; straight from operand 2.
[(parallel
[(set (match_dup 0)
(any_rotate:SWI48 (match_dup 1)
(match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
;; Implement rotation using two double-precision
;; shift instructions and a scratch register.
......@@ -11494,6 +11554,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (QImode, operands[1]);")
;; (any_or (1 << (count & mask)) src) on SWI48 registers, where the
;; QImode count register is operand 1, the constant mask is operand 2
;; and the other source register is operand 3.  Enabled only under
;; TARGET_USE_BT, and only for masks in which every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 is set.  The replacement pattern is
;; the same expression with the AND removed, shifting by operand 1
;; directly.
;; The output template is "#", so this form is expected to be split
;; rather than emitted; the condition also requires
;; can_create_pseudo_p ().
;; NOTE(review): dropping the AND presumably relies on the bit
;; instruction reducing the bit index modulo the operand size itself --
;; confirm against the ISA documentation.
(define_insn_and_split "*<btsc><mode>_mask_1"
[(set (match_operand:SWI48 0 "register_operand")
(any_or:SWI48
(ashift:SWI48
(const_int 1)
(and:QI
(match_operand:QI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(match_operand:SWI48 3 "register_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT
&& (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
&& can_create_pseudo_p ()"
"#"
"&& 1"
;; Replacement: same any_or of a shifted 1 with operand 3, with the
;; shift count taken straight from operand 1.
[(parallel
[(set (match_dup 0)
(any_or:SWI48
(ashift:SWI48 (const_int 1)
(match_dup 1))
(match_dup 3)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn "*btr<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(and:SWI48
......@@ -11535,6 +11619,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (QImode, operands[1]);")
;; (and (rotate -2 (count & mask)) src) on SWI48 registers, where the
;; QImode count register is operand 1, the constant mask is operand 2
;; and the other source register is operand 3.  Rotating the constant
;; -2 (all bits set except bit 0) yields a value with a single zero
;; bit, so the AND clears one bit of operand 3.  Enabled only under
;; TARGET_USE_BT, and only for masks in which every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 is set.  The replacement pattern is
;; the same expression with the inner AND removed, rotating by
;; operand 1 directly.
;; The output template is "#", so this form is expected to be split
;; rather than emitted; the condition also requires
;; can_create_pseudo_p ().
;; NOTE(review): dropping the inner AND presumably relies on the bit
;; instruction reducing the bit index modulo the operand size itself --
;; confirm against the ISA documentation.
(define_insn_and_split "*btr<mode>_mask_1"
[(set (match_operand:SWI48 0 "register_operand")
(and:SWI48
(rotate:SWI48
(const_int -2)
(and:QI
(match_operand:QI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(match_operand:SWI48 3 "register_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT
&& (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
&& can_create_pseudo_p ()"
"#"
"&& 1"
;; Replacement: same AND of a rotated -2 with operand 3, with the
;; rotate count taken straight from operand 1.
[(parallel
[(set (match_dup 0)
(and:SWI48
(rotate:SWI48 (const_int -2)
(match_dup 1))
(match_dup 3)))
(clobber (reg:CC FLAGS_REG))])])
;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point. But in 64-bit, we can't hold the
......
2017-10-12 Jakub Jelinek <jakub@redhat.com>
PR target/82498
* gcc.target/i386/pr82498-1.c: New test.
* gcc.target/i386/pr82498-2.c: New test.
2017-10-12 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/predict-13.c: Update template for probability change.
......
/* PR target/82498 */
/* { dg-do compile } */
/* { dg-options "-O2 -mtune=generic -masm=att" } */
/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
/* Rotate X left by Y, spelled as a shift pair, with an unsigned char
   count.  A zero count returns X unchanged; otherwise Y is masked with
   __CHAR_BIT__ * __SIZEOF_INT__ - 1 before the two shifts.
   NOTE(review): a nonzero Y that masks to 0 makes the right-shift count
   equal to the full bit width; the file is marked dg-do compile, so
   presumably the function is only compiled, never run.  */
unsigned
f1 (unsigned x, unsigned char y)
{
if (y == 0)
return x;
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* Rotate X left by Y, spelled as a shift pair, with an unsigned int
   count.  A zero count returns X unchanged; otherwise Y is masked with
   __CHAR_BIT__ * __SIZEOF_INT__ - 1 before the two shifts.
   NOTE(review): a nonzero Y that masks to 0 makes the right-shift count
   equal to the full bit width; the file is marked dg-do compile, so
   presumably the function is only compiled, never run.  */
unsigned
f2 (unsigned x, unsigned y)
{
if (y == 0)
return x;
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* Rotate X left by Y, spelled as a shift pair, with an unsigned short
   count.  A zero count returns X unchanged; otherwise Y is masked with
   __CHAR_BIT__ * __SIZEOF_INT__ - 1 before the two shifts.
   NOTE(review): a nonzero Y that masks to 0 makes the right-shift count
   equal to the full bit width; the file is marked dg-do compile, so
   presumably the function is only compiled, never run.  */
unsigned
f3 (unsigned x, unsigned short y)
{
if (y == 0)
return x;
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* Rotate X left by Y with an unsigned char count and no zero-count
   branch: Y is masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1, and the
   right-shift count is -Y masked with the same constant, so both shift
   counts stay below the bit width.  */
unsigned
f4 (unsigned x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* Rotate X left by Y with an unsigned int count and no zero-count
   branch: Y is masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1, and the
   right-shift count is -Y masked with the same constant, so both shift
   counts stay below the bit width.  */
unsigned
f5 (unsigned x, unsigned int y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* Rotate X left by Y with an unsigned short count and no zero-count
   branch: Y is masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1, and the
   right-shift count is -Y masked with the same constant, so both shift
   counts stay below the bit width.  */
unsigned
f6 (unsigned x, unsigned short y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* PR target/82498 */
/* { dg-do compile } */
/* { dg-options "-O2 -mtune=generic -masm=att" } */
/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
/* Right shift of a signed int X by an unsigned char count Y that is
   first masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1.  The file's
   dg-final directive requires that no and[bwlq] instruction appears in
   the generated assembly.  */
int
f1 (int x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return x >> y;
}
/* Right shift of an unsigned X by an unsigned char count Y that is
   first masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1.  The file's
   dg-final directive requires that no and[bwlq] instruction appears in
   the generated assembly.  */
unsigned
f2 (unsigned x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return x >> y;
}
/* Left shift of an unsigned X by an unsigned char count Y that is
   first masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1.  The file's
   dg-final directive requires that no and[bwlq] instruction appears in
   the generated assembly.  */
unsigned
f3 (unsigned x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return x << y;
}
/* Set bit Y of X: Y is masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1
   and X is ORed with 1U shifted left by the masked count.  The file's
   dg-final directive requires that no and[bwlq] instruction appears in
   the generated assembly.  */
unsigned
f4 (unsigned x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return x | (1U << y);
}
/* Toggle bit Y of X: Y is masked with __CHAR_BIT__ * __SIZEOF_INT__ - 1
   and X is XORed with 1U shifted left by the masked count.  The file's
   dg-final directive requires that no and[bwlq] instruction appears in
   the generated assembly.  */
unsigned
f5 (unsigned x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return x ^ (1U << y);
}
/* Clear bit Y of X + 2: Y is masked with
   __CHAR_BIT__ * __SIZEOF_INT__ - 1 and the sum is ANDed with the
   complement of 1U shifted left by the masked count.  The file's
   dg-final directive requires that no and[bwlq] instruction appears in
   the generated assembly, so the AND written here must not be emitted
   as one.
   NOTE(review): the reason for adding 2 to X is not evident from this
   file -- confirm against the PR discussion.  */
unsigned
f6 (unsigned x, unsigned char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x + 2) & ~(1U << y);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment