Commit 7813b280 authored by Kyrylo Tkachov, committed by Michael Collison

re PR target/70119 (AArch64 should take advantage of implicit truncation of variable shift amount without defining SHIFT_COUNT_TRUNCATED)

2017-06-29  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
	    Michael Collison <michael.collison@arm.com>

	PR target/70119
	* config/aarch64/aarch64.md (*aarch64_<optab>_reg_<mode>3_mask1):
	New pattern.
	(*aarch64_reg_<mode>3_neg_mask2): New pattern.
	(*aarch64_reg_<mode>3_minus_mask): New pattern.
	(*aarch64_<optab>_reg_di3_mask2): New pattern.
	* config/aarch64/aarch64.c (aarch64_rtx_costs): Account for cost
	of shift when the shift amount is masked with constant equal to
	the size of the mode.
	* config/aarch64/predicates.md (subreg_lowpart_operator): New
	predicate.


2017-06-29  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
	    Michael Collison <michael.collison@arm.com>

	PR target/70119
	* gcc.target/aarch64/var_shift_mask_1.c: New test.

Co-Authored-By: Michael Collison <michael.collison@arm.com>

From-SVN: r249774
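
For illustration only (these functions are not part of the commit; the committed test gcc.target/aarch64/var_shift_mask_1.c below covers these shapes and more), this is the kind of source the change targets:

/* Illustrative examples.  The AArch64 variable shift instructions already
   truncate the shift amount to the register width, so the explicit masks
   below should not survive into the generated code.  */

unsigned
shift_w (unsigned x, int y)
{
  /* Expected codegen: a single "lsl w0, w0, w1", with no "and".  */
  return x << (y & 31);
}

unsigned long
shift_x (unsigned long x, int y)
{
  /* Expected codegen: a single "lsl x0, x0, x1", with no "and".  */
  return x << (y & 63);
}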
gcc/config/aarch64/aarch64.c
@@ -7541,17 +7541,26 @@ cost_plus:
         }
       else
         {
-          if (speed)
+          if (VECTOR_MODE_P (mode))
             {
-              if (VECTOR_MODE_P (mode))
-                {
-                  /* Vector shift (register).  */
-                  *cost += extra_cost->vect.alu;
-                }
-              else
+              if (speed)
+                /* Vector shift (register).  */
+                *cost += extra_cost->vect.alu;
+            }
+          else
+            {
+              if (speed)
+                /* LSLV.  */
+                *cost += extra_cost->alu.shift_reg;
+
+              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+                  && CONST_INT_P (XEXP (op1, 1))
+                  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
                 {
-                  /* LSLV.  */
-                  *cost += extra_cost->alu.shift_reg;
+                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+                  /* We already demanded XEXP (op1, 0) to be REG_P, so
+                     don't recurse into it.  */
+                  return true;
                 }
             }
           return false;  /* All arguments need to be in registers.  */
@@ -7580,14 +7589,27 @@ cost_plus:
         }
       else
         {
-          /* ASR (register) and friends.  */
-          if (speed)
+          if (VECTOR_MODE_P (mode))
             {
-              if (VECTOR_MODE_P (mode))
+              if (speed)
+                /* Vector shift (register).  */
                 *cost += extra_cost->vect.alu;
-              else
+            }
+          else
+            {
+              if (speed)
+                /* ASR (register) and friends.  */
                 *cost += extra_cost->alu.shift_reg;
+
+              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+                  && CONST_INT_P (XEXP (op1, 1))
+                  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+                {
+                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+                  /* We already demanded XEXP (op1, 0) to be REG_P, so
+                     don't recurse into it.  */
+                  return true;
+                }
             }
           return false;  /* All arguments need to be in registers.  */
         }
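Both hunks above add the same structural test on the shift-amount operand op1. Factored into a hypothetical helper (illustrative only; the commit keeps the check inline in aarch64_rtx_costs):

/* Illustrative sketch, not part of the commit.  True when OP1 has the
   form (and (reg) (const_int width-1)), i.e. a mask that the AArch64
   variable shift instructions apply implicitly, so the AND should not
   be costed separately.  */
static bool
shift_amount_fully_masked_p (rtx op1, machine_mode mode)
{
  return GET_CODE (op1) == AND
         && REG_P (XEXP (op1, 0))
         && CONST_INT_P (XEXP (op1, 1))
         && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1;
}

When that test holds, the hunks add only the cost of op0 on top of the register-shift cost and return true, so the AND and its constant are never recursed into.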
gcc/config/aarch64/aarch64.md
@@ -3942,6 +3942,97 @@
 }
 )
 
+;; When the LSL, LSR, ASR, ROR instructions operate on all register arguments
+;; they truncate the shift/rotate amount by the size of the registers they
+;; operate on: 32 for W-regs, 64 for X-regs.  This allows us to optimise away
+;; such redundant masking instructions.  GCC can do that automatically when
+;; SHIFT_COUNT_TRUNCATED is true, but we can't enable it for TARGET_SIMD
+;; because some of the SISD shift alternatives don't perform this truncation.
+;; So this pattern exists to catch such cases.
+(define_insn "*aarch64_<optab>_reg_<mode>3_mask1"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (SHIFT:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+           [(and:GPI (match_operand:GPI 2 "register_operand" "r")
+                     (match_operand 3 "const_int_operand" "n"))])))]
+  "(~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0"
+  "<shift>\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "shift_reg")]
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+        (SHIFT:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+          [(neg:SI (and:SI (match_operand:SI 2 "register_operand" "r")
+                           (match_operand 3 "const_int_operand" "n")))])))]
+  "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+               : operands[0]);
+    emit_insn (gen_negsi2 (tmp, operands[2]));
+    rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]);
+    rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op,
+                                     SUBREG_BYTE (operands[4]));
+    emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp));
+    DONE;
+  }
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_minus_mask"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+        (ashift:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (minus:QI (match_operand 2 "const_int_operand" "n")
+                    (match_operator 5 "subreg_lowpart_operator"
+                    [(and:SI (match_operand:SI 3 "register_operand" "r")
+                             (match_operand 4 "const_int_operand" "n"))]))))]
+  "((~INTVAL (operands[4]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)
+   && INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+               : operands[0]);
+    emit_insn (gen_negsi2 (tmp, operands[3]));
+    rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]);
+    rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op,
+                                     SUBREG_BYTE (operands[5]));
+    emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp));
+    DONE;
+  }
+)
+
+(define_insn "*aarch64_<optab>_reg_di3_mask2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (SHIFT:DI
+          (match_operand:DI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+           [(and:SI (match_operand:SI 2 "register_operand" "r")
+                    (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))]
+  "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)"
+{
+  rtx xop[3];
+  xop[0] = operands[0];
+  xop[1] = operands[1];
+  xop[2] = gen_lowpart (GET_MODE (operands[4]), operands[2]);
+  output_asm_insn ("<shift>\t%x0, %x1, %x2", xop);
+  return "";
+}
+  [(set_attr "type" "shift_reg")]
+)
+
 ;; Logical left shift using SISD or Integer instruction
 (define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,r,w,w")
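The *aarch64_reg_<mode>3_neg_mask2 and *aarch64_reg_<mode>3_minus_mask patterns handle shift amounts that are negated or subtracted from the register width before the shift. They rely on the fact that, modulo the register width, (size - (y & (size - 1))) and -(y & (size - 1)) select the same shift count. A sketch of the C source they target (function names are illustrative; the committed test exercises the same forms in f6 and f7):

/* Illustrative only.  Both functions should compile to "neg" + "lsl"
   on X-registers, with no separate "and" or "sub", because LSLV only
   reads the low six bits of the shift amount.  */
unsigned long
shift_by_width_minus (unsigned long x, unsigned long y)
{
  return x << (64 - (y & 63));
}

unsigned long
shift_by_neg (unsigned long x, unsigned long y)
{
  return x << -(y & 63);
}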
gcc/config/aarch64/predicates.md
@@ -35,6 +35,10 @@
   (and (match_code "const_int")
        (match_test "op == CONST0_RTX (mode)")))
 
+(define_special_predicate "subreg_lowpart_operator"
+  (and (match_code "subreg")
+       (match_test "subreg_lowpart_p (op)")))
+
 (define_predicate "aarch64_ccmp_immediate"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), -31, 31)")))
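The new predicate only requires a lowpart SUBREG; the expression inside it (the AND, or the NEG of an AND) is matched by the patterns themselves. An equivalent open-coded form (illustrative only; define_special_predicate means no automatic mode check is added):

/* Illustrative sketch of what the generated predicate tests.  */
static bool
is_subreg_lowpart_operator (rtx op)
{
  return GET_CODE (op) == SUBREG && subreg_lowpart_p (op);
}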
gcc/testsuite/gcc.target/aarch64/var_shift_mask_1.c (new file)
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* The integer variable shift and rotate instructions truncate their
   shift amounts by the datasize.  Make sure that we don't emit a redundant
   masking operation.  */

unsigned
f1 (unsigned x, int y)
{
  return x << (y & 31);
}

unsigned long
f2 (unsigned long x, int y)
{
  return x << (y & 63);
}

unsigned long
f3 (unsigned long bit_addr, int y)
{
  unsigned long bitnumb = bit_addr & 63;
  return (1L << bitnumb);
}

unsigned int
f4 (unsigned int x, unsigned int y)
{
  y &= 31;
  return x >> y | (x << (32 - y));
}

unsigned long
f5 (unsigned long x, unsigned long y)
{
  y &= 63;
  return x >> y | (x << (64 - y));
}

unsigned long
f6 (unsigned long x, unsigned long y)
{
  return (x << (64 - (y & 63)));
}

unsigned long
f7 (unsigned long x, unsigned long y)
{
  return (x << -(y & 63));
}

/* { dg-final { scan-assembler-times "lsl\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "lsl\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "ror\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "ror\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-not "and\tw\[0-9\]+, w\[0-9\]+, 31" } } */
/* { dg-final { scan-assembler-not "and\tx\[0-9\]+, x\[0-9\]+, 63" } } */
/* { dg-final { scan-assembler-not "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */