Commit 3d44ff99 by Andreas Krebbel Committed by Andreas Krebbel

[PATCH] S/390: Improve risbg usage

gcc/ChangeLog:

2015-07-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	    * config/s390/s390.c (s390_rtx_costs): Make risbg patterns
	    cheaper.
	    (s390_expand_insv): Don't generate risbg pattern for constant zero
	    sources.
	    * config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft")
	    ("*insv<mode>_z10_appendbitsleft"): New pattern definitions.  New
	    splitters.

gcc/testsuite/ChangeLog:

2015-07-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	    * gcc.target/s390/insv-1.c: New test.
	    * gcc.target/s390/insv-2.c: New test.
	    * gcc.target/s390/insv-3.c: New test.

From-SVN: r226148
parent bacf8ec3
2015-07-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* config/s390/s390.c (s390_rtx_costs): Make risbg patterns
cheaper.
(s390_expand_insv): Don't generate risbg pattern for constant zero
sources.
* config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft")
("*insv<mode>_z10_appendbitsleft"): New pattern definitions. New
splitters.
2015-07-24 Dominik Vogt <vogt@linux.vnet.ibm.com>
* config/s390/s390.c (s390_reorg): Clean up handling of processors
......
......@@ -3321,13 +3321,26 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
*total = 0;
return true;
case IOR:
/* risbg */
if (GET_CODE (XEXP (x, 0)) == AND
&& GET_CODE (XEXP (x, 1)) == ASHIFT
&& REG_P (XEXP (XEXP (x, 0), 0))
&& REG_P (XEXP (XEXP (x, 1), 0))
&& CONST_INT_P (XEXP (XEXP (x, 0), 1))
&& CONST_INT_P (XEXP (XEXP (x, 1), 1))
&& (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
(1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
{
*total = COSTS_N_INSNS (2);
return true;
}
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
case ROTATE:
case ROTATERT:
case AND:
case IOR:
case XOR:
case NEG:
case NOT:
......@@ -5839,8 +5852,17 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
if (mode_s == VOIDmode)
{
/* Assume const_int etc already in the proper mode. */
src = force_reg (mode, src);
/* For constant zero values the representation with AND
appears to be folded in more situations than the (set
(zero_extract) ...).
We only do this when the start and end of the bitfield
remain in the same SImode chunk. That way nihf or nilf
can be used.
The AND patterns might still generate a risbg for this. */
if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
return false;
else
src = force_reg (mode, src);
}
else if (mode_s != mode)
{
......
......@@ -3776,6 +3776,71 @@
[(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
; Implement appending Y on the left of S bits of X
; x = (y << s) | (x & ((1 << s) - 1))
; Append operand 3 to the left of the low-order bits of operand 1:
;   op0 = (op3 << op4) | (op1 & op2)
; The insn condition only accepts masks of the form (1 << op4) - 1, so
; the two IOR inputs never share a bit.  Operand 1 is tied to the
; output register through the "0" constraint.
; The mask is computed in unsigned HOST_WIDE_INT (the type UINTVAL
; yields) instead of with 1UL: unsigned long may be only 32 bits wide
; on the build host, which would make shift counts >= 32 undefined.
(define_insn "*insv<mode>_zEC12_appendbitsleft"
  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
        (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
                          (match_operand:GPR 2 "immediate_operand" ""))
                 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
                             (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
  "TARGET_ZEC12
   && UINTVAL (operands[2])
      == ((unsigned HOST_WIDE_INT) 1 << UINTVAL (operands[4])) - 1"
  "risbgn\t%0,%3,64-<bitsize>,64-%4-1,%4"
  [(set_attr "op_type" "RIE")
   (set_attr "z10prop" "z10_super_E1")])
; Variant for TARGET_Z10 without TARGET_ZEC12 of
;   op0 = (op3 << op4) | (op1 & op2)
; It emits risbg and therefore declares the condition code register
; as clobbered.  The insn condition only accepts masks of the form
; (1 << op4) - 1, so the two IOR inputs never share a bit.  Operand 1
; is tied to the output register through the "0" constraint.
; The mask is computed in unsigned HOST_WIDE_INT (the type UINTVAL
; yields) instead of with 1UL: unsigned long may be only 32 bits wide
; on the build host, which would make shift counts >= 32 undefined.
(define_insn "*insv<mode>_z10_appendbitsleft"
  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
        (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
                          (match_operand:GPR 2 "immediate_operand" ""))
                 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
                             (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_Z10 && !TARGET_ZEC12
   && UINTVAL (operands[2])
      == ((unsigned HOST_WIDE_INT) 1 << UINTVAL (operands[4])) - 1"
  "risbg\t%0,%3,64-<bitsize>,64-%4-1,%4"
  [(set_attr "op_type" "RIE")
   (set_attr "z10prop" "z10_super_E1")])
; z = (x << c) | (y >> d) with (x << c) and (y >> d) not overlapping after shifting
; -> z = y >> d; z = (x << c) | (z & ((1 << c) - 1))
; -> z = y >> d; z = risbg;
; Split  op0 = (op1 >> op2) | (op3 << op4)  into a logical right shift
; followed by the "append bits left" form
;   tmp = op1 >> op2;  op0 = (op3 << op4) | (tmp & ((1 << op4) - 1))
; The condition op2 + op4 >= <bitsize> guarantees that the two shifted
; values do not share a bit, so masking tmp does not lose anything.
; TARGET_ZEC12 variant: no condition code clobber.
(define_split
  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
        (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
                               (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
                 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
                             (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
  "TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
  [(set (match_dup 6)
        (lshiftrt:GPR (match_dup 1) (match_dup 2)))
   (set (match_dup 0)
        (ior:GPR (and:GPR (match_dup 6) (match_dup 5))
                 (ashift:GPR (match_dup 3) (match_dup 4))))]
{
  /* Build the mask in unsigned HOST_WIDE_INT; 1UL may be only 32 bits
     wide on the build host.  */
  operands[5]
    = GEN_INT (((unsigned HOST_WIDE_INT) 1 << UINTVAL (operands[4])) - 1);

  /* The first insn writes its result before the second one reads
     operand 3.  If the destination overlaps operand 3 the shift result
     has to go to a fresh pseudo; give up when none can be created.  */
  if (reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      if (!can_create_pseudo_p ())
        FAIL;
      operands[6] = gen_reg_rtx (<MODE>mode);
    }
  else
    operands[6] = operands[0];
})
; Split  op0 = (op1 >> op2) | (op3 << op4)  into a logical right shift
; followed by the "append bits left" form
;   tmp = op1 >> op2;  op0 = (op3 << op4) | (tmp & ((1 << op4) - 1))
; The condition op2 + op4 >= <bitsize> guarantees that the two shifted
; values do not share a bit, so masking tmp does not lose anything.
; Variant for TARGET_Z10 without TARGET_ZEC12: the matched pattern and
; the second resulting insn carry a condition code clobber.
(define_split
  [(parallel
    [(set (match_operand:GPR 0 "nonimmediate_operand" "")
          (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
                                 (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
                   (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
                               (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
     (clobber (reg:CC CC_REGNUM))])]
  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
  [(set (match_dup 6)
        (lshiftrt:GPR (match_dup 1) (match_dup 2)))
   (parallel
    [(set (match_dup 0)
          (ior:GPR (and:GPR (match_dup 6) (match_dup 5))
                   (ashift:GPR (match_dup 3) (match_dup 4))))
     (clobber (reg:CC CC_REGNUM))])]
{
  /* Build the mask in unsigned HOST_WIDE_INT; 1UL may be only 32 bits
     wide on the build host.  */
  operands[5]
    = GEN_INT (((unsigned HOST_WIDE_INT) 1 << UINTVAL (operands[4])) - 1);

  /* The first insn writes its result before the second one reads
     operand 3.  If the destination overlaps operand 3 the shift result
     has to go to a fresh pseudo; give up when none can be created.  */
  if (reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      if (!can_create_pseudo_p ())
        FAIL;
      operands[6] = gen_reg_rtx (<MODE>mode);
    }
  else
    operands[6] = operands[0];
})
(define_insn "*r<noxa>sbg_<mode>_noshift"
[(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
(IXOR:GPR
......
2015-07-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* gcc.target/s390/insv-1.c: New test.
* gcc.target/s390/insv-2.c: New test.
* gcc.target/s390/insv-3.c: New test.
2015-07-24 Tom de Vries <tom@codesourcery.com>
* gcc.dg/autopar/uns-outer-4.c: Remove loopfn xfail.
......
/* { dg-do compile } */
/* { dg-options "-O3 -march=z10 -mzarch" } */
/* Shift A left by 5 and fill the vacated low 5 bits from B.  The mask
   (1UL << 5) - 1 matches the shift count exactly.  */
unsigned long
foo1 (unsigned long a, unsigned long b)
{
return (a << 5) | (b & (((1UL << 5) - 1)));
}
/* This generates very different RTX than foo1. The output reg (r2)
matches the unshifted argument. So it actually is a
(set (zero_extract a 59 0) b) */
/* Same expression as foo1 with the roles of A and B swapped: B is
   shifted left by 5 and the low 5 bits come from A.  */
unsigned long
foo2 (unsigned long a, unsigned long b)
{
return (b << 5) | (a & (((1UL << 5) - 1)));
}
/* risbg cannot be used when the mask keeps fewer bits than the shift
   count (here 1 bit instead of 5). */
/* A is shifted left by 5, but only bit 0 of B is kept, so result bits
   1 to 4 are always zero.  */
unsigned long
foo1b (unsigned long a, unsigned long b)
{
return (a << 5) | (b & 1);
}
/* Same as foo1b with A and B swapped: B is shifted left by 5 and only
   bit 0 of A is kept.  */
unsigned long
foo2b (unsigned long a, unsigned long b)
{
return (b << 5) | (a & 1);
}
/* risbg cannot be used when the masked bits would end up in the
result since a real OR is required then. */
/* The mask 127 keeps 7 bits of B while A is shifted by only 5, so
   result bits 5 and 6 get contributions from both operands.  */
unsigned long
foo1c (unsigned long a, unsigned long b)
{
return (a << 5) | (b & 127);
}
/* Same as foo1c with A and B swapped: B is shifted by 5 and 7 bits of
   A are kept.  */
unsigned long
foo2c (unsigned long a, unsigned long b)
{
return (b << 5) | (a & 127);
}
/* Combine A shifted left by 5 with B shifted right.  The right-shift
   count is 59 when __s390x__ is defined and 27 otherwise; assuming
   unsigned long is 64 resp. 32 bits wide there, exactly the top 5
   bits of B land in result bits 0 to 4.  */
unsigned long
foo3 (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (a << 5) | (b >> 59);
#else
return (a << 5) | (b >> 27);
#endif
}
/* Same as foo3 with A and B swapped: B is shifted left by 5 and A is
   shifted right by 59 (__s390x__) or 27.  */
unsigned long
foo4 (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (b << 5) | (a >> 59);
#else
return (b << 5) | (a >> 27);
#endif
}
/* risbg can be used also if there are some bits spared in the middle
of the two chunks. */
/* A is shifted left by 6 while B is shifted right by 59 (__s390x__)
   or 27.  Assuming a 64 resp. 32 bit unsigned long, B contributes
   bits 0 to 4 only, which leaves result bit 5 zero.  */
unsigned long
foo3b (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (a << 6) | (b >> 59);
#else
return (a << 6) | (b >> 27);
#endif
}
/* Same as foo3b with A and B swapped: B is shifted left by 6 and A is
   shifted right by 59 (__s390x__) or 27.  */
unsigned long
foo4b (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (b << 6) | (a >> 59);
#else
return (b << 6) | (a >> 27);
#endif
}
/* One bit of overlap, so it is better not to use risbg. */
/* A is shifted left by only 4 while B is shifted right by 59
   (__s390x__) or 27.  Assuming a 64 resp. 32 bit unsigned long, B
   contributes bits 0 to 4, so both operands reach result bit 4.  */
unsigned long
foo3c (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (a << 4) | (b >> 59);
#else
return (a << 4) | (b >> 27);
#endif
}
/* Same as foo3c with A and B swapped: B is shifted left by 4 and A is
   shifted right by 59 (__s390x__) or 27.  */
unsigned long
foo4c (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (b << 4) | (a >> 59);
#else
return (b << 4) | (a >> 27);
#endif
}
/* { dg-final { scan-assembler-times "risbg" 6 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -march=zEC12 -mzarch" } */
/* Shift A left by 5 and fill the vacated low 5 bits from B.  The mask
   (1UL << 5) - 1 matches the shift count exactly.  */
unsigned long
foo1 (unsigned long a, unsigned long b)
{
return (a << 5) | (b & (((1UL << 5) - 1)));
}
/* This generates very different RTX than foo1. The output reg (r2)
matches the unshifted argument. So it actually is a
(set (zero_extract a 59 0) b) */
/* Same expression as foo1 with the roles of A and B swapped: B is
   shifted left by 5 and the low 5 bits come from A.  */
unsigned long
foo2 (unsigned long a, unsigned long b)
{
return (b << 5) | (a & (((1UL << 5) - 1)));
}
/* risbgn cannot be used when the mask keeps fewer bits than the shift
   count (here 1 bit instead of 5). */
/* A is shifted left by 5, but only bit 0 of B is kept, so result bits
   1 to 4 are always zero.  */
unsigned long
foo1b (unsigned long a, unsigned long b)
{
return (a << 5) | (b & 1);
}
/* Same as foo1b with A and B swapped: B is shifted left by 5 and only
   bit 0 of A is kept.  */
unsigned long
foo2b (unsigned long a, unsigned long b)
{
return (b << 5) | (a & 1);
}
/* risbgn cannot be used when the masked bits would end up in the
result since a real OR is required then. */
/* The mask 127 keeps 7 bits of B while A is shifted by only 5, so
   result bits 5 and 6 get contributions from both operands.  */
unsigned long
foo1c (unsigned long a, unsigned long b)
{
return (a << 5) | (b & 127);
}
/* Same as foo1c with A and B swapped: B is shifted by 5 and 7 bits of
   A are kept.  */
unsigned long
foo2c (unsigned long a, unsigned long b)
{
return (b << 5) | (a & 127);
}
/* Combine A shifted left by 5 with B shifted right.  The right-shift
   count is 59 when __s390x__ is defined and 27 otherwise; assuming
   unsigned long is 64 resp. 32 bits wide there, exactly the top 5
   bits of B land in result bits 0 to 4.  */
unsigned long
foo3 (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (a << 5) | (b >> 59);
#else
return (a << 5) | (b >> 27);
#endif
}
/* Same as foo3 with A and B swapped: B is shifted left by 5 and A is
   shifted right by 59 (__s390x__) or 27.  */
unsigned long
foo4 (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (b << 5) | (a >> 59);
#else
return (b << 5) | (a >> 27);
#endif
}
/* risbgn can be used also if there are some bits spared in the middle
of the two chunks. */
/* A is shifted left by 6 while B is shifted right by 59 (__s390x__)
   or 27.  Assuming a 64 resp. 32 bit unsigned long, B contributes
   bits 0 to 4 only, which leaves result bit 5 zero.  */
unsigned long
foo3b (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (a << 6) | (b >> 59);
#else
return (a << 6) | (b >> 27);
#endif
}
/* Same as foo3b with A and B swapped: B is shifted left by 6 and A is
   shifted right by 59 (__s390x__) or 27.  */
unsigned long
foo4b (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (b << 6) | (a >> 59);
#else
return (b << 6) | (a >> 27);
#endif
}
/* One bit of overlap, so it is better not to use risbgn. */
/* A is shifted left by only 4 while B is shifted right by 59
   (__s390x__) or 27.  Assuming a 64 resp. 32 bit unsigned long, B
   contributes bits 0 to 4, so both operands reach result bit 4.  */
unsigned long
foo3c (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (a << 4) | (b >> 59);
#else
return (a << 4) | (b >> 27);
#endif
}
/* Same as foo3c with A and B swapped: B is shifted left by 4 and A is
   shifted right by 59 (__s390x__) or 27.  */
unsigned long
foo4c (unsigned long a, unsigned long b)
{
#ifdef __s390x__
return (b << 4) | (a >> 59);
#else
return (b << 4) | (a >> 27);
#endif
}
/* { dg-final { scan-assembler-times "risbgn" 6 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -march=z10 -mzarch" } */
/* risbg with z bit would work here but we rather want this to be a shift. */
/* Global object with a 31-bit bit-field A followed by a 1-bit
   bit-field B, both declared with type int.  */
struct
{
int a:31;
int b:1;
} s;
/* Store IN into the 31-bit field s.a and then clear the 1-bit field
   s.b; the second store is a constant-zero bit-field insertion.  */
void
foo (int in)
{
s.a = in;
s.b = 0;
}
/* { dg-final { scan-assembler-not "risbg" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment