Commit e2839e47 by Robin Dapp Committed by Robin Dapp

S/390: Rework shift count handling.

Add s390_valid_shift_count to determine the validity of a
shift-count operand.  This is used to replace increasingly
complex substitutions that should have allowed address-style
shift-count handling, an AND mask, as well as no-op subregs
on the operand.

gcc/ChangeLog:

2019-07-08  Robin Dapp  <rdapp@linux.ibm.com>

        * config/s390/constraints.md: Add new jsc constraint.
        * config/s390/predicates.md: New predicates.
        * config/s390/s390-protos.h (s390_valid_shift_count): New function.
        * config/s390/s390.c (s390_valid_shift_count): New function.
        (print_shift_count_operand): Use s390_valid_shift_count.
        (print_operand): Likewise.
        * config/s390/s390.md: Use new predicate.
        * config/s390/subst.md: Remove addr_style_op and masked_op substs.
        * config/s390/vector.md: Use new predicate.

2019-07-08  Robin Dapp  <rdapp@linux.ibm.com>

        * gcc.target/s390/combine-rotate-modulo.c: New test.
        * gcc.target/s390/combine-shift-rotate-add-mod.c: New test.
        * gcc.target/s390/vector/combine-shift-vec.c: New test.

From-SVN: r273236
parent f069f0f2
2019-07-08 Robin Dapp <rdapp@linux.ibm.com>
* config/s390/constraints.md: Add new jsc constraint.
* config/s390/predicates.md: New predicates.
* config/s390/s390-protos.h (s390_valid_shift_count): New function.
* config/s390/s390.c (s390_valid_shift_count): New function.
(print_shift_count_operand): Use s390_valid_shift_count.
(print_operand): Likewise.
* config/s390/s390.md: Use new predicate.
* config/s390/subst.md: Remove addr_style_op and masked_op substs.
* config/s390/vector.md: Use new predicate.
2019-07-08 Joern Rennecke <joern.rennecke@riscy-ip.com>
Avoid clash with system header declaration.
......
......@@ -204,6 +204,18 @@
(match_test "s390_decompose_addrstyle_without_index (op, NULL, NULL)" ))
;; Shift count operands are not necessarily legitimate addresses
;; but the predicate shift_count_operand will only allow
;; proper operands.  If reload/lra need to change e.g. a spilled register
;; they can still do so via the special handling of address constraints.
;; To avoid further reloading (caused by a non-matching constraint) we
;; always return true here as the predicate's checks are already sufficient.
(define_address_constraint "jsc"
  "Address style operand used as shift count."
  (match_test "true" ))
;; N -- Multiple letter constraint followed by 4 parameter letters.
;; 0..9,x: number of the part counting from most to least significant
;; S,H,Q: mode of the part
......
......@@ -556,3 +556,32 @@
{
return memory_operand (op, mode) && !contains_symbol_ref_p (op);
})
;; Check for a valid shift count operand with an implicit
;; shift truncation mask of 63, i.e. only the low six bits of the
;; count are significant, as for shifts of 64-bit quantities.
;; An explicit (and ... imm) in the operand is therefore only
;; accepted if imm has all bits of 63 set (checked in
;; s390_valid_shift_count).
(define_predicate "shift_count_operand"
  (and (match_code "reg, subreg, and, plus, const_int")
       (match_test "CONST_INT_P (op) || GET_MODE (op) == E_QImode"))
{
  return s390_valid_shift_count (op, 63);
}
)
;; This is used as operand predicate.  As we do not know
;; the mode of the first operand here and the shift truncation
;; mask depends on the mode, we cannot check the mask.
;; Passing 0 to s390_valid_shift_count disables the mask check.
;; This is supposed to happen in the insn condition which
;; calls s390_valid_shift_count with the proper mode size.
;; We need two separate predicates for non-vector and vector
;; shifts since the (less restrictive) insn condition is checked
;; after the more restrictive operand predicate which will
;; disallow the operand before we can check the condition.
(define_predicate "shift_count_operand_vec"
  (and (match_code "reg, subreg, and, plus, const_int")
       (match_test "CONST_INT_P (op) || GET_MODE (op) == E_QImode"))
{
  return s390_valid_shift_count (op, 0);
}
)
......@@ -141,6 +141,7 @@ extern void s390_emit_tpf_eh_return (rtx);
extern bool s390_legitimate_address_without_index_p (rtx);
extern bool s390_decompose_addrstyle_without_index (rtx, rtx *,
HOST_WIDE_INT *);
extern bool s390_valid_shift_count (rtx op, HOST_WIDE_INT required_mask = 63);
extern int s390_branch_condition_mask (rtx);
extern int s390_compare_and_branch_condition_mask (rtx);
extern bool s390_extzv_shift_ok (int, int, unsigned HOST_WIDE_INT);
......
......@@ -3131,6 +3131,49 @@ s390_decompose_addrstyle_without_index (rtx op, rtx *base,
return true;
}
/* Check that OP is a valid shift count operand.
   It should be of the following structure:
     (subreg (and (plus (reg imm_op)) 2^k-1) 7)
   where subreg, and, and plus are optional.

   If IMPLICIT_MASK is > 0 and OP contains an and
     (and ... immediate)
   it is checked whether all bits of IMPLICIT_MASK are set in the
   immediate, i.e. whether the and is a no-op given the shift
   instruction's implicit truncation.  Otherwise, no mask checking
   is performed.  */

bool
s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
{
  /* Strip subreg.  Only lowpart subregs are accepted since only those
     are no-ops with respect to the low bits that the shift count
     actually uses.  */
  while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
    op = XEXP (op, 0);

  /* Check for an and with proper constant.  */
  if (GET_CODE (op) == AND)
    {
      rtx op1 = XEXP (op, 0);
      rtx imm = XEXP (op, 1);

      /* The masked value may itself be wrapped in a (lowpart) subreg.  */
      if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
	op1 = XEXP (op1, 0);

      /* Under the and, only a register or an address-style
	 (plus reg imm) is acceptable.  */
      if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
	return false;

      if (!immediate_operand (imm, GET_MODE (imm)))
	return false;

      HOST_WIDE_INT val = INTVAL (imm);

      /* With a nonzero IMPLICIT_MASK the and must not clear any bit the
	 instruction keeps; otherwise it would change the shift count.  */
      if (implicit_mask > 0
	  && (val & implicit_mask) != implicit_mask)
	return false;

      op = op1;
    }

  /* Check the rest: a plain register or reg + displacement as used for
     address-style operands.  */
  return s390_decompose_addrstyle_without_index (op, NULL, NULL);
}
/* Return true if CODE is a valid address without index. */
......@@ -7448,6 +7491,27 @@ print_addrstyle_operand (FILE *file, rtx op)
fprintf (file, "(%s)", reg_names[REGNO (base)]);
}
/* Print the shift count operand OP to FILE.
   OP is an address-style operand in a form which
   s390_valid_shift_count permits.  Any wrapping subregs and a
   no-op and-mask are peeled off before printing the underlying
   address-style operand.  */

static void
print_shift_count_operand (FILE *file, rtx op)
{
  /* The operand predicate already vetted the and-mask, so a mask
     argument of 0 (no mask checking) suffices here.  */
  if (!s390_valid_shift_count (op, 0))
    gcc_unreachable ();

  /* Peel off subreg wrappers.  */
  for (; op && GET_CODE (op) == SUBREG; op = SUBREG_REG (op))
    ;

  /* Drop a redundant masking operation.  */
  if (GET_CODE (op) == AND)
    op = XEXP (op, 0);

  print_addrstyle_operand (file, op);
}
/* Assigns the number of NOP halfwords to be emitted before and after the
function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
If hotpatching is disabled for the function, the values are set to zero.
......@@ -7912,7 +7976,7 @@ print_operand (FILE *file, rtx x, int code)
break;
case 'Y':
print_addrstyle_operand (file, x);
print_shift_count_operand (file, x);
return;
}
......
......@@ -8937,17 +8937,17 @@
(define_expand "rotl<mode>3"
[(set (match_operand:GPR 0 "register_operand" "")
(rotate:GPR (match_operand:GPR 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
(match_operand:QI 2 "shift_count_operand" "")))]
""
"")
; rll, rllg
(define_insn "*rotl<mode>3<addr_style_op><masked_op>"
(define_insn "*rotl<mode>3"
[(set (match_operand:GPR 0 "register_operand" "=d")
(rotate:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:SI 2 "nonmemory_operand" "an")))]
(match_operand:QI 2 "shift_count_operand" "jsc")))]
""
"rll<g>\t%0,%1,<addr_style_op_ops>"
"rll<g>\t%0,%1,%Y2"
[(set_attr "op_type" "RSE")
(set_attr "atype" "reg")
(set_attr "z10prop" "z10_super_E1")])
......@@ -8964,18 +8964,18 @@
(define_expand "<shift><mode>3"
[(set (match_operand:DSI 0 "register_operand" "")
(SHIFT:DSI (match_operand:DSI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
(match_operand:QI 2 "shift_count_operand" "")))]
""
"")
; ESA 64 bit register pair shift with reg or imm shift count
; sldl, srdl
(define_insn "*<shift>di3_31<addr_style_op><masked_op>"
(define_insn "*<shift>di3_31"
[(set (match_operand:DI 0 "register_operand" "=d")
(SHIFT:DI (match_operand:DI 1 "register_operand" "0")
(match_operand:SI 2 "nonmemory_operand" "an")))]
(match_operand:QI 2 "shift_count_operand" "jsc")))]
"!TARGET_ZARCH"
"s<lr>dl\t%0,<addr_style_op_ops>"
"s<lr>dl\t%0,%Y2"
[(set_attr "op_type" "RS")
(set_attr "atype" "reg")
(set_attr "z196prop" "z196_cracked")])
......@@ -8983,19 +8983,20 @@
; 64 bit register shift with reg or imm shift count
; sll, srl, sllg, srlg, sllk, srlk
(define_insn "*<shift><mode>3<addr_style_op><masked_op>"
(define_insn "*<shift><mode>3"
[(set (match_operand:GPR 0 "register_operand" "=d, d")
(SHIFT:GPR (match_operand:GPR 1 "register_operand" "<d0>, d")
(match_operand:SI 2 "nonmemory_operand" "an,an")))]
(match_operand:QI 2 "shift_count_operand" "jsc,jsc")))]
""
"@
s<lr>l<g>\t%0,<1><addr_style_op_ops>
s<lr>l<gk>\t%0,%1,<addr_style_op_ops>"
s<lr>l<g>\t%0,<1>%Y2
s<lr>l<gk>\t%0,%1,%Y2"
[(set_attr "op_type" "RS<E>,RSY")
(set_attr "atype" "reg,reg")
(set_attr "cpu_facility" "*,z196")
(set_attr "z10prop" "z10_super_E1,*")])
;
; ashr(di|si)3 instruction pattern(s).
; Arithmetic right shifts
......@@ -9004,7 +9005,7 @@
[(parallel
[(set (match_operand:DSI 0 "register_operand" "")
(ashiftrt:DSI (match_operand:DSI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))
(match_operand:QI 2 "shift_count_operand" "")))
(clobber (reg:CC CC_REGNUM))])]
""
"")
......@@ -9013,29 +9014,29 @@
; number of 2 in the subst pattern for the (clobber (match_scratch...
; The right fix should be to support match_scratch in the output
; pattern of a define_subst.
(define_insn "*ashrdi3_31<addr_style_op_cc><masked_op_cc><setcc><cconly>"
(define_insn "*ashrdi3_31<setcc><cconly>"
[(set (match_operand:DI 0 "register_operand" "=d, d")
(ashiftrt:DI (match_operand:DI 1 "register_operand" "0, 0")
(match_operand:SI 2 "nonmemory_operand" "an,an")))
(match_operand:QI 2 "shift_count_operand" "jsc,jsc")))
(clobber (reg:CC CC_REGNUM))]
"!TARGET_ZARCH"
"@
srda\t%0,<addr_style_op_cc_ops>
srda\t%0,<addr_style_op_cc_ops>"
srda\t%0,%Y2
srda\t%0,%Y2"
[(set_attr "op_type" "RS")
(set_attr "atype" "reg")])
; sra, srag
(define_insn "*ashr<mode>3<addr_style_op_cc><masked_op_cc><setcc><cconly>"
(define_insn "*ashr<mode>3<setcc><cconly>"
[(set (match_operand:GPR 0 "register_operand" "=d, d")
(ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>, d")
(match_operand:SI 2 "nonmemory_operand" "an,an")))
(match_operand:QI 2 "shift_count_operand" "jsc,jsc")))
(clobber (reg:CC CC_REGNUM))]
""
"@
sra<g>\t%0,<1><addr_style_op_cc_ops>
sra<gk>\t%0,%1,<addr_style_op_cc_ops>"
sra<g>\t%0,<1>%Y2
sra<gk>\t%0,%1,%Y2"
[(set_attr "op_type" "RS<E>,RSY")
(set_attr "atype" "reg")
(set_attr "cpu_facility" "*,z196")
......
......@@ -22,78 +22,6 @@
(define_code_iterator SUBST [rotate ashift lshiftrt ashiftrt])
(define_mode_iterator DSI_VI [SI DI V2QI V4QI V8QI V16QI V2HI V4HI V8HI V2SI V4SI V2DI])
; This expands an register/immediate operand to a register+immediate
; operand to draw advantage of the address style operand format
; providing a addition for free.
(define_subst "addr_style_op_subst"
[(set (match_operand:DSI_VI 0 "" "")
(SUBST:DSI_VI (match_operand:DSI_VI 1 "" "")
(match_operand:SI 2 "" "")))]
""
[(set (match_dup 0)
(SUBST:DSI_VI (match_dup 1)
(plus:SI (match_operand:SI 2 "register_operand" "a")
(match_operand 3 "const_int_operand" "n"))))])
; Use this in the insn name.
(define_subst_attr "addr_style_op" "addr_style_op_subst" "" "_plus")
; In the subst pattern the additional const int operand will be used
; as displacement. In the normal version %Y is able to print the
; operand either as displacement or as base register.
(define_subst_attr "addr_style_op_ops" "addr_style_op_subst" "%Y2" "%Y3(%2)")
; This substitution adds an explicit AND operation to the second
; operand. This way previous operations on the now masked out bits
; might get optimized away.
(define_subst "masked_op_subst"
[(set (match_operand:DSI 0 "" "")
(SUBST:DSI (match_operand:DSI 1 "" "")
(match_operand:SI 2 "" "")))]
""
[(set (match_dup 0)
(SUBST:DSI (match_dup 1)
(and:SI (match_dup 2)
(match_operand:SI 3 "const_int_6bitset_operand" "jm6"))))])
; Use this in the insn name.
(define_subst_attr "masked_op" "masked_op_subst" "" "_and")
; This is like the addr_style_op substitution above but with a CC clobber.
(define_subst "addr_style_op_cc_subst"
[(set (match_operand:DSI 0 "" "")
(ashiftrt:DSI (match_operand:DSI 1 "" "")
(match_operand:SI 2 "" "")))
(clobber (reg:CC CC_REGNUM))]
"REG_P (operands[2])"
[(set (match_dup 0)
(ashiftrt:DSI (match_dup 1)
(plus:SI (match_dup 2)
(match_operand 3 "const_int_operand" "n"))))
(clobber (reg:CC CC_REGNUM))])
(define_subst_attr "addr_style_op_cc" "addr_style_op_cc_subst" "" "_plus")
(define_subst_attr "addr_style_op_cc_ops" "addr_style_op_cc_subst" "%Y2" "%Y3(%2)")
; This is like the masked_op substitution but with a CC clobber.
(define_subst "masked_op_cc_subst"
[(set (match_operand:DSI 0 "" "")
(ashiftrt:DSI (match_operand:DSI 1 "" "")
(match_operand:SI 2 "" "")))
(clobber (reg:CC CC_REGNUM))]
""
[(set (match_dup 0)
(ashiftrt:DSI (match_dup 1)
(and:SI (match_dup 2)
(match_operand:SI 3 "const_int_6bitset_operand" ""))))
(clobber (reg:CC CC_REGNUM))])
(define_subst_attr "masked_op_cc" "masked_op_cc_subst" "" "_and")
; This adds an explicit CC reg set to an operation while keeping the
; set for the operation result as well.
(define_subst "setcc_subst"
......
......@@ -969,21 +969,25 @@
(define_expand "<vec_shifts_name><mode>3"
[(set (match_operand:VI 0 "register_operand" "")
(VEC_SHIFTS:VI (match_operand:VI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
(match_operand:QI 2 "shift_count_operand" "")))]
"TARGET_VX")
; verllb, verllh, verllf, verllg
; veslb, veslh, veslf, veslg
; vesrab, vesrah, vesraf, vesrag
; vesrlb, vesrlh, vesrlf, vesrlg
(define_insn "*<vec_shifts_name><mode>3<addr_style_op>"
(define_insn "*<vec_shifts_name><mode>3"
[(set (match_operand:VI 0 "register_operand" "=v")
(VEC_SHIFTS:VI (match_operand:VI 1 "register_operand" "v")
(match_operand:SI 2 "nonmemory_operand" "an")))]
"TARGET_VX"
"<vec_shifts_mnem><bhfgq>\t%v0,%v1,<addr_style_op_ops>"
(match_operand:QI 2 "shift_count_operand_vec" "jsc")))]
"TARGET_VX
&& s390_valid_shift_count (operands[2],
GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) - 1)
"
"<vec_shifts_mnem><bhfgq>\t%v0,%v1,%Y2"
[(set_attr "op_type" "VRS")])
; Shift each element by corresponding vector element
; veslvb, veslvh, veslvf, veslvg
......
2019-07-08 Robin Dapp <rdapp@linux.ibm.com>
* gcc.target/s390/combine-rotate-modulo.c: New test.
* gcc.target/s390/combine-shift-rotate-add-mod.c: New test.
* gcc.target/s390/vector/combine-shift-vec.c: New test.
2019-07-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/91108
......
/* Check that we do not emit & 63 via risbg for rotating. */
/* { dg-options "-O1 -m64" } */
/* { dg-final { scan-assembler-not "risbg" } } */
/* { dg-final { scan-assembler-not "nilf" } } */
long shiftl (long in, unsigned long sh)
{
sh %= 64;
return (in << sh);
}
unsigned long shiftll (unsigned long in, unsigned long sh)
{
sh %= 64;
return (in << sh);
}
long shiftr (long in, unsigned long sh)
{
sh %= 64;
return (in >> sh);
}
unsigned long shiftrl (unsigned long in, unsigned long sh)
{
sh %= 64;
return (in >> sh);
}
unsigned long rotlmod (unsigned long in, unsigned long sh)
{
sh %= 64;
return (in << sh) | (in >> (64 - sh));
}
/* Check shift via address-style displacement. There should not be any
and operations that the instructions perform implicitly anyway.*/
/* { dg-options "-O1 -m64" } */
/* { dg-final { scan-assembler-not "risbg\t%r.+,.*63" } } */
/* { dg-final { scan-assembler "rllg\t%r.+,3.%r.+" } } */
/* { dg-final { scan-assembler "sllg\t%r.+,2.%r.+" } } */
unsigned long rotlmodp (unsigned long in, unsigned long sh)
{
sh = (sh + 3) % 64;
return (in << sh) | (in >> (64 - sh));
}
unsigned long shiftmodp (unsigned long in, unsigned long sh)
{
sh = (sh + 2) % 64;
return (in << sh);
}
/* We expect a displacement of 1 here since combine simplifies
modulo 255 when substituting into a QImode subreg. */
/* { dg-final { scan-assembler "sllg\t%r.+,1.%r.+" } } */
unsigned long shiftp (unsigned long in, unsigned long sh)
{
sh = sh + 4097;
return (in << sh);
}
/* Check vector shift patterns.  */

/* Each function below adds a small constant to the count and reduces it
   modulo the element size; the addition should appear as the displacement
   of an address-style shift-count operand (e.g. 2(%r2)) and the modulo
   should be subsumed by the implicit truncation of the vector shift
   instructions -- hence no ahi/nilf/risbg expected.  */

/* { dg-options "-march=z13 -O1 -m64" } */
/* { dg-final { scan-assembler "veslb\t%v.+,%v.+,2.%r2" } } */
/* { dg-final { scan-assembler "veslh\t%v.+,%v.+,3.%r2" } } */
/* { dg-final { scan-assembler "veslf\t%v.+,%v.+,4.%r2" } } */
/* { dg-final { scan-assembler "veslg\t%v.+,%v.+,5.%r2" } } */
/* { dg-final { scan-assembler "vesrab\t%v.+,%v.+,2.%r2" } } */
/* { dg-final { scan-assembler "vesrah\t%v.+,%v.+,3.%r2" } } */
/* { dg-final { scan-assembler "vesraf\t%v.+,%v.+,4.%r2" } } */
/* { dg-final { scan-assembler "vesrag\t%v.+,%v.+,5.%r2" } } */
/* { dg-final { scan-assembler "vesrlb\t%v.+,%v.+,2.%r2" } } */
/* { dg-final { scan-assembler "vesrlh\t%v.+,%v.+,3.%r2" } } */
/* { dg-final { scan-assembler "vesrlf\t%v.+,%v.+,4.%r2" } } */
/* { dg-final { scan-assembler "vesrlg\t%v.+,%v.+,5.%r2" } } */
/* { dg-final { scan-assembler-not "ahi" } } */
/* { dg-final { scan-assembler-not "nilf" } } */
/* { dg-final { scan-assembler-not "risbg" } } */

/* Left shifts, one per element width.  */

typedef __attribute__((vector_size(16))) signed char v16qi;
v16qi vshiftlqi (v16qi in, unsigned int sh)
{
  sh = (sh + 2) % 8;
  return (in << sh);
}

typedef __attribute__((vector_size(16))) signed short v8hi;
v8hi vshiftlhi (v8hi in, unsigned int sh)
{
  sh = (sh + 3) % 16;
  return (in << sh);
}

typedef __attribute__((vector_size(16))) signed int v4si;
v4si vshiftlsi (v4si in, unsigned int sh)
{
  sh = (sh + 4) % 32;
  return (in << sh);
}

typedef __attribute__((vector_size(16))) signed long v2di;
v2di vshiftldi (v2di in, unsigned int sh)
{
  sh = (sh + 5) % 64;
  return (in << sh);
}

/* Logical right shifts (unsigned element types).  */

typedef __attribute__((vector_size(16))) unsigned char uv16qi;
uv16qi vshiftrqiu (uv16qi in, unsigned int sh)
{
  sh = (sh + 2) % 8;
  return (in >> sh);
}

typedef __attribute__((vector_size(16))) unsigned short uv8hi;
uv8hi vshiftrhiu (uv8hi in, unsigned int sh)
{
  sh = (sh + 3) % 16;
  return (in >> sh);
}

typedef __attribute__((vector_size(16))) unsigned int uv4si;
uv4si vshiftrsiu (uv4si in, unsigned int sh)
{
  sh = (sh + 4) % 32;
  return (in >> sh);
}

typedef __attribute__((vector_size(16))) unsigned long uv2di;
uv2di vshiftrdiu (uv2di in, unsigned int sh)
{
  sh = (sh + 5) % 64;
  return (in >> sh);
}

/* Arithmetic right shifts (signed element types).  */

v16qi vshiftrqi (v16qi in, unsigned int sh)
{
  sh = (sh + 2) % 8;
  return (in >> sh);
}

v8hi vshiftrhi (v8hi in, unsigned int sh)
{
  sh = (sh + 3) % 16;
  return (in >> sh);
}

v4si vshiftrsi (v4si in, unsigned int sh)
{
  sh = (sh + 4) % 32;
  return (in >> sh);
}

v2di vshiftrdi (v2di in, unsigned int sh)
{
  sh = (sh + 5) % 64;
  return (in >> sh);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment