Commit bc071d3a by Wilco Dijkstra

Fix ctz issues (PR93231)

Further improve the ctz recognition: Avoid ICEing on negative shift
counts or multiply constants.  Check the type is a char type for the
string constant case to avoid accidentally matching a wide STRING_CST.
Add a tree_expr_nonzero_p check to allow the optimization even if
CTZ_DEFINED_VALUE_AT_ZERO returns 0 or 1.  Add extra test cases.

Bootstrap OK on AArch64 and x64.

    gcc/
	PR tree-optimization/93231
	* tree-ssa-forwprop.c (optimize_count_trailing_zeroes): Check
	input_type is unsigned.  Use tree_to_shwi for shift constant.
	Check STRING_CST element size is CHAR_TYPE_SIZE bits.
	(simplify_count_trailing_zeroes): Add test to handle known non-zero
	inputs more efficiently.

    testsuite/
	PR tree-optimization/93231
	* gcc.dg/pr90838.c: New test.
	* gcc.dg/pr93231.c: New test.
	* gcc.target/aarch64/pr90838.c: Use #define u 0.
parent 2a0f6c61
2020-01-15 Wilco Dijkstra <wdijkstr@arm.com>
PR tree-optimization/93231
* tree-ssa-forwprop.c (optimize_count_trailing_zeroes): Check
input_type is unsigned. Use tree_to_shwi for shift constant.
Check STRING_CST element size is CHAR_TYPE_SIZE bits.
(simplify_count_trailing_zeroes): Add test to handle known non-zero
inputs more efficiently.
2020-01-15 Uroš Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (*movsf_internal): Do not require
......
2020-01-15 Wilco Dijkstra <wdijkstr@arm.com>
* gcc.dg/pr90838.c: New test.
* gcc.dg/pr93231.c: New test.
* gcc.target/aarch64/pr90838.c: Use #define u 0.
2020-01-15 Richard Biener <rguenther@suse.de>
PR middle-end/93273
......
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
/* Table-lookup count-trailing-zeros where the table is written as a
   string literal.  The index is (x & -x) * 0x077CB531U shifted right by
   27, i.e. the top 5 bits of the product when unsigned is 32 bits wide.
   An input of 0 gives index 0 and therefore returns table[0], which is 0.  */
int ctz1 (unsigned x)
{
/* The two adjacent literals concatenate to exactly 32 characters, so the
   array is completely filled and holds no terminating NUL.  */
static const char table[32] = "\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14"
"\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t";
/* x & -x keeps only the lowest set bit of x.  */
return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
}
/* Table-lookup variant using a 64-entry short table that is given as a
   brace-enclosed initializer.  The index is (x & -x) * 0x0450FBAF shifted
   right by 26.  An input of 0 gives index 0 and therefore returns
   table[0], which is 32.  */
int ctz2 (unsigned x)
{
/* Filler value for table slots.  NOTE(review): presumably these slots are
   never selected by a nonzero input; confirm against the multiplier.  */
#define u 0
static short table[64] =
{
32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
};
/* x & -x keeps only the lowest set bit of x; x is reused for the product.  */
x = (x & -x) * 0x0450FBAF;
return table[x >> 26];
}
/* Table-lookup variant with an explicit zero check: returns 32 for an
   input of 0 without consulting the table.  Otherwise the index is
   (x & -x) * 0x04D7651F shifted right by 27.  */
int ctz3 (unsigned x)
{
static int table[32] =
{
0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
};
/* Zero is handled up front, so the lookup below only sees nonzero x.  */
if (x == 0) return 32;
/* x & -x keeps only the lowest set bit of x; x is reused for the product.  */
x = (x & -x) * 0x04D7651F;
return table[x >> 27];
}
/* File-scope multiplier and 64-entry lookup table read by ctz4.  Unlike
   the other test functions, the table here is not local to the function
   that indexes it.  */
static const unsigned long long magic = 0x03f08c5392f756cdULL;
static const char table[64] = {
0, 1, 12, 2, 13, 22, 17, 3,
14, 33, 23, 36, 18, 58, 28, 4,
62, 15, 34, 26, 24, 48, 50, 37,
19, 55, 59, 52, 29, 44, 39, 5,
63, 11, 21, 16, 32, 35, 57, 27,
61, 25, 47, 49, 54, 51, 43, 38,
10, 20, 31, 56, 60, 46, 53, 42,
9, 30, 45, 41, 8, 40, 7, 6,
};
/* Table-lookup variant for an unsigned long input.  The lowest set bit of
   x is multiplied by the file-scope constant magic and the product is
   shifted right by 58 to index the file-scope table.  An input of 0 gives
   index 0 and therefore returns table[0], which is 0.  */
int ctz4 (unsigned long x)
{
/* x & -x keeps only the lowest set bit of x.  */
unsigned long lsb = x & -x;
/* magic is unsigned long long, so the multiplication is done in that type.  */
return table[(lsb * magic) >> 58];
}
/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-forwprop2-details -Wno-shift-count-negative" } */
/* ICE reproducer: same table-lookup shape as a ctz idiom, but with a
   signed input and a negated multiplier constant.  This only needs to
   compile; the dg-final directive of this test expects that no .CTZ call
   appears in the forwprop2 dump.  The signed arithmetic is deliberate and
   must not be "fixed".  */
int ctz_ice1 (int x)
{
static const char table[32] =
{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
/* Negative multiplier and signed right shift are the point of the test.  */
return table[((int)((x & -x) * -0x077CB531)) >> 27];
}
/* ICE reproducer: same table-lookup shape as a ctz idiom, but with a
   negative shift count.  This only needs to compile; the dg-final
   directive of this test expects that no .CTZ call appears in the
   forwprop2 dump.  The test options pass -Wno-shift-count-negative so the
   deliberate negative shift does not produce a warning.  */
int ctz_ice2 (unsigned x)
{
static const char table[32] =
{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
/* The shift count of -27 is intentional and must be kept as written.  */
return table[((unsigned)((x & -x) * 0x077CB531U)) >> -27];
}
// This should never match
/* The table is an array of unsigned short initialized from a u"" string
   literal, so its elements are wider than char.  The literal supplies 16
   code units; the remaining elements of the 32-entry array are
   zero-initialized.  The dg-final directive of this test expects that no
   .CTZ call appears in the forwprop2 dump.  */
int ctz_fail (unsigned x)
{
static const unsigned short int table[32] =
u"\x0100\x021c\x0e1d\x0318\x161e\x0f14\x1119\x0804\x1b1f\x170d\x1315\x0710\x0c1a\x0612\x050b\x090a";
/* Same index expression shape as a 32-bit ctz table lookup.  */
return table[((x & -x) * 0x077CB531) >> 27];
}
/* { dg-final { scan-tree-dump-not {= \.CTZ} "forwprop2" } } */
...@@ -14,7 +14,7 @@ int ctz1 (unsigned x) ...@@ -14,7 +14,7 @@ int ctz1 (unsigned x)
int ctz2 (unsigned x) int ctz2 (unsigned x)
{ {
const int u = 0; #define u 0
static short table[64] = static short table[64] =
{ {
32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
......
...@@ -1864,9 +1864,9 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc, ...@@ -1864,9 +1864,9 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
tree input_type = TREE_TYPE (x); tree input_type = TREE_TYPE (x);
unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type)); unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type));
/* Check the array is not wider than integer type and the input is a 32-bit /* Check the array element type is not wider than 32 bits and the input is
or 64-bit type. */ an unsigned 32-bit or 64-bit type. */
if (TYPE_PRECISION (type) > 32) if (TYPE_PRECISION (type) > 32 || !TYPE_UNSIGNED (input_type))
return false; return false;
if (input_bits != 32 && input_bits != 64) if (input_bits != 32 && input_bits != 64)
return false; return false;
...@@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc, ...@@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
if (!low || !integer_zerop (low)) if (!low || !integer_zerop (low))
return false; return false;
unsigned shiftval = tree_to_uhwi (tshift); unsigned shiftval = tree_to_shwi (tshift);
/* Check the shift extracts the top 5..7 bits. */ /* Check the shift extracts the top 5..7 bits. */
if (shiftval < input_bits - 7 || shiftval > input_bits - 5) if (shiftval < input_bits - 7 || shiftval > input_bits - 5)
...@@ -1894,7 +1894,8 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc, ...@@ -1894,7 +1894,8 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
if (TREE_CODE (ctor) == CONSTRUCTOR) if (TREE_CODE (ctor) == CONSTRUCTOR)
return check_ctz_array (ctor, val, zero_val, shiftval, input_bits); return check_ctz_array (ctor, val, zero_val, shiftval, input_bits);
if (TREE_CODE (ctor) == STRING_CST) if (TREE_CODE (ctor) == STRING_CST
&& TYPE_PRECISION (type) == CHAR_TYPE_SIZE)
return check_ctz_string (ctor, val, zero_val, shiftval, input_bits); return check_ctz_string (ctor, val, zero_val, shiftval, input_bits);
return false; return false;
...@@ -1920,16 +1921,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi) ...@@ -1920,16 +1921,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
res_ops[1], res_ops[2], zero_val)) res_ops[1], res_ops[2], zero_val))
{ {
tree type = TREE_TYPE (res_ops[0]); tree type = TREE_TYPE (res_ops[0]);
HOST_WIDE_INT ctzval = 0; HOST_WIDE_INT ctz_val = 0;
HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type)); HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
bool zero_ok bool zero_ok
= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctzval) == 2; = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2;
/* If the input value can't be zero, don't special case ctz (0). */
if (tree_expr_nonzero_p (res_ops[0]))
{
zero_ok = true;
zero_val = 0;
ctz_val = 0;
}
/* Skip if there is no value defined at zero, or if we can't easily /* Skip if there is no value defined at zero, or if we can't easily
return the correct value for zero. */ return the correct value for zero. */
if (!zero_ok) if (!zero_ok)
return false; return false;
if (zero_val != ctzval && !(zero_val == 0 && ctzval == type_size)) if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == type_size))
return false; return false;
gimple_seq seq = NULL; gimple_seq seq = NULL;
...@@ -1942,7 +1951,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi) ...@@ -1942,7 +1951,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
tree prev_lhs = gimple_call_lhs (call); tree prev_lhs = gimple_call_lhs (call);
/* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */ /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */
if (zero_val == 0 && ctzval == type_size) if (zero_val == 0 && ctz_val == type_size)
{ {
g = gimple_build_assign (make_ssa_name (integer_type_node), g = gimple_build_assign (make_ssa_name (integer_type_node),
BIT_AND_EXPR, prev_lhs, BIT_AND_EXPR, prev_lhs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment