Commit c2b640a7 by Andrew Stubbs Committed by Andrew Stubbs

arm.c (struct four_ints): New type.

2011-08-26  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.c (struct four_ints): New type.
	(count_insns_for_constant): Delete function.
	(find_best_start): Delete function.
	(optimal_immediate_sequence): New function.
	(optimal_immediate_sequence_1): New function.
	(arm_gen_constant): Move constant splitting code to
	optimal_immediate_sequence.
	Rewrite constant negation/inversion code.

	gcc/testsuite/
	* gcc.target/arm/thumb2-replicated-constant1.c: New file.
	* gcc.target/arm/thumb2-replicated-constant2.c: New file.
	* gcc.target/arm/thumb2-replicated-constant3.c: New file.
	* gcc.target/arm/thumb2-replicated-constant4.c: New file.

2011-08-26  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
	* config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
	Remove prototype. Remove static function type.
	* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
	Add arch attribute.
	* config/arm/constraints.md (Pj, PJ): New constraints.

From-SVN: r178099
parent 58a2bda0
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
* config/arm/arm.c (struct four_ints): New type.
(count_insns_for_constant): Delete function.
(find_best_start): Delete function.
(optimal_immediate_sequence): New function.
(optimal_immediate_sequence_1): New function.
(arm_gen_constant): Move constant splitting code to
optimal_immediate_sequence.
Rewrite constant negation/inversion code.
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
* config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
Remove prototype. Remove static function type.
* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
Add arch attribute.
* config/arm/constraints.md (Pj, PJ): New constraints.
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> 2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/arm/cortex-a9.md ("cortex_a9_mult_long"): New. * config/arm/cortex-a9.md ("cortex_a9_mult_long"): New.
......
...@@ -47,6 +47,7 @@ extern bool arm_vector_mode_supported_p (enum machine_mode); ...@@ -47,6 +47,7 @@ extern bool arm_vector_mode_supported_p (enum machine_mode);
extern bool arm_small_register_classes_for_mode_p (enum machine_mode); extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT); extern int const_ok_for_arm (HOST_WIDE_INT);
extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int); HOST_WIDE_INT, rtx, rtx, int);
extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
......
...@@ -64,6 +64,11 @@ typedef struct minipool_fixup Mfix; ...@@ -64,6 +64,11 @@ typedef struct minipool_fixup Mfix;
void (*arm_lang_output_object_attributes_hook)(void); void (*arm_lang_output_object_attributes_hook)(void);
struct four_ints
{
int i[4];
};
/* Forward function declarations. */ /* Forward function declarations. */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree); static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void); static int arm_compute_static_chain_stack_bytes (void);
...@@ -82,7 +87,6 @@ inline static int thumb1_index_register_rtx_p (rtx, int); ...@@ -82,7 +87,6 @@ inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void); static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void); static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int); static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void); static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int); static bool arm_assemble_integer (rtx, unsigned int, int);
...@@ -129,7 +133,13 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); ...@@ -129,7 +133,13 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree); static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree); static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int); static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int); static int optimal_immediate_sequence (enum rtx_code code,
unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence,
int i);
static int arm_get_strip_length (int); static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree); static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree, static enum machine_mode arm_promote_function_mode (const_tree,
...@@ -2375,7 +2385,7 @@ const_ok_for_arm (HOST_WIDE_INT i) ...@@ -2375,7 +2385,7 @@ const_ok_for_arm (HOST_WIDE_INT i)
} }
/* Return true if I is a valid constant for the operation CODE. */ /* Return true if I is a valid constant for the operation CODE. */
static int int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{ {
if (const_ok_for_arm (i)) if (const_ok_for_arm (i))
...@@ -2392,6 +2402,13 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) ...@@ -2392,6 +2402,13 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
case PLUS: case PLUS:
/* See if we can use addw or subw. */
if (TARGET_THUMB2
&& ((i & 0xfffff000) == 0
|| ((-i) & 0xfffff000) == 0))
return 1;
/* else fall through. */
case COMPARE: case COMPARE:
case EQ: case EQ:
case NE: case NE:
...@@ -2507,58 +2524,30 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, ...@@ -2507,58 +2524,30 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1); 1);
} }
/* Return the number of instructions required to synthesize the given /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
constant, if we start emitting them from bit-position I. */ ARM/THUMB2 immediates, and add up to VAL.
	The function return value gives the number of insns required. */
static int static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i) optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
{ struct four_ints *return_sequence)
HOST_WIDE_INT temp1;
int step_size = TARGET_ARM ? 2 : 1;
int num_insns = 0;
gcc_assert (TARGET_ARM || i == 0);
do
{
int end;
if (i <= 0)
i += 32;
if (remainder & (((1 << step_size) - 1) << (i - step_size)))
{
end = i - 8;
if (end < 0)
end += 32;
temp1 = remainder & ((0x0ff << end)
| ((i < end) ? (0xff >> (32 - end)) : 0));
remainder &= ~temp1;
num_insns++;
i -= 8 - step_size;
}
i -= step_size;
} while (remainder);
return num_insns;
}
static int
find_best_start (unsigned HOST_WIDE_INT remainder)
{ {
int best_consecutive_zeros = 0; int best_consecutive_zeros = 0;
int i; int i;
int best_start = 0; int best_start = 0;
int insns1, insns2;
struct four_ints tmp_sequence;
/* If we aren't targetting ARM, the best place to start is always at /* If we aren't targetting ARM, the best place to start is always at
the bottom. */ the bottom, otherwise look more closely. */
if (! TARGET_ARM) if (TARGET_ARM)
return 0; {
for (i = 0; i < 32; i += 2) for (i = 0; i < 32; i += 2)
{ {
int consecutive_zeros = 0; int consecutive_zeros = 0;
if (!(remainder & (3 << i))) if (!(val & (3 << i)))
{ {
while ((i < 32) && !(remainder & (3 << i))) while ((i < 32) && !(val & (3 << i)))
{ {
consecutive_zeros += 2; consecutive_zeros += 2;
i += 2; i += 2;
...@@ -2571,6 +2560,7 @@ find_best_start (unsigned HOST_WIDE_INT remainder) ...@@ -2571,6 +2560,7 @@ find_best_start (unsigned HOST_WIDE_INT remainder)
i -= 2; i -= 2;
} }
} }
}
/* So long as it won't require any more insns to do so, it's /* So long as it won't require any more insns to do so, it's
desirable to emit a small constant (in bits 0...9) in the last desirable to emit a small constant (in bits 0...9) in the last
...@@ -2595,13 +2585,161 @@ find_best_start (unsigned HOST_WIDE_INT remainder) ...@@ -2595,13 +2585,161 @@ find_best_start (unsigned HOST_WIDE_INT remainder)
the constant starting from `best_start', and also starting from the constant starting from `best_start', and also starting from
zero (i.e. with bit 31 first to be output). If `best_start' doesn't zero (i.e. with bit 31 first to be output). If `best_start' doesn't
yield a shorter sequence, we may as well use zero. */ yield a shorter sequence, we may as well use zero. */
insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
if (best_start != 0 if (best_start != 0
&& ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
&& (count_insns_for_constant (remainder, 0) <= {
count_insns_for_constant (remainder, best_start))) insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
best_start = 0; if (insns2 <= insns1)
{
*return_sequence = tmp_sequence;
insns1 = insns2;
}
}
return best_start; return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I. */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence, int i)
{
int remainder = val & 0xffffffff;
int insns = 0;
/* Try and find a way of doing the job in either two or three
instructions.
In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
location. We start at position I. This may be the MSB, or
	optimal_immediate_sequence may have positioned it at the largest block
of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
wrapping around to the top of the word when we drop off the bottom.
In the worst case this code should produce no more than four insns.
In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
constants, shifted to any arbitrary location. We should always start
at the MSB. */
do
{
int end;
int b1, b2, b3, b4;
unsigned HOST_WIDE_INT result;
int loc;
gcc_assert (insns < 4);
if (i <= 0)
i += 32;
/* First, find the next normal 12/8-bit shifted/rotated immediate. */
if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
{
loc = i;
if (i <= 12 && TARGET_THUMB2 && code == PLUS)
/* We can use addw/subw for the last 12 bits. */
result = remainder;
else
{
/* Use an 8-bit shifted/rotated immediate. */
end = i - 8;
if (end < 0)
end += 32;
result = remainder & ((0x0ff << end)
| ((i < end) ? (0xff >> (32 - end))
: 0));
i -= 8;
}
}
else
{
/* Arm allows rotates by a multiple of two. Thumb-2 allows
arbitrary shifts. */
i -= TARGET_ARM ? 2 : 1;
continue;
}
/* Next, see if we can do a better job with a thumb2 replicated
constant.
We do it this way around to catch the cases like 0x01F001E0 where
two 8-bit immediates would work, but a replicated constant would
make it worse.
TODO: 16-bit constants that don't clear all the bits, but still win.
TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
if (TARGET_THUMB2)
{
b1 = (remainder & 0xff000000) >> 24;
b2 = (remainder & 0x00ff0000) >> 16;
b3 = (remainder & 0x0000ff00) >> 8;
b4 = remainder & 0xff;
if (loc > 24)
{
/* The 8-bit immediate already found clears b1 (and maybe b2),
but must leave b3 and b4 alone. */
/* First try to find a 32-bit replicated constant that clears
almost everything. We can assume that we can't do it in one,
or else we wouldn't be here. */
unsigned int tmp = b1 & b2 & b3 & b4;
unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
+ (tmp << 24);
unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
+ (tmp == b3) + (tmp == b4);
if (tmp
&& (matching_bytes >= 3
|| (matching_bytes == 2
&& const_ok_for_op (remainder & ~tmp2, code))))
{
/* At least 3 of the bytes match, and the fourth has at
least as many bits set, or two of the bytes match
and it will only require one more insn to finish. */
result = tmp2;
i = tmp != b1 ? 32
: tmp != b2 ? 24
: tmp != b3 ? 16
: 8;
}
/* Second, try to find a 16-bit replicated constant that can
leave three of the bytes clear. If b2 or b4 is already
zero, then we can. If the 8-bit from above would not
clear b2 anyway, then we still win. */
else if (b1 == b3 && (!b2 || !b4
|| (remainder & 0x00ff0000 & ~result)))
{
result = remainder & 0xff00ff00;
i = 24;
}
}
else if (loc > 16)
{
/* The 8-bit immediate already found clears b2 (and maybe b3)
	and we don't get here unless b1 is already clear, but it will
leave b4 unchanged. */
/* If we can clear b2 and b4 at once, then we win, since the
8-bits couldn't possibly reach that far. */
if (b2 == b4)
{
result = remainder & 0x00ff00ff;
i = 16;
}
}
}
return_sequence->i[insns++] = result;
remainder &= ~result;
if (code == SET || code == MINUS)
code = PLUS;
}
while (remainder);
return insns;
} }
/* Emit an instruction with the indicated PATTERN. If COND is /* Emit an instruction with the indicated PATTERN. If COND is
...@@ -2618,7 +2756,6 @@ emit_constant_insn (rtx cond, rtx pattern) ...@@ -2618,7 +2756,6 @@ emit_constant_insn (rtx cond, rtx pattern)
/* As above, but extra parameter GENERATE which, if clear, suppresses /* As above, but extra parameter GENERATE which, if clear, suppresses
RTL generation. */ RTL generation. */
/* ??? This needs more work for thumb2. */
static int static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
...@@ -2629,15 +2766,15 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, ...@@ -2629,15 +2766,15 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
int can_negate = 0; int can_negate = 0;
int final_invert = 0; int final_invert = 0;
int i; int i;
int num_bits_set = 0;
int set_sign_bit_copies = 0; int set_sign_bit_copies = 0;
int clear_sign_bit_copies = 0; int clear_sign_bit_copies = 0;
int clear_zero_bit_copies = 0; int clear_zero_bit_copies = 0;
int set_zero_bit_copies = 0; int set_zero_bit_copies = 0;
int insns = 0; int insns = 0, neg_insns, inv_insns;
unsigned HOST_WIDE_INT temp1, temp2; unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff; unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
int step_size = TARGET_ARM ? 2 : 1; struct four_ints *immediates;
struct four_ints pos_immediates, neg_immediates, inv_immediates;
/* Find out which operations are safe for a given CODE. Also do a quick /* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations check for degenerate cases; these can occur when DImode operations
...@@ -2646,7 +2783,6 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, ...@@ -2646,7 +2783,6 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
{ {
case SET: case SET:
can_invert = 1; can_invert = 1;
can_negate = 1;
break; break;
case PLUS: case PLUS:
...@@ -2714,6 +2850,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, ...@@ -2714,6 +2850,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
gen_rtx_NOT (mode, source))); gen_rtx_NOT (mode, source)));
return 1; return 1;
} }
final_invert = 1;
break; break;
case MINUS: case MINUS:
...@@ -2736,7 +2873,6 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, ...@@ -2736,7 +2873,6 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
source))); source)));
return 1; return 1;
} }
can_negate = 1;
break; break;
...@@ -3148,86 +3284,71 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, ...@@ -3148,86 +3284,71 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
break; break;
} }
for (i = 0; i < 32; i++) /* Calculate what the instruction sequences would be if we generated it
if (remainder & (1 << i)) normally, negated, or inverted. */
num_bits_set++; if (code == AND)
/* AND cannot be split into multiple insns, so invert and use BIC. */
if ((code == AND) || (can_invert && num_bits_set > 16)) insns = 99;
remainder ^= 0xffffffff;
else if (code == PLUS && num_bits_set > 16)
remainder = (-remainder) & 0xffffffff;
/* For XOR, if more than half the bits are set and there's a sequence
of more than 8 consecutive ones in the pattern then we can XOR by the
inverted constant and then invert the final result; this may save an
instruction and might also lead to the final mvn being merged with
some other operation. */
else if (code == XOR && num_bits_set > 16
&& (count_insns_for_constant (remainder ^ 0xffffffff,
find_best_start
(remainder ^ 0xffffffff))
< count_insns_for_constant (remainder,
find_best_start (remainder))))
{
remainder ^= 0xffffffff;
final_invert = 1;
}
else else
{ insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
can_invert = 0;
can_negate = 0;
}
/* Now try and find a way of doing the job in either two or three if (can_negate)
instructions. neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
We start by looking for the largest block of zeros that are aligned on &neg_immediates);
a 2-bit boundary, we then fill up the temps, wrapping around to the else
top of the word when we drop off the bottom. neg_insns = 99;
In the worst case this code should produce no more than four insns.
Thumb-2 constants are shifted, not rotated, so the MSB is always the
best place to start. */
/* ??? Use thumb2 replicated constants when the high and low halfwords are if (can_invert || final_invert)
the same. */ inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
{ &inv_immediates);
/* Now start emitting the insns. */ else
i = find_best_start (remainder); inv_insns = 99;
do
immediates = &pos_immediates;
/* Is the negated immediate sequence more efficient? */
if (neg_insns < insns && neg_insns <= inv_insns)
{ {
int end; insns = neg_insns;
immediates = &neg_immediates;
}
else
can_negate = 0;
if (i <= 0) /* Is the inverted immediate sequence more efficient?
i += 32; We must allow for an extra NOT instruction for XOR operations, although
if (remainder & (3 << (i - 2))) there is some chance that the final 'mvn' will get optimized later. */
if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
{ {
end = i - 8; insns = inv_insns;
if (end < 0) immediates = &inv_immediates;
end += 32; }
temp1 = remainder & ((0x0ff << end) else
| ((i < end) ? (0xff >> (32 - end)) : 0)); {
remainder &= ~temp1; can_invert = 0;
final_invert = 0;
}
/* Now output the chosen sequence as instructions. */
if (generate) if (generate)
{ {
for (i = 0; i < insns; i++)
{
rtx new_src, temp1_rtx; rtx new_src, temp1_rtx;
temp1 = immediates->i[i];
if (code == SET || code == MINUS) if (code == SET || code == MINUS)
{
new_src = (subtargets ? gen_reg_rtx (mode) : target); new_src = (subtargets ? gen_reg_rtx (mode) : target);
if (can_invert && code != MINUS) else if ((final_invert || i < (insns - 1)) && subtargets)
temp1 = ~temp1;
}
else
{
if ((final_invert || remainder) && subtargets)
new_src = gen_reg_rtx (mode); new_src = gen_reg_rtx (mode);
else else
new_src = target; new_src = target;
if (can_invert) if (can_invert)
temp1 = ~temp1; temp1 = ~temp1;
else if (can_negate) else if (can_negate)
temp1 = -temp1; temp1 = -temp1;
}
temp1 = trunc_int_for_mode (temp1, mode); temp1 = trunc_int_for_mode (temp1, mode);
temp1_rtx = GEN_INT (temp1); temp1_rtx = GEN_INT (temp1);
...@@ -3243,24 +3364,16 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, ...@@ -3243,24 +3364,16 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
gen_rtx_SET (VOIDmode, new_src, gen_rtx_SET (VOIDmode, new_src,
temp1_rtx)); temp1_rtx));
source = new_src; source = new_src;
}
if (code == SET) if (code == SET)
{ {
can_invert = 0; can_invert = 0;
can_negate = 1;
code = PLUS; code = PLUS;
} }
else if (code == MINUS) else if (code == MINUS)
code = PLUS; code = PLUS;
insns++;
i -= 8 - step_size;
} }
/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
shifts. */
i -= step_size;
}
while (remainder);
} }
if (final_invert) if (final_invert)
......
...@@ -708,21 +708,24 @@ ...@@ -708,21 +708,24 @@
;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will ;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
;; put the duplicated register first, and not try the commutative version. ;; put the duplicated register first, and not try the commutative version.
(define_insn_and_split "*arm_addsi3" (define_insn_and_split "*arm_addsi3"
[(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r") [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk") (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
(match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))] (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
"TARGET_32BIT" "TARGET_32BIT"
"@ "@
add%?\\t%0, %1, %2 add%?\\t%0, %1, %2
add%?\\t%0, %1, %2 add%?\\t%0, %1, %2
add%?\\t%0, %2, %1 add%?\\t%0, %2, %1
addw%?\\t%0, %1, %2
addw%?\\t%0, %1, %2
sub%?\\t%0, %1, #%n2 sub%?\\t%0, %1, #%n2
sub%?\\t%0, %1, #%n2 sub%?\\t%0, %1, #%n2
subw%?\\t%0, %1, #%n2
subw%?\\t%0, %1, #%n2
#" #"
"TARGET_32BIT "TARGET_32BIT
&& GET_CODE (operands[2]) == CONST_INT && GET_CODE (operands[2]) == CONST_INT
&& !(const_ok_for_arm (INTVAL (operands[2])) && !const_ok_for_op (INTVAL (operands[2]), PLUS)
|| const_ok_for_arm (-INTVAL (operands[2])))
&& (reload_completed || !arm_eliminable_register (operands[1]))" && (reload_completed || !arm_eliminable_register (operands[1]))"
[(clobber (const_int 0))] [(clobber (const_int 0))]
" "
...@@ -731,8 +734,9 @@ ...@@ -731,8 +734,9 @@
operands[1], 0); operands[1], 0);
DONE; DONE;
" "
[(set_attr "length" "4,4,4,4,4,16") [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
(set_attr "predicable" "yes")] (set_attr "predicable" "yes")
(set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
) )
(define_insn_and_split "*thumb1_addsi3" (define_insn_and_split "*thumb1_addsi3"
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used: ;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd ;; in Thumb-1 state: Pa, Pb, Pc, Pd
;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used: ;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
...@@ -75,6 +75,18 @@ ...@@ -75,6 +75,18 @@
(and (match_code "const_int") (and (match_code "const_int")
(match_test "(ival & 0xffff0000) == 0"))))) (match_test "(ival & 0xffff0000) == 0")))))
(define_constraint "Pj"
"@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
(and (match_code "const_int")
(and (match_test "TARGET_THUMB2")
(match_test "(ival & 0xfffff000) == 0"))))
(define_constraint "PJ"
"@internal A constant that satisfies the Pj constraint if negated."
(and (match_code "const_int")
(and (match_test "TARGET_THUMB2")
(match_test "((-ival) & 0xfffff000) == 0"))))
(define_register_constraint "k" "STACK_REG" (define_register_constraint "k" "STACK_REG"
"@internal The stack register.") "@internal The stack register.")
......
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
* gcc.target/arm/thumb2-replicated-constant1.c: New file.
* gcc.target/arm/thumb2-replicated-constant2.c: New file.
* gcc.target/arm/thumb2-replicated-constant3.c: New file.
* gcc.target/arm/thumb2-replicated-constant4.c: New file.
2011-08-25 Mikael Morin <mikael.morin@gcc.gnu.org> 2011-08-25 Mikael Morin <mikael.morin@gcc.gnu.org>
PR fortran/50050 PR fortran/50050
......
/* Ensure simple replicated constant immediates work. */
/* { dg-options "-mthumb -O2" } */
/* { dg-require-effective-target arm_thumb2_ok } */
int
foo1 (int a)
{
/* 0xfefefefe is the byte 0xfe replicated into all four byte positions,
   so it should be encodable as a single Thumb-2 modified immediate;
   the scan-assembler below expects one add of #-16843010 (its signed
   32-bit value).  */
return a + 0xfefefefe;
}
/* { dg-final { scan-assembler "add.*#-16843010" } } */
int
foo2 (int a)
{
/* 0xab00ab00 replicates the halfword 0xab00; expect a single sub with
   the replicated immediate (#-1426019584 signed).  */
return a - 0xab00ab00;
}
/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
int
foo3 (int a)
{
/* 0x00cd00cd replicates the halfword 0x00cd; expect one and insn with
   immediate #13435085 (== 0x00cd00cd).  */
return a & 0x00cd00cd;
}
/* { dg-final { scan-assembler "and.*#13435085" } } */
/* Ensure split constants can use replicated patterns. */
/* { dg-options "-mthumb -O2" } */
/* { dg-require-effective-target arm_thumb2_ok } */
int
foo1 (int a)
{
/* Expect a two-insn split: the replicated halfword part 0xfe00fe00
   (#-33489408 signed) plus the remainder #1.  */
return a + 0xfe00fe01;
}
/* { dg-final { scan-assembler "add.*#-33489408" } } */
/* { dg-final { scan-assembler "add.*#1" } } */
int
foo2 (int a)
{
/* Expect a split into the replicated part 0xdd00dd00 (#-587145984
   signed) plus the remainder 0x00010000 (#65536).  */
return a + 0xdd01dd00;
}
/* { dg-final { scan-assembler "add.*#-587145984" } } */
/* { dg-final { scan-assembler "add.*#65536" } } */
int
foo3 (int a)
{
/* Expect a split into the replicated part 0x00440044 (#4456516) plus
   the remainder 0x00003300 (#13056).  */
return a + 0x00443344;
}
/* { dg-final { scan-assembler "add.*#4456516" } } */
/* { dg-final { scan-assembler "add.*#13056" } } */
int
foo4 (int a)
{
/* Expect a split into 0x77000000 (#1996488704) plus the replicated
   part 0x00330033 (#3342387).  */
return a + 0x77330033;
}
/* { dg-final { scan-assembler "add.*#1996488704" } } */
/* { dg-final { scan-assembler "add.*#3342387" } } */
int
foo5 (int a)
{
/* Expect a split into two replicated halves: 0x11001100 (#285217024)
   plus 0x00220022 (#2228258).  */
return a + 0x11221122;
}
/* { dg-final { scan-assembler "add.*#285217024" } } */
/* { dg-final { scan-assembler "add.*#2228258" } } */
int
foo6 (int a)
{
/* Expect a split into the fully replicated byte 0x66666666
   (#1717986918) plus the remainder 0x11 (#17).  */
return a + 0x66666677;
}
/* { dg-final { scan-assembler "add.*#1717986918" } } */
/* { dg-final { scan-assembler "add.*#17" } } */
int
foo7 (int a)
{
/* Expect a split into the fully replicated byte 0x88888888
   (#-2004318072 signed) plus the remainder 0x11000000 (#285212672).  */
return a + 0x99888888;
}
/* { dg-final { scan-assembler "add.*#-2004318072" } } */
/* { dg-final { scan-assembler "add.*#285212672" } } */
int
foo8 (int a)
{
/* Expect a split into the fully replicated byte 0xdddddddd
   (#-572662307 signed) plus the low 12 bits 0x222 (#546), the latter
   emitted with addw.  */
return a + 0xdddddfff;
}
/* { dg-final { scan-assembler "add.*#-572662307" } } */
/* { dg-final { scan-assembler "addw.*#546" } } */
/* Ensure negated/inverted replicated constant immediates work. */
/* { dg-options "-mthumb -O2" } */
/* { dg-require-effective-target arm_thumb2_ok } */
int
foo1 (int a)
{
/* The inverse of 0xffffff00 is 0xff, so expect a single orn (OR-NOT)
   with immediate #255 rather than a multi-insn orr sequence.  */
return a | 0xffffff00;
}
/* { dg-final { scan-assembler "orn.*#255" } } */
int
foo2 (int a)
{
/* The inverse of 0xffeeffee is the replicated halfword 0x00110011
   (#1114129), so expect a single bic (AND-NOT) insn.  */
return a & 0xffeeffee;
}
/* { dg-final { scan-assembler "bic.*#1114129" } } */
int
foo3 (int a)
{
/* Expect an and with the replicated byte 0xaaaaaaaa (#-1431655766
   signed) followed by a bic of the leftover low byte 0xaa (#170).  */
return a & 0xaaaaaa00;
}
/* { dg-final { scan-assembler "and.*#-1431655766" } } */
/* { dg-final { scan-assembler "bic.*#170" } } */
/* Ensure replicated constants don't make things worse. */
/* { dg-options "-mthumb -O2" } */
/* { dg-require-effective-target arm_thumb2_ok } */
int
foo1 (int a)
{
/* It might be tempting to use 0x01000100, but it wouldn't help. */
/* Expect the plain two-insn split 0x01f00000 (#32505856) plus 0x1e0
   (#480) — a replicated constant would not reduce the insn count.  */
return a + 0x01f001e0;
}
/* { dg-final { scan-assembler "add.*#32505856" } } */
/* { dg-final { scan-assembler "add.*#480" } } */
int
foo2 (int a)
{
/* Expect the plain two-insn split 0x0f100000 (#252706816) plus 0xe10
   (#3600); replication should not be chosen here either.  */
return a + 0x0f100e10;
}
/* { dg-final { scan-assembler "add.*#252706816" } } */
/* { dg-final { scan-assembler "add.*#3600" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment