Commit 02231c13 by Terry Guo, committed by Xuepeng Guo

invoke.texi (-mslow-flash-data): Document new option.

gcc/ChangeLog
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* doc/invoke.texi (-mslow-flash-data): Document new option.
	* config/arm/arm.opt (mslow-flash-data): New option.
	* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
	it.
	* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
	are disabled.
	(arm_disable_literal_pool): Declare it.
	* config/arm/arm.c (arm_disable_literal_pool): New variable.
	(arm_option_override): Handle new option.
	(thumb2_legitimate_address_p): Don't allow symbol references when
	literal pools are disabled.
	(arm_max_const_double_inline_cost): New function.
	* config/arm/arm.md (types.md): Include it before ...
	(use_literal_pool): New attribute.
	(enabled): Use new attribute.
	(split pattern): Replace symbol+offset with MOVW/MOVT.

gcc/testsuite/ChangeLog
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* gcc.target/arm/thumb2-slow-flash-data.c: New.

From-SVN: r205342
parent 36ef4e9d
2013-11-25 Terry Guo <terry.guo@arm.com>
* doc/invoke.texi (-mslow-flash-data): Document new option.
* config/arm/arm.opt (mslow-flash-data): New option.
* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
it.
* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
are disabled.
(arm_disable_literal_pool): Declare it.
* config/arm/arm.c (arm_disable_literal_pool): New variable.
(arm_option_override): Handle new option.
(thumb2_legitimate_address_p): Don't allow symbol references when
literal pools are disabled.
(arm_max_const_double_inline_cost): New function.
* config/arm/arm.md (types.md): Include it before ...
(use_literal_pool): New attribute.
(enabled): Use new attribute.
(split pattern): Replace symbol+offset with MOVW/MOVT.
2013-11-24 Steven Bosscher <steven@gcc.gnu.org>
PR bootstrap/59279
...@@ -121,6 +121,7 @@ extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx);
extern rtx arm_gen_return_addr_mask (void);
extern void arm_reload_in_hi (rtx *);
extern void arm_reload_out_hi (rtx *);
extern int arm_max_const_double_inline_cost (void);
extern int arm_const_double_inline_cost (rtx);
extern bool arm_const_double_by_parts (rtx);
extern bool arm_const_double_by_immediates (rtx);
......
...@@ -869,6 +869,9 @@ int arm_arch_thumb_hwdiv;
than core registers. */
int prefer_neon_for_64bits = 0;
/* Nonzero if we shouldn't use literal pools. */
bool arm_disable_literal_pool = false;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
...@@ -2573,6 +2576,16 @@ arm_option_override (void)
if (TARGET_APCS_FRAME)
flag_shrink_wrap = false;
/* We only support -mslow-flash-data on armv7-m targets. */
if (target_slow_flash_data
&& ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
|| (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
/* Currently, for slow flash data, we just disable literal pools. */
if (target_slow_flash_data)
arm_disable_literal_pool = true;
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
...@@ -6417,6 +6430,25 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
&& thumb2_legitimate_index_p (mode, xop0, strict_p)));
}
/* Normally we can assign constant values to target registers without
the help of a constant pool. But there are cases where we have to use
a constant pool, such as:
1) assigning a label to a register;
2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
A constant pool access of the form:
(set (reg r0) (mem (symbol_ref (".LC0"))))
will cause the use of a literal pool (later, in arm_reorg).
So here we mark such an address as invalid, and the compiler will
then adjust it into:
(set (reg r0) (symbol_ref (".LC0")))
(set (reg r0) (mem (reg r0))).
No extra register is required, and (mem (reg r0)) won't cause the use
of literal pools. */
else if (arm_disable_literal_pool && code == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x))
return 0;
else if (GET_MODE_CLASS (mode) != MODE_FLOAT
&& code == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x)
...@@ -16222,6 +16254,19 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
minipool_fix_tail = fix;
}
/* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
as the number of insns needed, or 99 if we always want the value to be
synthesized. */
int
arm_max_const_double_inline_cost ()
{
/* Let the value get synthesized to avoid the use of literal pools. */
if (arm_disable_literal_pool)
return 99;
return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to
do it. */
......
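For illustration (not part of the commit): a minimal C sketch of the effect of these arm.c changes. The compile command and the expected code generation in the comment are assumptions based on the behaviour described above, e.g. something like arm-none-eabi-gcc -O2 -mthumb -mcpu=cortex-m4 -mslow-flash-data -S example.c.

/* Hypothetical example, not part of this commit.  Without
   -mslow-flash-data, the double constant below is typically loaded
   from a per-function literal pool placed next to the code.  With the
   option, arm_max_const_double_inline_cost () returns 99, so the
   constant is expected to be synthesized into core registers with
   movw/movt (or kept in a read-only constant pool) instead, matching
   the testcase added by this commit.  */
double
scale (double x)
{
  return x * 1.2345;
}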
...@@ -329,7 +329,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
/* Should MOVW/MOVT be used in preference to a constant pool. */
#define TARGET_USE_MOVT \
(arm_arch_thumb2 \
&& (arm_disable_literal_pool \
|| (!optimize_size && !current_tune->prefer_constant_pool)))
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
...@@ -554,6 +556,11 @@ extern int arm_arch_thumb_hwdiv;
than core registers. */
extern int prefer_neon_for_64bits;
/* Nonzero if we shouldn't use literal pools. */
#ifndef USED_FOR_TARGET
extern bool arm_disable_literal_pool;
#endif
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
......
...@@ -82,6 +82,9 @@
;; Processor type. This is created automatically from arm-cores.def.
(include "arm-tune.md")
;; Instruction classification types
(include "types.md")
; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
; generating ARM code. This is used to control the length of some insn
; patterns that share the same RTL in both ARM and Thumb code.
...@@ -191,6 +194,12 @@
(const_string "yes")]
(const_string "no")))
(define_attr "use_literal_pool" "no,yes"
(cond [(and (eq_attr "type" "f_loads,f_loadd")
(match_test "CONSTANT_P (operands[1])"))
(const_string "yes")]
(const_string "no")))
; Allows an insn to disable certain alternatives for reasons other than
; arch support.
(define_attr "insn_enabled" "no,yes"
...@@ -210,6 +219,10 @@
(match_test "arm_restrict_it"))
(const_string "no")
(and (eq_attr "use_literal_pool" "yes")
(match_test "arm_disable_literal_pool"))
(const_string "no")
(eq_attr "arch_enabled" "no") (eq_attr "arch_enabled" "no")
(const_string "no") (const_string "no")
...@@ -245,9 +258,6 @@ ...@@ -245,9 +258,6 @@
(set_attr "length" "4") (set_attr "length" "4")
(set_attr "pool_range" "250")]) (set_attr "pool_range" "250")])
;; Instruction classification types
(include "types.md")
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_option_override()
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
...@@ -6049,7 +6059,7 @@
"TARGET_32BIT
&& reload_completed
&& (arm_const_double_inline_cost (operands[1])
<= arm_max_const_double_inline_cost ())"
[(const_int 0)]
"
arm_split_constant (SET, SImode, curr_insn,
...@@ -6312,6 +6322,47 @@
"
)
;; The normal way to materialize (symbol + offset) requires at least three
;; instructions (depending on how big the offset is), for example:
;; movw r0, #:lower16:g
;; movt r0, #:upper16:g
;; adds r0, #4
;;
;; A better way would be:
;; movw r0, #:lower16:g+4
;; movt r0, #:upper16:g+4
;;
;; The limitation of this approach is that the offset must fit in a 16-bit
;; signed value, because the current assembler only supports REL-type
;; relocations for this case. If the more powerful RELA type is supported in
;; the future, we should update this pattern to use it.
(define_split
[(set (match_operand:SI 0 "arm_general_register_operand" "")
(const:SI (plus:SI (match_operand:SI 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" ""))))]
"TARGET_THUMB2
&& arm_disable_literal_pool
&& reload_completed
&& GET_CODE (operands[1]) == SYMBOL_REF"
[(clobber (const_int 0))]
"
int offset = INTVAL (operands[2]);
if (offset < -0x8000 || offset > 0x7fff)
{
arm_emit_movpair (operands[0], operands[1]);
emit_insn (gen_rtx_SET (SImode, operands[0],
gen_rtx_PLUS (SImode, operands[0], operands[2])));
}
else
{
rtx op = gen_rtx_CONST (SImode,
gen_rtx_PLUS (SImode, operands[1], operands[2]));
arm_emit_movpair (operands[0], op);
}
"
)
;; Split symbol_refs at the later stage (after cprop), instead of generating
;; movt/movw pair directly at expand. Otherwise corresponding high_sum
;; and lo_sum would be merged back into memory load at cprop. However,
......
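For illustration (not part of the commit): a minimal C sketch of the case handled by the new (symbol + offset) split. The instructions in the comment are an assumption based on the pattern's own description and only apply when -mslow-flash-data is in effect and the offset fits in a signed 16-bit value.

/* Hypothetical example, not part of this commit.  Returning the
   address of an element of a global array yields (symbol_ref + const),
   which the new define_split is expected to turn into roughly:

       movw    r0, #:lower16:buf+8
       movt    r0, #:upper16:buf+8

   rather than a movw/movt of 'buf' followed by a separate add.  */
extern char buf[32];

char *
get_elem (void)
{
  return &buf[8];
}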
...@@ -271,3 +271,7 @@ Enable unaligned word and halfword accesses to packed data.
mneon-for-64bits
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
Use Neon to perform 64-bits operations rather than core registers.
mslow-flash-data
Target Report Var(target_slow_flash_data) Init(0)
Assume loading data from flash is slower than fetching instructions.
...@@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}.
-mfix-cortex-m3-ldrd @gol
-munaligned-access @gol
-mneon-for-64bits @gol
-mslow-flash-data @gol
-mrestrict-it}
@emph{AVR Options}
...@@ -12345,6 +12346,13 @@ Enables using Neon to handle scalar 64-bits operations. This is
disabled by default since the cost of moving data from core registers
to Neon is high.
@item -mslow-flash-data
@opindex mslow-flash-data
Assume loading data from flash is slower than fetching instructions.
Literal loads are therefore minimized for better performance.
This option is only supported when compiling for ARMv7 M-profile targets
and is off by default.
@item -mrestrict-it
@opindex mrestrict-it
Restricts generation of IT blocks to conform to the rules of ARMv8.
......
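For illustration (not part of the commit): a minimal sketch of the documented behaviour. The compile command and the expected effect are assumptions based on the option's description, e.g. something like arm-none-eabi-gcc -O2 -mthumb -mcpu=cortex-m3 -mslow-flash-data -S example.c.

/* Hypothetical example, not part of this commit.  Without the option,
   the bit pattern of the float constant is normally fetched with a
   PC-relative load from a literal pool in flash.  With
   -mslow-flash-data it is expected to be synthesized in a core
   register (e.g. with a movw/movt pair), avoiding the data fetch from
   flash.  */
float
add_const (float x)
{
  return x + 2.5f;
}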
2013-11-25 Terry Guo <terry.guo@arm.com>
* gcc.target/arm/thumb2-slow-flash-data.c: New.
2013-11-23 Uros Bizjak <ubizjak@gmail.com>
* gcc.dg/float-exact-1.c: Use dg-add-options ieee.
......
/* The option -mslow-flash-data is just for performance tuning; it
doesn't totally disable the use of literal pools. But for the simple
cases below, the use of a literal pool should be replaced by
movw/movt or a read-only constant pool. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_cortex_m } */
/* { dg-require-effective-target arm_thumb2_ok } */
/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
float sf;
double df;
long long l;
static char *p = "Hello World";
float
testsf (float *p)
{
if (*p > 1.1234f)
return 2.1234f;
else
return 3.1234f;
}
double
testdf (double *p)
{
if (*p > 4.1234)
return 2.1234;
else
return 3.1234;
}
long long
testll (long long *p)
{
if (*p > 0x123456789ABCDEFll)
return 0x111111111ll;
else
return 0x222222222ll;
}
char *
testchar ()
{
return p + 4;
}
int
foo (int a, int b)
{
int i;
volatile int *labelref = &&label1;
if (a > b)
{
while (i < b)
{
a += *labelref;
i += 1;
}
goto *labelref;
}
else
b = b + 3;
a = a * b;
label1:
return a + b;
}
/* { dg-final { scan-assembler-times "movt" 13 } } */
/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */