Commit 02231c13 by Terry Guo, committed by Xuepeng Guo

invoke.texi (-mslow-flash-data): Document new option.

gcc/ChangeLog
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* doc/invoke.texi (-mslow-flash-data): Document new option.
	* config/arm/arm.opt (mslow-flash-data): New option.
	* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
	it.
	* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
	are disabled.
	(arm_disable_literal_pool): Declare it.
	* config/arm/arm.c (arm_disable_literal_pool): New variable.
	(arm_option_override): Handle new option.
	(thumb2_legitimate_address_p): Don't allow symbol references when
	literal pools are disabled.
	(arm_max_const_double_inline_cost): New function.
	* config/arm/arm.md (types.md): Include it before ...
	(use_literal_pool): New attribute.
	(enabled): Use new attribute.
	(split pattern): Replace symbol+offset with MOVW/MOVT.

gcc/testsuite/ChangeLog
2013-11-25  Terry Guo  <terry.guo@arm.com>

	* gcc.target/arm/thumb2-slow-flash-data.c: New.

From-SVN: r205342
parent 36ef4e9d
2013-11-25 Terry Guo <terry.guo@arm.com>
* doc/invoke.texi (-mslow-flash-data): Document new option.
* config/arm/arm.opt (mslow-flash-data): New option.
* config/arm/arm-protos.h (arm_max_const_double_inline_cost): Declare
it.
* config/arm/arm.h (TARGET_USE_MOVT): Always true when literal pools
are disabled.
(arm_disable_literal_pool): Declare it.
* config/arm/arm.c (arm_disable_literal_pool): New variable.
(arm_option_override): Handle new option.
(thumb2_legitimate_address_p): Don't allow symbol references when
literal pools are disabled.
(arm_max_const_double_inline_cost): New function.
* config/arm/arm.md (types.md): Include it before ...
(use_literal_pool): New attribute.
(enabled): Use new attribute.
(split pattern): Replace symbol+offset with MOVW/MOVT.
2013-11-24 Steven Bosscher <steven@gcc.gnu.org>
PR bootstrap/59279
...@@ -121,6 +121,7 @@ extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx);
extern rtx arm_gen_return_addr_mask (void);
extern void arm_reload_in_hi (rtx *);
extern void arm_reload_out_hi (rtx *);
extern int arm_max_const_double_inline_cost (void);
extern int arm_const_double_inline_cost (rtx);
extern bool arm_const_double_by_parts (rtx);
extern bool arm_const_double_by_immediates (rtx);
......
...@@ -869,6 +869,9 @@ int arm_arch_thumb_hwdiv;
than core registers. */
int prefer_neon_for_64bits = 0;
/* Nonzero if we shouldn't use literal pools. */
bool arm_disable_literal_pool = false;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
...@@ -2573,6 +2576,16 @@ arm_option_override (void)
if (TARGET_APCS_FRAME)
flag_shrink_wrap = false;
/* We only support -mslow-flash-data on armv7-m targets. */
if (target_slow_flash_data
&& ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
|| (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
/* Currently, for slow flash data, we just disable literal pools. */
if (target_slow_flash_data)
arm_disable_literal_pool = true;
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
...@@ -6417,6 +6430,25 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
&& thumb2_legitimate_index_p (mode, xop0, strict_p)));
}
/* Normally we can assign constant values to target registers without
the help of a constant pool. But there are cases where we have to use
a constant pool, such as:
1) assigning a label to a register;
2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
A constant pool access of the form:
(set (reg r0) (mem (symbol_ref (".LC0"))))
will cause the use of a literal pool (later, in arm_reorg).
So here we mark such an address as invalid, and the compiler will
then adjust it into:
(set (reg r0) (symbol_ref (".LC0")))
(set (reg r0) (mem (reg r0))).
No extra register is required, and (mem (reg r0)) won't cause the use
of literal pools. */
else if (arm_disable_literal_pool && code == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x))
return 0;
else if (GET_MODE_CLASS (mode) != MODE_FLOAT
&& code == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x)
...@@ -16222,6 +16254,19 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
minipool_fix_tail = fix;
}
/* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
as the number of insns needed, or 99 if we always want the value to be
synthesized. */
int
arm_max_const_double_inline_cost ()
{
/* Let the value get synthesized to avoid the use of literal pools. */
if (arm_disable_literal_pool)
return 99;
return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to
do it. */
......
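For illustration (not part of the commit): a minimal C sketch of the effect of these arm.c changes. The compile command and the expected code generation in the comment are assumptions based on the behaviour described above, e.g. something like arm-none-eabi-gcc -O2 -mthumb -mcpu=cortex-m4 -mslow-flash-data -S example.c.

/* Hypothetical example, not part of this commit.  Without
   -mslow-flash-data, the double constant below is typically loaded
   from a per-function literal pool placed next to the code.  With the
   option, arm_max_const_double_inline_cost () returns 99, so the
   constant is expected to be synthesized into core registers with
   movw/movt (or kept in a read-only constant pool) instead, matching
   the testcase added by this commit.  */
double
scale (double x)
{
  return x * 1.2345;
}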
...@@ -329,7 +329,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
/* Should MOVW/MOVT be used in preference to a constant pool. */
#define TARGET_USE_MOVT \
(arm_arch_thumb2 \
&& (arm_disable_literal_pool \
|| (!optimize_size && !current_tune->prefer_constant_pool)))
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
...@@ -554,6 +556,11 @@ extern int arm_arch_thumb_hwdiv;
than core registers. */
extern int prefer_neon_for_64bits;
/* Nonzero if we shouldn't use literal pools. */
#ifndef USED_FOR_TARGET
extern bool arm_disable_literal_pool;
#endif
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
......
...@@ -82,6 +82,9 @@
;; Processor type. This is created automatically from arm-cores.def.
(include "arm-tune.md")
;; Instruction classification types
(include "types.md")
; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
; generating ARM code. This is used to control the length of some insn
; patterns that share the same RTL in both ARM and Thumb code.
...@@ -191,6 +194,12 @@
(const_string "yes")]
(const_string "no")))
(define_attr "use_literal_pool" "no,yes"
(cond [(and (eq_attr "type" "f_loads,f_loadd")
(match_test "CONSTANT_P (operands[1])"))
(const_string "yes")]
(const_string "no")))
; Allows an insn to disable certain alternatives for reasons other than
; arch support.
(define_attr "insn_enabled" "no,yes"
...@@ -210,6 +219,10 @@
(match_test "arm_restrict_it"))
(const_string "no")
(and (eq_attr "use_literal_pool" "yes")
(match_test "arm_disable_literal_pool"))
(const_string "no")
(eq_attr "arch_enabled" "no") (eq_attr "arch_enabled" "no")
(const_string "no") (const_string "no")
...@@ -245,9 +258,6 @@ ...@@ -245,9 +258,6 @@
(set_attr "length" "4") (set_attr "length" "4")
(set_attr "pool_range" "250")]) (set_attr "pool_range" "250")])
;; Instruction classification types
(include "types.md")
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_option_override()
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
...@@ -6049,7 +6059,7 @@
"TARGET_32BIT
&& reload_completed
&& (arm_const_double_inline_cost (operands[1])
<= arm_max_const_double_inline_cost ())"
[(const_int 0)]
"
arm_split_constant (SET, SImode, curr_insn,
...@@ -6312,6 +6322,47 @@
"
)
;; The normal way to materialize (symbol + offset) requires at least three
;; instructions (depending on how big the offset is), for example:
;; movw r0, #:lower16:g
;; movt r0, #:upper16:g
;; adds r0, #4
;;
;; A better way would be:
;; movw r0, #:lower16:g+4
;; movt r0, #:upper16:g+4
;;
;; The limitation of this approach is that the offset must fit in a 16-bit
;; signed value, because the current assembler only supports REL-type
;; relocations for this case. If the more powerful RELA type is supported in
;; the future, we should update this pattern to use it.
(define_split
[(set (match_operand:SI 0 "arm_general_register_operand" "")
(const:SI (plus:SI (match_operand:SI 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" ""))))]
"TARGET_THUMB2
&& arm_disable_literal_pool
&& reload_completed
&& GET_CODE (operands[1]) == SYMBOL_REF"
[(clobber (const_int 0))]
"
int offset = INTVAL (operands[2]);
if (offset < -0x8000 || offset > 0x7fff)
{
arm_emit_movpair (operands[0], operands[1]);
emit_insn (gen_rtx_SET (SImode, operands[0],
gen_rtx_PLUS (SImode, operands[0], operands[2])));
}
else
{
rtx op = gen_rtx_CONST (SImode,
gen_rtx_PLUS (SImode, operands[1], operands[2]));
arm_emit_movpair (operands[0], op);
}
"
)
;; Split symbol_refs at the later stage (after cprop), instead of generating
;; movt/movw pair directly at expand. Otherwise corresponding high_sum
;; and lo_sum would be merged back into memory load at cprop. However,
......
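For illustration (not part of the commit): a minimal C sketch of the case handled by the new (symbol + offset) split. The instructions in the comment are an assumption based on the pattern's own description and only apply when -mslow-flash-data is in effect and the offset fits in a signed 16-bit value.

/* Hypothetical example, not part of this commit.  Returning the
   address of an element of a global array yields (symbol_ref + const),
   which the new define_split is expected to turn into roughly:

       movw    r0, #:lower16:buf+8
       movt    r0, #:upper16:buf+8

   rather than a movw/movt of 'buf' followed by a separate add.  */
extern char buf[32];

char *
get_elem (void)
{
  return &buf[8];
}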
...@@ -271,3 +271,7 @@ Enable unaligned word and halfword accesses to packed data.
mneon-for-64bits
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
Use Neon to perform 64-bits operations rather than core registers.
mslow-flash-data
Target Report Var(target_slow_flash_data) Init(0)
Assume loading data from flash is slower than fetching instructions.
...@@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}.
-mfix-cortex-m3-ldrd @gol
-munaligned-access @gol
-mneon-for-64bits @gol
-mslow-flash-data @gol
-mrestrict-it}
@emph{AVR Options}
...@@ -12345,6 +12346,13 @@ Enables using Neon to handle scalar 64-bits operations. This is
disabled by default since the cost of moving data from core registers
to Neon is high.
@item -mslow-flash-data
@opindex mslow-flash-data
Assume loading data from flash is slower than fetching instructions.
Literal loads are therefore minimized for better performance.
This option is only supported when compiling for ARMv7 M-profile targets
and is off by default.
@item -mrestrict-it
@opindex mrestrict-it
Restricts generation of IT blocks to conform to the rules of ARMv8.
......
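For illustration (not part of the commit): a minimal sketch of the documented behaviour. The compile command and the expected effect are assumptions based on the option's description, e.g. something like arm-none-eabi-gcc -O2 -mthumb -mcpu=cortex-m3 -mslow-flash-data -S example.c.

/* Hypothetical example, not part of this commit.  Without the option,
   the bit pattern of the float constant is normally fetched with a
   PC-relative load from a literal pool in flash.  With
   -mslow-flash-data it is expected to be synthesized in a core
   register (e.g. with a movw/movt pair), avoiding the data fetch from
   flash.  */
float
add_const (float x)
{
  return x + 2.5f;
}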
2013-11-25 Terry Guo <terry.guo@arm.com>
* gcc.target/arm/thumb2-slow-flash-data.c: New.
2013-11-23 Uros Bizjak <ubizjak@gmail.com>
* gcc.dg/float-exact-1.c: Use dg-add-options ieee.
......
/* The option -mslow-flash-data is just for performance tuning; it
doesn't totally disable the use of literal pools. But for the simple
cases below, the use of a literal pool should be replaced by
movw/movt or a read-only constant pool. */
/* { dg-do compile } */
/* { dg-require-effective-target arm_cortex_m } */
/* { dg-require-effective-target arm_thumb2_ok } */
/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
float sf;
double df;
long long l;
static char *p = "Hello World";
float
testsf (float *p)
{
if (*p > 1.1234f)
return 2.1234f;
else
return 3.1234f;
}
double
testdf (double *p)
{
if (*p > 4.1234)
return 2.1234;
else
return 3.1234;
}
long long
testll (long long *p)
{
if (*p > 0x123456789ABCDEFll)
return 0x111111111ll;
else
return 0x222222222ll;
}
char *
testchar ()
{
return p + 4;
}
int
foo (int a, int b)
{
int i;
volatile int *labelref = &&label1;
if (a > b)
{
while (i < b)
{
a += *labelref;
i += 1;
}
goto *labelref;
}
else
b = b + 3;
a = a * b;
label1:
return a + b;
}
/* { dg-final { scan-assembler-times "movt" 13 } } */
/* { dg-final { scan-assembler-times "movt.*LC0\\+4" 1 } } */