Commit 571880a0 by Joey Ye

[multiple changes]

2013-11-14  Julian Brown  <julian@codesourcery.com>
        Joey Ye  <joey.ye@arm.com>

        * config/arm/arm.c (arm_cortex_m_branch_cost): New.
        (arm_v7m_tune): New.
        (arm_slowmul_tune, arm_fastmul_tune,
        arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
        arm_cortex_tune, arm_cortex_a15_tune,
        arm_cortex_a5_tune, arm_v6m_tune): Add comments
        for Sched adj cost.
        * config/arm/arm-cores.def (cortex-m4, cortex-m3):
        Use arm_v7m_tune.

testsuite:
2013-11-14  Joey Ye  <joey.ye@arm.com>

        * gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
        * gcc.dg/tree-ssa/vrp47.c: Likewise.
        * gcc.dg/tree-ssa/vrp87.c: Likewise.
        * gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ignore for cortex_m.
        * gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.

From-SVN: r204778
parent 2430d1e2
2013-11-14 Julian Brown <julian@codesourcery.com>
Joey Ye <joey.ye@arm.com>
* config/arm/arm.c (arm_cortex_m_branch_cost): New.
(arm_v7m_tune): New.
(arm_slowmul_tune, arm_fastmul_tune,
arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
arm_cortex_tune, arm_cortex_a15_tune,
arm_cortex_a5_tune, arm_v6m_tune): Add comments
for Sched adj cost.
* config/arm/arm-cores.def (cortex-m4, cortex-m3):
Use arm_v7m_tune.
2013-11-14 Kirill Yukhin <kirill.yukhin@intel.com>
PR target/57491
......@@ -134,8 +134,8 @@ ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, v7m)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, v7m)
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m)
ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, v6m)
ARM_CORE("cortex-m0plus", cortexm0plus, 6M, FL_LDSCHED, v6m)
......
......@@ -266,6 +266,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
......@@ -1260,7 +1261,7 @@ const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
3, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1276,7 +1277,7 @@ const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1295,7 +1296,7 @@ const struct tune_params arm_strongarm_tune =
{
arm_fastmul_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
3, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1327,7 +1328,7 @@ const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1343,7 +1344,7 @@ const struct tune_params arm_v6t2_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1360,7 +1361,7 @@ const struct tune_params arm_cortex_tune =
{
arm_9e_rtx_costs,
&generic_extra_costs,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1392,7 +1393,7 @@ const struct tune_params arm_cortex_a15_tune =
{
arm_9e_rtx_costs,
&cortexa15_extra_costs,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
2, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1411,7 +1412,7 @@ const struct tune_params arm_cortex_a5_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
1, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -1439,13 +1440,36 @@ const struct tune_params arm_cortex_a9_tune =
false /* Prefer Neon for 64-bits bitops. */
};
/* armv7m tuning, selected for cortex-m3 and cortex-m4 in arm-cores.def.
On Cortex-M4 cores for example, MOVW and MOVT each take a single cycle to
execute, i.e. two cycles for the pair. An LDR from the constant pool also
takes two cycles to execute, but mildly increases pipelining opportunity
(consecutive loads/stores can be pipelined together, saving one cycle), and
may also improve icache utilisation. Hence we prefer the constant pool for
such processors (see the "Prefer constant pool" entry below). */
const struct tune_params arm_v7m_tune =
{
arm_9e_rtx_costs, /* Same first entry as arm_9e_tune and arm_cortex_tune. */
&generic_extra_costs, /* Same second entry as arm_cortex_tune. */
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_cortex_m_branch_cost, /* Branch cost. */
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
false /* Prefer Neon for 64-bits bitops. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
const struct tune_params arm_v6m_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
......@@ -11241,6 +11265,20 @@ arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
/* Branch cost for Cortex-M processors.

   Thumb-2 branches are relatively cheap on these cores ("1 + P cycles" on
   Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the
   same amount of time as executed instructions (and the IT instruction
   itself takes space in icache).

   Returns 1 when TARGET_32BIT holds and SPEED_P is true; in every other
   case the decision is delegated to arm_default_branch_cost with the same
   SPEED_P and PREDICTABLE_P.  The value was experimentally determined to
   give good results on a popular embedded benchmark.  */
static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  /* Not 32-bit, or not optimising for speed: use the generic cost.  */
  if (!(TARGET_32BIT && speed_p))
    return arm_default_branch_cost (speed_p, predictable_p);

  return 1;
}
static bool fp_consts_inited = false;
static REAL_VALUE_TYPE value_fp0;
......
2013-11-14 Joey Ye <joey.ye@arm.com>
* gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
* gcc.dg/tree-ssa/vrp47.c: Likewise.
* gcc.dg/tree-ssa/vrp87.c: Likewise.
* gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ignore for cortex_m.
* gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.
2013-11-14 Adam Butcher <adam@jessamine.co.uk>
PR c++/58533
......
/* { dg-do compile { target { ! "m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* arc*-*-*"} } } */
/* { dg-options "-O2 -fdump-tree-forwprop1" } */
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
leading to two conditional jumps when evaluating an && condition. Forwprop1
is not able to optimize this. */
/* { dg-skip-if "" { arm_cortex_m } } */
extern char *frob (void);
extern _Bool testit (void);
......
......@@ -59,9 +59,9 @@ bitmap_ior_and_compl (bitmap dst, const_bitmap a, const_bitmap b,
code we missed the edge when the first conditional is false
(b_elt is zero, which means the second conditional is always
zero. */
/* ARM Cortex-M0 defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
/* ARM Cortex-M defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
so skip below test. */
/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m && arm_thumb1 } } } } } } */
/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m } } } } } } */
/* MIPS defines LOGICAL_OP_NON_SHORT_CIRCUIT to 0, so we split both
"a_elt || b_elt" and "b_elt && kill_elt" into two conditions each,
rather than using "(var1 != 0) op (var2 != 0)". Also, as on other targets,
......
......@@ -26,6 +26,8 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
oof ();
}
/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" } } */
/* ARM Cortex-M defines LOGICAL_OP_NON_SHORT_CIRCUIT to false,
so skip the test below. */
/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" { target { ! arm_cortex_m } } } } */
/* { dg-final { cleanup-tree-dump "vrp1" } } */
......@@ -6,10 +6,10 @@
/* { dg-do compile { target { ! "mips*-*-* arc*-*-* s390*-*-* avr-*-* mn10300-*-*" } } } */
/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-dom1 -fdump-tree-vrp2" } */
/* { dg-additional-options "-march=i586" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
/* Skip on ARM Cortex-M0, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
leading to two conditional jumps when evaluating an && condition. VRP is
not able to optimize this. */
/* { dg-skip-if "" { arm_cortex_m && arm_thumb1} } */
/* { dg-skip-if "" { arm_cortex_m } } */
int h(int x, int y)
{
......
......@@ -2,6 +2,10 @@
/* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */
/* { dg-additional-options "-mbranch-cost=2" { target avr-*-* } } */
/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
leading to two conditional jumps when evaluating an && condition. VRP is
not able to optimize this. */
/* { dg-skip-if "" { arm_cortex_m } } */
struct bitmap_head_def;
typedef struct bitmap_head_def *bitmap;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment