Commit 8cbc2ea8 by Greta Yorsh Committed by Greta Yorsh

arm-protos.h (arm_mac_accumulator_is_result): New declaration.

2013-01-29  Greta Yorsh  <Greta.Yorsh@arm.com>

        * config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
        declaration.
        * config/arm/arm.c (arm_mac_accumulator_is_result): New function.
        * config/arm/cortex-a7.md: New bypasses using
        arm_mac_accumulator_is_result.

From-SVN: r195553
parent 697a3325
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
declaration.
* config/arm/arm.c (arm_mac_accumulator_is_result): New function.
* config/arm/cortex-a7.md: New bypasses using
arm_mac_accumulator_is_result.
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation.
(cortex_a7_neon_mla): Likewise.
(cortex_a7_fpfmad): New reservation.
......
......@@ -101,6 +101,7 @@ extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
extern int arm_mac_accumulator_is_result (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
extern int tls_mentioned_p (rtx);
......
......@@ -24608,6 +24608,62 @@ arm_cxx_guard_type (void)
return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero iff the consumer (a multiply-accumulate or a
multiple-subtract instruction) has an accumulator dependency on the
result of the producer and no other dependency on that result. It
does not check if the producer is multiply-accumulate instruction. */
int
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
{
rtx result;
rtx op0, op1, acc;
producer = PATTERN (producer);
consumer = PATTERN (consumer);
if (GET_CODE (producer) == COND_EXEC)
producer = COND_EXEC_CODE (producer);
if (GET_CODE (consumer) == COND_EXEC)
consumer = COND_EXEC_CODE (consumer);
if (GET_CODE (producer) != SET)
return 0;
result = XEXP (producer, 0);
if (GET_CODE (consumer) != SET)
return 0;
/* Check that the consumer is of the form
(set (...) (plus (mult ...) (...)))
or
(set (...) (minus (...) (mult ...))). */
if (GET_CODE (XEXP (consumer, 1)) == PLUS)
{
if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
return 0;
op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
acc = XEXP (XEXP (consumer, 1), 1);
}
else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
{
if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
return 0;
op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
acc = XEXP (XEXP (consumer, 1), 0);
}
else
return 0;
return (reg_overlap_mentioned_p (result, acc)
&& !reg_overlap_mentioned_p (result, op0)
&& !reg_overlap_mentioned_p (result, op1));
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
has an accumulator dependency on the result of the producer (a
multiplication instruction) and no other dependency on that result. */
......
......@@ -137,6 +137,12 @@
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Forward the result of a multiply operation to the accumulator
;; of the following multiply and accumulate instruction.
(define_bypass 1 "cortex_a7_mul"
"cortex_a7_mul"
"arm_mac_accumulator_is_result")
;; The latency depends on the operands, so we use an estimate here.
(define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7")
......@@ -264,6 +270,10 @@
neon_fp_vmla_qqq_scalar"))
"cortex_a7_both+cortex_a7_fpmul_pipe")
(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla"
"cortex_a7_fpmacs,cortex_a7_neon_mla"
"arm_mac_accumulator_is_result")
;; Non-multiply instructions can issue between two cycles of a
;; double-precision multiply.
......@@ -285,6 +295,10 @@
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4")
(define_bypass 7 "cortex_a7_fpmacd"
"cortex_a7_fpmacd,cortex_a7_fpfmad"
"arm_mac_accumulator_is_result")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point divide/square root instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment