Commit 8cbc2ea8 by Greta Yorsh Committed by Greta Yorsh

arm-protos.h (arm_mac_accumulator_is_result): New declaration.

2013-01-29  Greta Yorsh  <Greta.Yorsh@arm.com>

        * config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
        declaration.
        * config/arm/arm.c (arm_mac_accumulator_is_result): New function.
        * config/arm/cortex-a7.md: New bypasses using
        arm_mac_accumulator_is_result.

From-SVN: r195553
parent 697a3325
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com> 2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
declaration.
* config/arm/arm.c (arm_mac_accumulator_is_result): New function.
* config/arm/cortex-a7.md: New bypasses using
arm_mac_accumulator_is_result.
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation. * config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation.
(cortex_a7_neon_mla): Likewise. (cortex_a7_neon_mla): Likewise.
(cortex_a7_fpfmad): New reservation. (cortex_a7_fpfmad): New reservation.
......
...@@ -101,6 +101,7 @@ extern int arm_early_load_addr_dep (rtx, rtx); ...@@ -101,6 +101,7 @@ extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx); extern int arm_no_early_alu_shift_dep (rtx, rtx);
extern int arm_no_early_alu_shift_value_dep (rtx, rtx); extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx); extern int arm_no_early_mul_dep (rtx, rtx);
extern int arm_mac_accumulator_is_result (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx); extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
extern int tls_mentioned_p (rtx); extern int tls_mentioned_p (rtx);
......
...@@ -24608,6 +24608,62 @@ arm_cxx_guard_type (void) ...@@ -24608,6 +24608,62 @@ arm_cxx_guard_type (void)
return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
} }
/* Return non-zero iff CONSUMER (a multiply-accumulate or a
   multiply-subtract instruction) depends on the result of PRODUCER
   through its accumulator operand only, i.e. neither operand of the
   multiplication mentions that result.  PRODUCER is not required to
   be a multiply-accumulate instruction; that is not checked here.  */
int
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
{
  rtx prod_pat = PATTERN (producer);
  rtx cons_pat = PATTERN (consumer);
  rtx dest, src, mul, acc;

  /* Look through conditional-execution wrappers on either insn.  */
  if (GET_CODE (prod_pat) == COND_EXEC)
    prod_pat = COND_EXEC_CODE (prod_pat);
  if (GET_CODE (cons_pat) == COND_EXEC)
    cons_pat = COND_EXEC_CODE (cons_pat);

  /* Only plain single SETs are handled.  */
  if (GET_CODE (prod_pat) != SET || GET_CODE (cons_pat) != SET)
    return 0;

  dest = XEXP (prod_pat, 0);
  src = XEXP (cons_pat, 1);

  /* The consumer must look like
       (set (...) (plus (mult ...) (...)))
     or
       (set (...) (minus (...) (mult ...))).
     Pick out the multiplication and the accumulator accordingly.  */
  switch (GET_CODE (src))
    {
    case PLUS:
      mul = XEXP (src, 0);
      acc = XEXP (src, 1);
      break;

    case MINUS:
      acc = XEXP (src, 0);
      mul = XEXP (src, 1);
      break;

    default:
      return 0;
    }

  if (GET_CODE (mul) != MULT)
    return 0;

  /* The producer's result must feed the accumulator...  */
  if (!reg_overlap_mentioned_p (dest, acc))
    return 0;

  /* ...and must not feed either multiplication operand.  */
  if (reg_overlap_mentioned_p (dest, XEXP (mul, 0)))
    return 0;

  return !reg_overlap_mentioned_p (dest, XEXP (mul, 1));
}
/* Return non-zero if the consumer (a multiply-accumulate instruction) /* Return non-zero if the consumer (a multiply-accumulate instruction)
has an accumulator dependency on the result of the producer (a has an accumulator dependency on the result of the producer (a
multiplication instruction) and no other dependency on that result. */ multiplication instruction) and no other dependency on that result. */
......
...@@ -137,6 +137,12 @@ ...@@ -137,6 +137,12 @@
(eq_attr "neon_type" "none"))) (eq_attr "neon_type" "none")))
"cortex_a7_both") "cortex_a7_both")
;; Forward the result of a multiply operation to the accumulator
;; of the following multiply and accumulate instruction.
;; The latency of 1 applies between two instructions that both use the
;; cortex_a7_mul reservation, and only when the guard
;; arm_mac_accumulator_is_result returns non-zero, i.e. the consumer
;; mentions the producer's result in its accumulator operand and in
;; neither multiplication operand.
(define_bypass 1 "cortex_a7_mul"
"cortex_a7_mul"
"arm_mac_accumulator_is_result")
;; The latency depends on the operands, so we use an estimate here. ;; The latency depends on the operands, so we use an estimate here.
(define_insn_reservation "cortex_a7_idiv" 5 (define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7") (and (eq_attr "tune" "cortexa7")
...@@ -264,6 +270,10 @@ ...@@ -264,6 +270,10 @@
neon_fp_vmla_qqq_scalar")) neon_fp_vmla_qqq_scalar"))
"cortex_a7_both+cortex_a7_fpmul_pipe") "cortex_a7_both+cortex_a7_fpmul_pipe")
;; Bypass with latency 4 from an instruction using the cortex_a7_fpmacs
;; or cortex_a7_neon_mla reservation to a following instruction using
;; either of those reservations.  It applies only when the guard
;; arm_mac_accumulator_is_result returns non-zero, i.e. the consumer
;; depends on the producer's result through its accumulator operand
;; and not through a multiplication operand.
(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla"
"cortex_a7_fpmacs,cortex_a7_neon_mla"
"arm_mac_accumulator_is_result")
;; Non-multiply instructions can issue between two cycles of a ;; Non-multiply instructions can issue between two cycles of a
;; double-precision multiply. ;; double-precision multiply.
...@@ -285,6 +295,10 @@ ...@@ -285,6 +295,10 @@
(eq_attr "neon_type" "none"))) (eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4")
;; Bypass with latency 7 from an instruction using the cortex_a7_fpmacd
;; reservation to a following instruction using the cortex_a7_fpmacd or
;; cortex_a7_fpfmad reservation.  It applies only when the guard
;; arm_mac_accumulator_is_result returns non-zero, i.e. the consumer
;; depends on the producer's result through its accumulator operand
;; and not through a multiplication operand.
(define_bypass 7 "cortex_a7_fpmacd"
"cortex_a7_fpmacd,cortex_a7_fpfmad"
"arm_mac_accumulator_is_result")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point divide/square root instructions. ;; Floating-point divide/square root instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment