arm-protos.h (arm_mac_accumulator_is_result): New declaration.
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com> * config/arm/arm-protos.h (arm_mac_accumulator_is_result): New declaration. * config/arm/arm.c (arm_mac_accumulator_is_result): New function. * config/arm/cortex-a7.md: New bypasses using arm_mac_accumulator_is_result. From-SVN: r195553
This commit is contained in:
parent
697a3325ef
commit
8cbc2ea849
4 changed files with 79 additions and 0 deletions
|
@ -1,3 +1,11 @@
|
|||
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
|
||||
|
||||
* config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
|
||||
declaration.
|
||||
* config/arm/arm.c (arm_mac_accumulator_is_result): New function.
|
||||
* config/arm/cortex-a7.md: New bypasses using
|
||||
arm_mac_accumulator_is_result.
|
||||
|
||||
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
|
||||
|
||||
* config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation.
|
||||
|
|
|
@ -101,6 +101,7 @@ extern int arm_early_load_addr_dep (rtx, rtx);
|
|||
extern int arm_no_early_alu_shift_dep (rtx, rtx);
|
||||
extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
|
||||
extern int arm_no_early_mul_dep (rtx, rtx);
|
||||
extern int arm_mac_accumulator_is_result (rtx, rtx);
|
||||
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
|
||||
|
||||
extern int tls_mentioned_p (rtx);
|
||||
|
|
|
@ -24608,6 +24608,62 @@ arm_cxx_guard_type (void)
|
|||
return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
|
||||
}
|
||||
|
||||
/* Return non-zero iff the consumer (a multiply-accumulate or a
|
||||
multiple-subtract instruction) has an accumulator dependency on the
|
||||
result of the producer and no other dependency on that result. It
|
||||
does not check if the producer is multiply-accumulate instruction. */
|
||||
int
|
||||
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
|
||||
{
|
||||
rtx result;
|
||||
rtx op0, op1, acc;
|
||||
|
||||
producer = PATTERN (producer);
|
||||
consumer = PATTERN (consumer);
|
||||
|
||||
if (GET_CODE (producer) == COND_EXEC)
|
||||
producer = COND_EXEC_CODE (producer);
|
||||
if (GET_CODE (consumer) == COND_EXEC)
|
||||
consumer = COND_EXEC_CODE (consumer);
|
||||
|
||||
if (GET_CODE (producer) != SET)
|
||||
return 0;
|
||||
|
||||
result = XEXP (producer, 0);
|
||||
|
||||
if (GET_CODE (consumer) != SET)
|
||||
return 0;
|
||||
|
||||
/* Check that the consumer is of the form
|
||||
(set (...) (plus (mult ...) (...)))
|
||||
or
|
||||
(set (...) (minus (...) (mult ...))). */
|
||||
if (GET_CODE (XEXP (consumer, 1)) == PLUS)
|
||||
{
|
||||
if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
|
||||
return 0;
|
||||
|
||||
op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
|
||||
op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
|
||||
acc = XEXP (XEXP (consumer, 1), 1);
|
||||
}
|
||||
else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
|
||||
{
|
||||
if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
|
||||
return 0;
|
||||
|
||||
op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
|
||||
op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
|
||||
acc = XEXP (XEXP (consumer, 1), 0);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
|
||||
return (reg_overlap_mentioned_p (result, acc)
|
||||
&& !reg_overlap_mentioned_p (result, op0)
|
||||
&& !reg_overlap_mentioned_p (result, op1));
|
||||
}
|
||||
|
||||
/* Return non-zero if the consumer (a multiply-accumulate instruction)
|
||||
has an accumulator dependency on the result of the producer (a
|
||||
multiplication instruction) and no other dependency on that result. */
|
||||
|
|
|
@ -137,6 +137,12 @@
|
|||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
;; Forward the result of a multiply operation to the accumulator
|
||||
;; of the following multiply and accumulate instruction.
|
||||
;; The bypass latency of 1 applies from cortex_a7_mul to cortex_a7_mul
;; only when the guard arm_mac_accumulator_is_result returns non-zero
;; for the producer/consumer pair.
(define_bypass 1 "cortex_a7_mul"
|
||||
"cortex_a7_mul"
|
||||
"arm_mac_accumulator_is_result")
|
||||
|
||||
;; The latency depends on the operands, so we use an estimate here.
|
||||
(define_insn_reservation "cortex_a7_idiv" 5
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
|
@ -264,6 +270,10 @@
|
|||
neon_fp_vmla_qqq_scalar"))
|
||||
"cortex_a7_both+cortex_a7_fpmul_pipe")
|
||||
|
||||
;; Bypass with latency 4 from cortex_a7_fpmacs or cortex_a7_neon_mla
;; to a consumer in either of those reservations, applied only when
;; the guard arm_mac_accumulator_is_result returns non-zero, i.e. the
;; producer's result is used by the consumer solely as its accumulator.
(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla"
|
||||
"cortex_a7_fpmacs,cortex_a7_neon_mla"
|
||||
"arm_mac_accumulator_is_result")
|
||||
|
||||
;; Non-multiply instructions can issue between two cycles of a
|
||||
;; double-precision multiply.
|
||||
|
||||
|
@ -285,6 +295,10 @@
|
|||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4")
|
||||
|
||||
;; Bypass with latency 7 from cortex_a7_fpmacd to a consumer in
;; cortex_a7_fpmacd or cortex_a7_fpfmad, applied only when the guard
;; arm_mac_accumulator_is_result returns non-zero, i.e. the producer's
;; result is used by the consumer solely as its accumulator.
(define_bypass 7 "cortex_a7_fpmacd"
|
||||
"cortex_a7_fpmacd,cortex_a7_fpfmad"
|
||||
"arm_mac_accumulator_is_result")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Floating-point divide/square root instructions.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
|
Loading…
Add table
Reference in a new issue