i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values in 32bit mode...

* config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
        Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
        in 32bit mode when XMM registers are available to avoid store
        forwarding stalls.
        (floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
        corresponding post-reload splitters.

From-SVN: r133845
This commit is contained in:
Uros Bizjak 2008-04-02 21:07:27 +02:00 committed by Uros Bizjak
parent ce52c73bed
commit 7b1980026c
2 changed files with 79 additions and 1 deletions

View file

@ -1,3 +1,12 @@
2008-04-02 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
in 32bit mode when XMM registers are available to avoid store
forwarding stalls.
(floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
corresponding post-reload splitters.
2008-04-02 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (bdesc_sse_3arg): Add __builtin_ia32_shufps

View file

@ -4925,7 +4925,21 @@
"&& 1"
[(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
(clobber (match_dup 2))])]
"operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);")
{
operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
/* Avoid store forwarding (partial memory) stall penalty
by passing DImode value through XMM registers. */
if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT
&& TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
&& !optimize_size)
{
emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
operands[1],
operands[2]));
DONE;
}
})
(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
[(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
@ -5310,6 +5324,61 @@
[(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
"")
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers.
;;
;; Operands:
;;   0    - X87MODEF destination register ("f" constraint).
;;   1    - DImode source: memory in alternative 0, or an "r" register
;;          in alternative 1 (disparaged with "?").
;;   2    - DImode memory slot that the pattern clobbers.
;;   3, 4 - V4SImode scratch registers ("x" constraint); marked
;;          earlyclobber ("&") in the memory alternative only.
;;
;; The output template is "#", so this insn never prints assembly
;; itself; it relies on the define_splits that follow, which are
;; conditioned on reload_completed, to rewrite it.
(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
[(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
(float:X87MODEF
(match_operand:DI 1 "nonimmediate_operand" "m,?r")))
(clobber (match_scratch:V4SI 3 "=&x,x"))
(clobber (match_scratch:V4SI 4 "=&x,x"))
(clobber (match_operand:DI 2 "memory_operand" "=m,m"))]
;; Enabled only for 32-bit targets that have both the 80387 and SSE2,
;; allow inter-unit moves, and are not optimizing for size.
"TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
&& !TARGET_64BIT && !optimize_size"
"#"
[(set_attr "type" "multi")
(set_attr "mode" "<X87MODEF:MODE>")
(set_attr "unit" "i387")
(set_attr "fp_int_src" "true")])
;; Post-reload split of the DImode -> X87MODEF conversion when the source
;; (operand 1) lives in registers.  The two SImode halves of the source
;; are combined in the V4SImode scratch operand 3 (using scratch operand 4
;; as a helper), the combined DImode value is stored to the memory slot
;; (operand 2), and the conversion then reads that slot.
(define_split
[(set (match_operand:X87MODEF 0 "register_operand" "")
(float:X87MODEF (match_operand:DI 1 "register_operand" "")))
(clobber (match_operand:V4SI 3 "register_operand" ""))
(clobber (match_operand:V4SI 4 "register_operand" ""))
(clobber (match_operand:DI 2 "memory_operand" ""))]
"TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
&& !TARGET_64BIT && !optimize_size
&& reload_completed
&& FP_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 3))
(set (match_dup 0) (float:X87MODEF (match_dup 2)))]
{
  /* The DImode source arrived in a pair of integral registers
     (e.g. %edx:%eax).  View it as two SImode pieces: the one at byte
     offset 0 and the one at byte offset 4.  */
  rtx zero_vec = CONST0_RTX (V4SImode);
  rtx word_at_0 = gen_rtx_SUBREG (SImode, operands[1], 0);
  rtx word_at_4 = gen_rtx_SUBREG (SImode, operands[1], 4);

  /* Load each piece into its own scratch register, then interleave
     them so that scratch operand 3 holds the assembled 64-bit value.  */
  emit_insn (gen_sse2_loadld (operands[3], zero_vec, word_at_0));
  emit_insn (gen_sse2_loadld (operands[4], zero_vec, word_at_4));
  emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));

  /* Refer to the same hard register in DImode so that the replacement
     pattern's first set stores the 64-bit value to operand 2.  */
  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
})
;; Post-reload split of the DImode -> X87MODEF conversion when the source
;; (operand 1) is already in memory.  Nothing has to be assembled in an
;; XMM register in this case, so the split just drops the three clobbers
;; and converts straight from operand 1.
;;
;; Operand numbers follow floatdi<X87MODEF:mode>2_i387_with_xmm and the
;; register-source split: 3 and 4 are the V4SImode scratch registers and
;; 2 is the DImode memory slot.  None of them is used by the replacement.
(define_split
[(set (match_operand:X87MODEF 0 "register_operand" "")
(float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
(clobber (match_operand:V4SI 3 "register_operand" ""))
(clobber (match_operand:V4SI 4 "register_operand" ""))
(clobber (match_operand:DI 2 "memory_operand" ""))]
"TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
&& !TARGET_64BIT && !optimize_size
&& reload_completed
&& FP_REG_P (operands[0])"
[(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
"")
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES