i386: Add peephole2 patterns to improve subtract with borrow with memory destination [PR79173]

This patch adds a subborrow<mode> alternative that allows a memory
destination, and adds several peephole2 patterns which help to match it.

2023-06-15  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/79173
	* config/i386/i386.md (subborrow<mode>): Add alternative with
	memory destination and add for it define_peephole2
	TARGET_READ_MODIFY_WRITE/-Os patterns to prefer using memory
	destination in these patterns.
This commit is contained in:
Jakub Jelinek 2023-06-15 09:08:37 +02:00
parent b6ca11407d
commit ec52d228d6

View file

@ -8381,13 +8381,13 @@
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI>
(match_operand:SWI48 1 "nonimmediate_operand" "0"))
(match_operand:SWI48 1 "nonimmediate_operand" "0,0"))
(plus:<DWI>
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(zero_extend:<DWI>
(match_operand:SWI48 2 "nonimmediate_operand" "rm")))))
(set (match_operand:SWI48 0 "register_operand" "=r")
(match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))))
(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
(minus:SWI48 (minus:SWI48
(match_dup 1)
(match_operator:SWI48 5 "ix86_carry_flag_operator"
@ -8400,6 +8400,154 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
;; Peephole2 for a subtract-with-borrow whose result is stored back to the
;; memory it was loaded from.  Matches three insns:
;;   op0 = mem1
;;   flags = compare (zext op0, carry + zext mem2);  op0 = op0 - carry - mem2
;;   mem1 = op0
;; and rewrites them as two:
;;   op0 = mem2
;;   flags = compare (zext mem1, carry + zext op0);  mem1 = mem1 - carry - op0
;; so memory operand 1 becomes the destination of the subtraction and
;; operand 0 only carries the subtrahend.  "carry" is the
;; ix86_carry_flag_operator (operators 4 and 5) applied to flags operand 3.
(define_peephole2
[(set (match_operand:SWI48 0 "general_reg_operand")
(match_operand:SWI48 1 "memory_operand"))
(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI> (match_dup 0))
(plus:<DWI>
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(zero_extend:<DWI>
(match_operand:SWI48 2 "memory_operand")))))
(set (match_dup 0)
(minus:SWI48
(minus:SWI48
(match_dup 0)
(match_operator:SWI48 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))
(match_dup 2)))])
(set (match_dup 1) (match_dup 0))]
;; Condition: TARGET_READ_MODIFY_WRITE is set or optimize_insn_for_size_p ()
;; holds; operand 0 passes peep2_reg_dead_p at position 3 (the replacement
;; leaves the subtrahend in it instead of the result); and operand 0 is not
;; mentioned in memory operand 1 or memory operand 2.
"(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
&& peep2_reg_dead_p (3, operands[0])
&& !reg_overlap_mentioned_p (operands[0], operands[1])
&& !reg_overlap_mentioned_p (operands[0], operands[2])"
;; Replacement: load the subtrahend into operand 0, then subtract with
;; memory operand 1 as both minuend and destination.
[(set (match_dup 0) (match_dup 2))
(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI> (match_dup 1))
(plus:<DWI> (match_op_dup 4
[(match_dup 3) (const_int 0)])
(zero_extend:<DWI> (match_dup 0)))))
(set (match_dup 1)
(minus:SWI48 (minus:SWI48 (match_dup 1)
(match_op_dup 5
[(match_dup 3) (const_int 0)]))
(match_dup 0)))])])
;; Peephole2 for a subtract-with-borrow where both inputs are first loaded
;; from memory into registers.  Matches four insns:
;;   op6 = mem7
;;   op8 = mem9
;;   flags = compare (zext op0, carry + zext op2);  op0 = op0 - carry - op2
;;   mem1 = op0
;; The two loads may appear in either order; the condition below requires
;; that one of them is the minuend load (register op0 from memory op1) and
;; the other loads the subtrahend register op2.  The sequence is rewritten as
;;   op0 = <memory the subtrahend was loaded from>
;;   flags = compare (zext mem1, carry + zext op0);  mem1 = mem1 - carry - op0
(define_peephole2
[(set (match_operand:SWI48 6 "general_reg_operand")
(match_operand:SWI48 7 "memory_operand"))
(set (match_operand:SWI48 8 "general_reg_operand")
(match_operand:SWI48 9 "memory_operand"))
(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI>
(match_operand:SWI48 0 "general_reg_operand"))
(plus:<DWI>
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(zero_extend:<DWI>
(match_operand:SWI48 2 "general_reg_operand")))))
(set (match_dup 0)
(minus:SWI48
(minus:SWI48
(match_dup 0)
(match_operator:SWI48 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))
(match_dup 2)))])
(set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
;; Condition:
;;  - TARGET_READ_MODIFY_WRITE is set or optimize_insn_for_size_p () holds;
;;  - operand 0 passes peep2_reg_dead_p at position 4 and operand 2 at
;;    position 3 (the replacement no longer loads operand 2 and leaves the
;;    subtrahend, not the result, in operand 0);
;;  - neither operand 0 nor operand 2 is mentioned in memory operand 1;
;;  - the register set by the first load is not mentioned in the second
;;    load's memory operand 9;
;;  - either (op6, mem7) is the minuend load (op6 == op0, mem7 == mem1) and
;;    op8 == op2, or (op8, mem9) is the minuend load and op6 == op2.
"(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
&& peep2_reg_dead_p (4, operands[0])
&& peep2_reg_dead_p (3, operands[2])
&& !reg_overlap_mentioned_p (operands[0], operands[1])
&& !reg_overlap_mentioned_p (operands[2], operands[1])
&& !reg_overlap_mentioned_p (operands[6], operands[9])
&& (rtx_equal_p (operands[6], operands[0])
? (rtx_equal_p (operands[7], operands[1])
&& rtx_equal_p (operands[8], operands[2]))
: (rtx_equal_p (operands[8], operands[0])
&& rtx_equal_p (operands[9], operands[1])
&& rtx_equal_p (operands[6], operands[2])))"
;; Replacement: operand 9 here is the subtrahend's source memory (see the
;; preparation statement at the end of this pattern).
[(set (match_dup 0) (match_dup 9))
(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI> (match_dup 1))
(plus:<DWI> (match_op_dup 4
[(match_dup 3) (const_int 0)])
(zero_extend:<DWI> (match_dup 0)))))
(set (match_dup 1)
(minus:SWI48 (minus:SWI48 (match_dup 1)
(match_op_dup 5
[(match_dup 3) (const_int 0)]))
(match_dup 0)))])]
{
/* If the first load was not the minuend load, then by the condition above
   it loaded the subtrahend (operands[6] == operands[2]) from operands[7].
   Redirect operands[9] to that memory so that (match_dup 9) in the
   replacement always names the subtrahend's source.  */
if (!rtx_equal_p (operands[6], operands[0]))
operands[9] = operands[7];
})
;; Variant of the two-load subtract-with-borrow peephole2 in which the carry
;; flag is also captured between the subtraction and the store.  Matches six
;; insns:
;;   op6 = mem7
;;   op8 = mem9
;;   flags = compare (zext op0, carry + zext op2);  op0 = op0 - carry - op2
;;   op10 = ltu:QI (flags, 0)
;;   op11 = zext op10
;;   mem1 = op0
;; The two loads may appear in either order (checked in the condition).  The
;; sequence is rewritten as
;;   op0 = <memory the subtrahend was loaded from>
;;   flags = compare (zext mem1, carry + zext op0);  mem1 = mem1 - carry - op0
;;   op10 = ltu:QI (flags, 0)
;;   op11 = zext op10
(define_peephole2
[(set (match_operand:SWI48 6 "general_reg_operand")
(match_operand:SWI48 7 "memory_operand"))
(set (match_operand:SWI48 8 "general_reg_operand")
(match_operand:SWI48 9 "memory_operand"))
(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI>
(match_operand:SWI48 0 "general_reg_operand"))
(plus:<DWI>
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(zero_extend:<DWI>
(match_operand:SWI48 2 "general_reg_operand")))))
(set (match_dup 0)
(minus:SWI48
(minus:SWI48
(match_dup 0)
(match_operator:SWI48 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))
(match_dup 2)))])
(set (match_operand:QI 10 "general_reg_operand")
(ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
(set (match_operand:SWI48 11 "general_reg_operand")
(zero_extend:SWI48 (match_dup 10)))
(set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
;; Condition:
;;  - TARGET_READ_MODIFY_WRITE is set or optimize_insn_for_size_p () holds;
;;  - operand 0 passes peep2_reg_dead_p at position 6 and operand 2 at
;;    position 3;
;;  - neither operand 0 nor operand 2 is mentioned in memory operand 1;
;;  - the register set by the first load is not mentioned in the second
;;    load's memory operand 9;
;;  - the carry-out registers (operands 10 and 11) do not overlap operand 0
;;    and are not mentioned in memory operand 1, since the replacement moves
;;    the write to memory operand 1 ahead of the insns that set them;
;;  - either (op6, mem7) is the minuend load (op6 == op0, mem7 == mem1) and
;;    op8 == op2, or (op8, mem9) is the minuend load and op6 == op2.
"(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
&& peep2_reg_dead_p (6, operands[0])
&& peep2_reg_dead_p (3, operands[2])
&& !reg_overlap_mentioned_p (operands[0], operands[1])
&& !reg_overlap_mentioned_p (operands[2], operands[1])
&& !reg_overlap_mentioned_p (operands[6], operands[9])
&& !reg_overlap_mentioned_p (operands[0], operands[10])
&& !reg_overlap_mentioned_p (operands[10], operands[1])
&& !reg_overlap_mentioned_p (operands[0], operands[11])
&& !reg_overlap_mentioned_p (operands[11], operands[1])
&& (rtx_equal_p (operands[6], operands[0])
? (rtx_equal_p (operands[7], operands[1])
&& rtx_equal_p (operands[8], operands[2]))
: (rtx_equal_p (operands[8], operands[0])
&& rtx_equal_p (operands[9], operands[1])
&& rtx_equal_p (operands[6], operands[2])))"
;; Replacement: operand 9 here is the subtrahend's source memory (see the
;; preparation statement at the end of this pattern).  The two carry-out
;; insns are re-emitted unchanged after the memory-destination subtraction.
[(set (match_dup 0) (match_dup 9))
(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI> (match_dup 1))
(plus:<DWI> (match_op_dup 4
[(match_dup 3) (const_int 0)])
(zero_extend:<DWI> (match_dup 0)))))
(set (match_dup 1)
(minus:SWI48 (minus:SWI48 (match_dup 1)
(match_op_dup 5
[(match_dup 3) (const_int 0)]))
(match_dup 0)))])
(set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
(set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))]
{
/* If the first load was not the minuend load, then by the condition above
   it loaded the subtrahend (operands[6] == operands[2]) from operands[7].
   Redirect operands[9] to that memory so that (match_dup 9) in the
   replacement always names the subtrahend's source.  */
if (!rtx_equal_p (operands[6], operands[0]))
operands[9] = operands[7];
})
(define_expand "subborrow<mode>_0"
[(parallel
[(set (reg:CC FLAGS_REG)