From ec52d228d6db7f77188ad099a8c0ff65dead3241 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 15 Jun 2023 09:08:37 +0200
Subject: [PATCH] i386: Add peephole2 patterns to improve subtract with borrow with memory destination [PR79173]

This patch adds subborrow<mode> alternative so that it can have memory
destination and adds various peephole2s which help to match it.

2023-06-15  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/79173
	* config/i386/i386.md (subborrow<mode>): Add alternative with
	memory destination and add for it define_peephole2
	TARGET_READ_MODIFY_WRITE/-Os patterns to prefer using memory
	destination in these patterns.
---
 gcc/config/i386/i386.md | 154 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 3 deletions(-)

Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

  Reconstruction
    This copy was rebuilt from a text dump in which all line breaks had
    been collapsed and every <...> token had been dropped.  Restored by
    hand: the <DWI> mode attribute on the widened compare operands
    (zero_extend, plus, match_operator 4), <mode>/<MODE> in the pattern
    names and in the "mode" attribute, and the author's e-mail address.
    NOTE(review): confirm these against the upstream commit.
    Indentation was re-created in GNU style (tabs every 8 columns); the
    original whitespace is not recoverable.  If context lines fail to
    match, apply with "git am --ignore-whitespace" or "patch -l".

  What the three new define_peephole2 patterns match
    1. reg0 = mem1; reg0 = reg0 - borrow - mem2; mem1 = reg0
       is rewritten to
       reg0 = mem2; mem1 = mem1 - borrow - reg0.
       reg0 must be dead after the store and must not be mentioned in
       mem1 or mem2.
    2. Two loads (operands 6/7 and 8/9), a register-register subtract
       with borrow, then a store to the memory the minuend was loaded
       from.  Either load order is accepted by the rtx_equal_p test; the
       preparation statement makes operands[9] the subtrahend's memory
       in both cases, so the output only reloads the subtrahend and
       subtracts into mem1 directly.
    3. Same as 2, with (set op10 (ltu:QI flags 0)) and
       (set op11 (zero_extend op10)) sitting between the subtract and
       the store.  Both are re-emitted unchanged after the rewritten
       subtract; operands 10 and 11 must not overlap operand 0 or be
       mentioned in mem1.
    All three are enabled only when TARGET_READ_MODIFY_WRITE is set or
    the insn is being optimized for size.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 23be04f5218..75eda25dfcd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8381,13 +8381,13 @@
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
 	  (zero_extend:<DWI>
-	    (match_operand:SWI48 1 "nonimmediate_operand" "0"))
+	    (match_operand:SWI48 1 "nonimmediate_operand" "0,0"))
 	  (plus:<DWI>
 	    (match_operator:<DWI> 4 "ix86_carry_flag_operator"
 	      [(match_operand 3 "flags_reg_operand") (const_int 0)])
 	    (zero_extend:<DWI>
-	      (match_operand:SWI48 2 "nonimmediate_operand" "rm")))))
-   (set (match_operand:SWI48 0 "register_operand" "=r")
+	      (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))))
+   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
 	(minus:SWI48 (minus:SWI48
 		       (match_dup 1)
 		       (match_operator:SWI48 5 "ix86_carry_flag_operator"
@@ -8400,6 +8400,154 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
 
+(define_peephole2
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+	(match_operand:SWI48 1 "memory_operand"))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI> (match_dup 0))
+		     (plus:<DWI>
+		       (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand") (const_int 0)])
+		       (zero_extend:<DWI>
+			 (match_operand:SWI48 2 "memory_operand")))))
+	      (set (match_dup 0)
+		   (minus:SWI48
+		     (minus:SWI48
+		       (match_dup 0)
+		       (match_operator:SWI48 5 "ix86_carry_flag_operator"
+			 [(match_dup 3) (const_int 0)]))
+		     (match_dup 2)))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI> (match_dup 1))
+		     (plus:<DWI> (match_op_dup 4
+				   [(match_dup 3) (const_int 0)])
+				 (zero_extend:<DWI> (match_dup 0)))))
+	      (set (match_dup 1)
+		   (minus:SWI48 (minus:SWI48 (match_dup 1)
+					     (match_op_dup 5
+					       [(match_dup 3) (const_int 0)]))
+				(match_dup 0)))])])
+
+(define_peephole2
+  [(set (match_operand:SWI48 6 "general_reg_operand")
+	(match_operand:SWI48 7 "memory_operand"))
+   (set (match_operand:SWI48 8 "general_reg_operand")
+	(match_operand:SWI48 9 "memory_operand"))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (match_operand:SWI48 0 "general_reg_operand"))
+		     (plus:<DWI>
+		       (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand") (const_int 0)])
+		       (zero_extend:<DWI>
+			 (match_operand:SWI48 2 "general_reg_operand")))))
+	      (set (match_dup 0)
+		   (minus:SWI48
+		     (minus:SWI48
+		       (match_dup 0)
+		       (match_operator:SWI48 5 "ix86_carry_flag_operator"
+			 [(match_dup 3) (const_int 0)]))
+		     (match_dup 2)))])
+   (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (4, operands[0])
+   && peep2_reg_dead_p (3, operands[2])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[2], operands[1])
+   && !reg_overlap_mentioned_p (operands[6], operands[9])
+   && (rtx_equal_p (operands[6], operands[0])
+       ? (rtx_equal_p (operands[7], operands[1])
+	  && rtx_equal_p (operands[8], operands[2]))
+       : (rtx_equal_p (operands[8], operands[0])
+	  && rtx_equal_p (operands[9], operands[1])
+	  && rtx_equal_p (operands[6], operands[2])))"
+  [(set (match_dup 0) (match_dup 9))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI> (match_dup 1))
+		     (plus:<DWI> (match_op_dup 4
+				   [(match_dup 3) (const_int 0)])
+				 (zero_extend:<DWI> (match_dup 0)))))
+	      (set (match_dup 1)
+		   (minus:SWI48 (minus:SWI48 (match_dup 1)
+					     (match_op_dup 5
+					       [(match_dup 3) (const_int 0)]))
+				(match_dup 0)))])]
+{
+  if (!rtx_equal_p (operands[6], operands[0]))
+    operands[9] = operands[7];
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 6 "general_reg_operand")
+	(match_operand:SWI48 7 "memory_operand"))
+   (set (match_operand:SWI48 8 "general_reg_operand")
+	(match_operand:SWI48 9 "memory_operand"))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (match_operand:SWI48 0 "general_reg_operand"))
+		     (plus:<DWI>
+		       (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand") (const_int 0)])
+		       (zero_extend:<DWI>
+			 (match_operand:SWI48 2 "general_reg_operand")))))
+	      (set (match_dup 0)
+		   (minus:SWI48
+		     (minus:SWI48
+		       (match_dup 0)
+		       (match_operator:SWI48 5 "ix86_carry_flag_operator"
+			 [(match_dup 3) (const_int 0)]))
+		     (match_dup 2)))])
+   (set (match_operand:QI 10 "general_reg_operand")
+	(ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+   (set (match_operand:SWI48 11 "general_reg_operand")
+	(zero_extend:SWI48 (match_dup 10)))
+   (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (6, operands[0])
+   && peep2_reg_dead_p (3, operands[2])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[2], operands[1])
+   && !reg_overlap_mentioned_p (operands[6], operands[9])
+   && !reg_overlap_mentioned_p (operands[0], operands[10])
+   && !reg_overlap_mentioned_p (operands[10], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[11])
+   && !reg_overlap_mentioned_p (operands[11], operands[1])
+   && (rtx_equal_p (operands[6], operands[0])
+       ? (rtx_equal_p (operands[7], operands[1])
+	  && rtx_equal_p (operands[8], operands[2]))
+       : (rtx_equal_p (operands[8], operands[0])
+	  && rtx_equal_p (operands[9], operands[1])
+	  && rtx_equal_p (operands[6], operands[2])))"
+  [(set (match_dup 0) (match_dup 9))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI> (match_dup 1))
+		     (plus:<DWI> (match_op_dup 4
+				   [(match_dup 3) (const_int 0)])
+				 (zero_extend:<DWI> (match_dup 0)))))
+	      (set (match_dup 1)
+		   (minus:SWI48 (minus:SWI48 (match_dup 1)
+					     (match_op_dup 5
+					       [(match_dup 3) (const_int 0)]))
+				(match_dup 0)))])
+   (set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+   (set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))]
+{
+  if (!rtx_equal_p (operands[6], operands[0]))
+    operands[9] = operands[7];
+})
+
 (define_expand "subborrow<mode>_0"
   [(parallel
      [(set (reg:CC FLAGS_REG)