From b6ca11407d4f5d16ccfb580ea2d3d9aa08d7cd11 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 15 Jun 2023 09:05:01 +0200
Subject: [PATCH] i386: Add peephole2 patterns to improve add with carry or
 subtract with borrow with memory destination [PR79173]

This patch adds various peephole2s which help to recognize add with
carry or subtract with borrow with memory destination.

2023-06-14  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/79173
	* config/i386/i386.md (*sub<mode>_3, @add<mode>3_carry,
	addcarry<mode>, @sub<mode>3_carry, *add<mode>3_cc_overflow_1): Add
	define_peephole2 TARGET_READ_MODIFY_WRITE/-Os patterns to prefer
	using memory destination in these patterns.
---
Reviewer notes (ignored by git am, not part of the commit message):

  All nine peephole2s are enabled only when
  TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p () holds, require
  the scratch register (operand 0) to be dead at the end of the matched
  sequence, and require that it is not mentioned in the memory operands.

  The patterns that begin with a load rewrite
      reg = mem1; reg = reg OP mem2; mem1 = reg
  as
      reg = mem2; mem1 = mem1 OP reg
  Two of them (one add-with-carry, one subtract-with-borrow) also accept
  the result being copied through a second register (operand 5) before
  the store; that register must be dead after the store as well.

  The two addcarry patterns without a leading load rewrite
      reg = CF + reg + mem; mem = reg
  as
      mem = CF + mem + reg
  and the second of these keeps the setc + zero_extend pair that sits
  between the add and the store, emitting it after the new add.

 gcc/config/i386/i386.md | 289 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 289 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0929115ed4d..23be04f5218 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7733,6 +7733,25 @@
   [(set (reg:CC FLAGS_REG)
 	(compare:CC (match_dup 0) (match_dup 1)))])
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 0)
+			       (match_operand:SWI 2 "memory_operand")))
+	      (set (match_dup 0)
+		   (minus:SWI (match_dup 0) (match_dup 2)))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 1) (match_dup 0)))
+	      (set (match_dup 1)
+		   (minus:SWI (match_dup 1) (match_dup 0)))])])
+
 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
 ;; subl $1, %eax; jnc .Lxx;
 (define_peephole2
@@ -7818,6 +7837,59 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+		   (plus:SWI
+		     (plus:SWI
+		       (match_operator:SWI 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand")
+			  (const_int 0)])
+		       (match_dup 0))
+		     (match_operand:SWI 2 "memory_operand")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (match_dup 1)
+		   (plus:SWI (plus:SWI (match_op_dup 4
+					 [(match_dup 3) (const_int 0)])
+				       (match_dup 1))
+			     (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+		   (plus:SWI
+		     (plus:SWI
+		       (match_operator:SWI 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand")
+			  (const_int 0)])
+		       (match_dup 0))
+		     (match_operand:SWI 2 "memory_operand")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 5))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (4, operands[5])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[5], operands[1])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (match_dup 1)
+		   (plus:SWI (plus:SWI (match_op_dup 4
+					 [(match_dup 3) (const_int 0)])
+				       (match_dup 1))
+			     (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*add<mode>3_carry_0"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=m")
 	(plus:SWI
@@ -7918,6 +7990,149 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
 
+(define_peephole2
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (plus:SWI48
+			 (plus:SWI48
+			   (match_operator:SWI48 4 "ix86_carry_flag_operator"
+			     [(match_operand 2 "flags_reg_operand")
+			      (const_int 0)])
+			   (match_operand:SWI48 0 "general_reg_operand"))
+			 (match_operand:SWI48 1 "memory_operand")))
+		     (plus:<DWI>
+		       (zero_extend:<DWI> (match_dup 1))
+		       (match_operator:<DWI> 3 "ix86_carry_flag_operator"
+			 [(match_dup 2) (const_int 0)]))))
+	      (set (match_dup 0)
+		   (plus:SWI48 (plus:SWI48 (match_op_dup 4
+					     [(match_dup 2) (const_int 0)])
+					   (match_dup 0))
+			       (match_dup 1)))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (2, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (plus:SWI48
+			 (plus:SWI48
+			   (match_op_dup 4
+			     [(match_dup 2) (const_int 0)])
+			   (match_dup 1))
+			 (match_dup 0)))
+		     (plus:<DWI>
+		       (zero_extend:<DWI> (match_dup 0))
+		       (match_op_dup 3
+			 [(match_dup 2) (const_int 0)]))))
+	      (set (match_dup 1)
+		   (plus:SWI48 (plus:SWI48 (match_op_dup 4
+					     [(match_dup 2) (const_int 0)])
+					   (match_dup 1))
+			       (match_dup 0)))])])
+
+(define_peephole2
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+	(match_operand:SWI48 1 "memory_operand"))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (plus:SWI48
+			 (plus:SWI48
+			   (match_operator:SWI48 5 "ix86_carry_flag_operator"
+			     [(match_operand 3 "flags_reg_operand")
+			      (const_int 0)])
+			   (match_dup 0))
+			 (match_operand:SWI48 2 "memory_operand")))
+		     (plus:<DWI>
+		       (zero_extend:<DWI> (match_dup 2))
+		       (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+			 [(match_dup 3) (const_int 0)]))))
+	      (set (match_dup 0)
+		   (plus:SWI48 (plus:SWI48 (match_op_dup 5
+					     [(match_dup 3) (const_int 0)])
+					   (match_dup 0))
+			       (match_dup 2)))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (plus:SWI48
+			 (plus:SWI48
+			   (match_op_dup 5
+			     [(match_dup 3) (const_int 0)])
+			   (match_dup 1))
+			 (match_dup 0)))
+		     (plus:<DWI>
+		       (zero_extend:<DWI> (match_dup 0))
+		       (match_op_dup 4
+			 [(match_dup 3) (const_int 0)]))))
+	      (set (match_dup 1)
+		   (plus:SWI48 (plus:SWI48 (match_op_dup 5
+					     [(match_dup 3) (const_int 0)])
+					   (match_dup 1))
+			       (match_dup 0)))])])
+
+(define_peephole2
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (plus:SWI48
+			 (plus:SWI48
+			   (match_operator:SWI48 4 "ix86_carry_flag_operator"
+			     [(match_operand 2 "flags_reg_operand")
+			      (const_int 0)])
+			   (match_operand:SWI48 0 "general_reg_operand"))
+			 (match_operand:SWI48 1 "memory_operand")))
+		     (plus:<DWI>
+		       (zero_extend:<DWI> (match_dup 1))
+		       (match_operator:<DWI> 3 "ix86_carry_flag_operator"
+			 [(match_dup 2) (const_int 0)]))))
+	      (set (match_dup 0)
+		   (plus:SWI48 (plus:SWI48 (match_op_dup 4
+					     [(match_dup 2) (const_int 0)])
+					   (match_dup 0))
+			       (match_dup 1)))])
+   (set (match_operand:QI 5 "general_reg_operand")
+	(ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+   (set (match_operand:SWI48 6 "general_reg_operand")
+	(zero_extend:SWI48 (match_dup 5)))
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[5])
+   && !reg_overlap_mentioned_p (operands[5], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[6])
+   && !reg_overlap_mentioned_p (operands[6], operands[1])"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (zero_extend:<DWI>
+		       (plus:SWI48
+			 (plus:SWI48
+			   (match_op_dup 4
+			     [(match_dup 2) (const_int 0)])
+			   (match_dup 1))
+			 (match_dup 0)))
+		     (plus:<DWI>
+		       (zero_extend:<DWI> (match_dup 0))
+		       (match_op_dup 3
+			 [(match_dup 2) (const_int 0)]))))
+	      (set (match_dup 1)
+		   (plus:SWI48 (plus:SWI48 (match_op_dup 4
+					     [(match_dup 2) (const_int 0)])
+					   (match_dup 1))
+			       (match_dup 0)))])
+   (set (match_dup 5) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+   (set (match_dup 6) (zero_extend:SWI48 (match_dup 5)))])
+
 (define_expand "addcarry<mode>_0"
   [(parallel
      [(set (reg:CCC FLAGS_REG)
@@ -7988,6 +8203,59 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+		   (minus:SWI
+		     (minus:SWI
+		       (match_dup 0)
+		       (match_operator:SWI 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand")
+			  (const_int 0)]))
+		     (match_operand:SWI 2 "memory_operand")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (match_dup 1)
+		   (minus:SWI (minus:SWI (match_dup 1)
+					 (match_op_dup 4
+					   [(match_dup 3) (const_int 0)]))
+			      (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+		   (minus:SWI
+		     (minus:SWI
+		       (match_dup 0)
+		       (match_operator:SWI 4 "ix86_carry_flag_operator"
+			 [(match_operand 3 "flags_reg_operand")
+			  (const_int 0)]))
+		     (match_operand:SWI 2 "memory_operand")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 5))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (4, operands[5])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[5], operands[1])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (match_dup 1)
+		   (minus:SWI (minus:SWI (match_dup 1)
+					 (match_op_dup 4
+					   [(match_dup 3) (const_int 0)]))
+			      (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*sub<mode>3_carry_0"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=m")
 	(minus:SWI
@@ -8241,6 +8509,27 @@
 		     (match_dup 1)))
 	      (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+	(match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (plus:SWI (match_dup 0)
+			       (match_operand:SWI 2 "memory_operand"))
+		     (match_dup 0)))
+	      (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 2)))])
+   (set (match_dup 1) (match_dup 0))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (plus:SWI (match_dup 1) (match_dup 0))
+		     (match_dup 1)))
+	      (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
+
 (define_insn "*addsi3_zext_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC