From 4abcf4ad38a4081f02ed09aed7892a3a6af61cbb Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Sat, 7 Dec 2024 11:40:12 +0100
Subject: [PATCH] i386: x r<< (c - y) to x r>> y etc. optimization [PR117930]

The following patch optimizes x r<< (c - y) to x r>> y,
x r>> (c - y) to x r<< y, x r<< (c + y) to x r<< y and
x r>> (c + y) to x r>> y if c is a multiple of x's bitsize.

2024-12-07  Jakub Jelinek

	PR target/117930
	* config/i386/i386.md (crotate): New define_code_attr.
	(*<insn><mode>3_add, *<insn><mode>3_add_1, *<insn><mode>3_sub,
	*<insn><mode>3_sub_1): New define_insn_and_split patterns plus following
	define_split for constant first input operand.

	* gcc.target/i386/pr117930.c: New test.
---
 gcc/config/i386/i386.md                  | 141 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr117930.c | 118 +++++++++++++++++++
 2 files changed, 259 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117930.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ec816be6182..6edcb6dc657 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1079,6 +1079,9 @@
 ;; Base name for insn mnemonic.
 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
 
+;; Counter rotate.
+(define_code_attr crotate [(rotate "rotatert") (rotatert "rotate")])
+
 ;; Mapping of abs neg operators
 (define_code_iterator absneg [abs neg])
 
@@ -18216,6 +18219,144 @@
 	(any_rotate:SWI (match_dup 4) (match_dup 2)))]
   "operands[4] = gen_reg_rtx (<MODE>mode);")
 
+(define_insn_and_split "*<insn><mode>3_add"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (subreg:QI
+	    (plus
+	      (match_operand 2 "int_nonimmediate_operand")
+	      (match_operand 3 "const_int_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_rotate:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+  operands[2] = gen_lowpart (QImode, operands[2]);
+})
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (subreg:QI
+	    (plus
+	      (match_operand 2 "int248_register_operand")
+	      (match_operand 3 "const_int_operand")) 0)))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+  [(set (match_dup 4) (match_dup 1))
+   (set (match_dup 0)
+	(any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
+  "operands[4] = gen_reg_rtx (<MODE>mode);")
+
+(define_insn_and_split "*<insn><mode>3_add_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (plus:QI
+	    (match_operand:QI 2 "nonimmediate_operand")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_rotate:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (plus:QI
+	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 3 "const_int_operand"))))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+  [(set (match_dup 4) (match_dup 1))
+   (set (match_dup 0)
+	(any_rotate:SWI (match_dup 4) (match_dup 2)))]
+  "operands[4] = gen_reg_rtx (<MODE>mode);")
+
+(define_insn_and_split "*<insn><mode>3_sub"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (subreg:QI
+	    (minus
+	      (match_operand 3 "const_int_operand")
+	      (match_operand 2 "int_nonimmediate_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (<crotate>:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+  operands[2] = gen_lowpart (QImode, operands[2]);
+})
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (subreg:QI
+	    (minus
+	      (match_operand 3 "const_int_operand")
+	      (match_operand 2 "int248_register_operand")) 0)))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+  [(set (match_dup 4) (match_dup 1))
+   (set (match_dup 0)
+	(<crotate>:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
+  "operands[4] = gen_reg_rtx (<MODE>mode);")
+
+(define_insn_and_split "*<insn><mode>3_sub_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand")
+	  (minus:QI
+	    (match_operand:QI 3 "const_int_operand")
+	    (match_operand:QI 2 "nonimmediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (<crotate>:SWI (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
+
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(any_rotate:SWI
+	  (match_operand:SWI 1 "const_int_operand")
+	  (minus:QI
+	    (match_operand:QI 3 "const_int_operand")
+	    (match_operand:QI 2 "register_operand"))))]
+  "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
+  [(set (match_dup 4) (match_dup 1))
+   (set (match_dup 0)
+	(<crotate>:SWI (match_dup 4) (match_dup 2)))]
+  "operands[4] = gen_reg_rtx (<MODE>mode);")
+
 ;; Implement rotation using two double-precision
 ;; shift instructions and a scratch register.
 
diff --git a/gcc/testsuite/gcc.target/i386/pr117930.c b/gcc/testsuite/gcc.target/i386/pr117930.c
new file mode 100644
index 00000000000..e8dec929e6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117930.c
@@ -0,0 +1,118 @@
+/* PR target/117930 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */
+/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */
+/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */
+
+static inline
+unsigned lrotate (unsigned x, int t)
+{
+  unsigned tl = x << t;
+  unsigned th = x >> (-t & 31);
+  return tl | th;
+}
+
+static inline
+unsigned rrotate (unsigned x, int t)
+{
+  unsigned tl = x >> t;
+  unsigned th = x << (-t & 31);
+  return tl | th;
+}
+
+unsigned
+f1 (unsigned x, int t)
+{
+  return lrotate (x, 32 - t);
+}
+
+unsigned
+f2 (unsigned x, int t)
+{
+  return lrotate (x, 64 - t);
+}
+
+unsigned
+f3 (unsigned x, int t)
+{
+  return lrotate (x, 32 + t);
+}
+
+unsigned
+f4 (unsigned x, int t)
+{
+  return lrotate (x, 64 + t);
+}
+
+unsigned
+f5 (unsigned x, int t)
+{
+  return rrotate (x, 32 - t);
+}
+
+unsigned
+f6 (unsigned x, int t)
+{
+  return rrotate (x, 64 - t);
+}
+
+unsigned
+f7 (unsigned x, int t)
+{
+  return rrotate (x, 32 + t);
+}
+
+unsigned
+f8 (unsigned x, int t)
+{
+  return rrotate (x, 64 + t);
+}
+
+unsigned
+f9 (int t)
+{
+  return lrotate (0xdeadbeefU, 32 - t);
+}
+
+unsigned
+f10 (int t)
+{
+  return lrotate (0xdeadbeefU, 64 - t);
+}
+
+unsigned
+f11 (int t)
+{
+  return lrotate (0xdeadbeefU, 32 + t);
+}
+
+unsigned
+f12 (int t)
+{
+  return lrotate (0xdeadbeefU, 64 + t);
+}
+
+unsigned
+f13 (int t)
+{
+  return rrotate (0xdeadbeefU, 32 - t);
+}
+
+unsigned
+f14 (int t)
+{
+  return rrotate (0xdeadbeefU, 64 - t);
+}
+
+unsigned
+f15 (int t)
+{
+  return rrotate (0xdeadbeefU, 32 + t);
+}
+
+unsigned
+f16 (int t)
+{
+  return rrotate (0xdeadbeefU, 64 + t);
+}