i386: x r<< (c - y) to x r>> y etc. optimization [PR117930]
The following patch optimizes
  x r<< (c - y) to x r>> y,
  x r>> (c - y) to x r<< y,
  x r<< (c + y) to x r<< y and
  x r>> (c + y) to x r>> y
if c is a multiple of x's bitsize.

2024-12-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/117930
	* config/i386/i386.md (crotate): New define_code_attr.
	(*<insn><mode>3_add, *<insn><mode>3_add_1, *<insn><mode>3_sub,
	*<insn><mode>3_sub_1): New define_insn_and_split patterns plus
	following define_split for constant first input operand.

	* gcc.target/i386/pr117930.c: New test.
This commit is contained in:
parent
b7dd0d9760
commit
4abcf4ad38
2 changed files with 259 additions and 0 deletions
|
@ -1079,6 +1079,9 @@
|
|||
;; Base name for insn mnemonic: "rol" for rotate, "ror" for rotatert.
|
||||
(define_code_attr rotate [(rotate "rol") (rotatert "ror")])
|
||||
|
||||
;; Counter rotate: maps rotate to rotatert and rotatert to rotate, so a
;; pattern iterating over one rotate code can emit the other one.
|
||||
(define_code_attr crotate [(rotate "rotatert") (rotatert "rotate")])
|
||||
|
||||
;; Code iterator over the abs and neg operators.
|
||||
(define_code_iterator absneg [abs neg])
|
||||
|
||||
|
@ -18216,6 +18219,144 @@
|
|||
(any_rotate:SWI (match_dup 4) (match_dup 2)))]
|
||||
"operands[4] = gen_reg_rtx (<MODE>mode);")
|
||||
|
||||
;; Rotate of operand 1 whose count is the QImode subreg of
;; (operand 2 + constant operand 3).  Matches only when operand 3 is a
;; multiple of the operand bit size (tested by masking with
;; <MODE_SIZE> * BITS_PER_UNIT - 1), ix86_binary_operator_ok accepts the
;; operands and ix86_pre_reload_split () holds.  It is always split
;; ("&& 1") into the same rotate with the addition dropped: operand 2 is
;; passed through force_reg and its QImode low part becomes the count.
(define_insn_and_split "*<insn><mode>3_add"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "nonimmediate_operand")
|
||||
(subreg:QI
|
||||
(plus
|
||||
(match_operand 2 "int_nonimmediate_operand")
|
||||
(match_operand 3 "const_int_operand")) 0)))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
|
||||
&& (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(any_rotate:SWI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
|
||||
operands[2] = gen_lowpart (QImode, operands[2]);
|
||||
})
|
||||
|
||||
;; Split for a rotate of a constant (operand 1) by the QImode subreg of
;; (operand 2 + constant operand 3), when operand 3 is a multiple of the
;; operand bit size.  The constant is first copied into a new <MODE>mode
;; register (operand 4), which is then rotated by the QImode subreg of
;; operand 2 alone.
(define_split
|
||||
[(set (match_operand:SWI 0 "register_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "const_int_operand")
|
||||
(subreg:QI
|
||||
(plus
|
||||
(match_operand 2 "int248_register_operand")
|
||||
(match_operand 3 "const_int_operand")) 0)))]
|
||||
"(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
|
||||
[(set (match_dup 4) (match_dup 1))
|
||||
(set (match_dup 0)
|
||||
(any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
|
||||
"operands[4] = gen_reg_rtx (<MODE>mode);")
|
||||
|
||||
;; Rotate of operand 1 whose count is a QImode addition of operand 2 and
;; constant operand 3 (no subreg around the sum).  Matches only when
;; operand 3 is a multiple of the operand bit size (tested by masking
;; with <MODE_SIZE> * BITS_PER_UNIT - 1), ix86_binary_operator_ok accepts
;; the operands and ix86_pre_reload_split () holds.  It is always split
;; ("&& 1") into the same rotate with operand 2, passed through
;; force_reg, as the count.
(define_insn_and_split "*<insn><mode>3_add_1"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "nonimmediate_operand")
|
||||
(plus:QI
|
||||
(match_operand:QI 2 "nonimmediate_operand")
|
||||
(match_operand:QI 3 "const_int_operand"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
|
||||
&& (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(any_rotate:SWI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
|
||||
|
||||
;; Split for a rotate of a constant (operand 1) by the QImode sum of
;; register operand 2 and constant operand 3, when operand 3 is a
;; multiple of the operand bit size.  The constant is first copied into
;; a new <MODE>mode register (operand 4), which is then rotated by
;; operand 2 alone.
(define_split
|
||||
[(set (match_operand:SWI 0 "register_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "const_int_operand")
|
||||
(plus:QI
|
||||
(match_operand:QI 2 "register_operand")
|
||||
(match_operand:QI 3 "const_int_operand"))))]
|
||||
"(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
|
||||
[(set (match_dup 4) (match_dup 1))
|
||||
(set (match_dup 0)
|
||||
(any_rotate:SWI (match_dup 4) (match_dup 2)))]
|
||||
"operands[4] = gen_reg_rtx (<MODE>mode);")
|
||||
|
||||
;; Rotate of operand 1 whose count is the QImode subreg of
;; (constant operand 3 - operand 2).  Matches only when operand 3 is a
;; multiple of the operand bit size (tested by masking with
;; <MODE_SIZE> * BITS_PER_UNIT - 1), ix86_binary_operator_ok accepts the
;; operands and ix86_pre_reload_split () holds.  It is always split
;; ("&& 1") into the counter rotate (<crotate>) of operand 1: operand 2
;; is passed through force_reg and its QImode low part becomes the count.
(define_insn_and_split "*<insn><mode>3_sub"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "nonimmediate_operand")
|
||||
(subreg:QI
|
||||
(minus
|
||||
(match_operand 3 "const_int_operand")
|
||||
(match_operand 2 "int_nonimmediate_operand")) 0)))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
|
||||
&& (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(<crotate>:SWI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
|
||||
operands[2] = gen_lowpart (QImode, operands[2]);
|
||||
})
|
||||
|
||||
;; Split for a rotate of a constant (operand 1) by the QImode subreg of
;; (constant operand 3 - operand 2), when operand 3 is a multiple of the
;; operand bit size.  The constant is first copied into a new <MODE>mode
;; register (operand 4), which is then rotated with the counter rotate
;; (<crotate>) by the QImode subreg of operand 2.
(define_split
|
||||
[(set (match_operand:SWI 0 "register_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "const_int_operand")
|
||||
(subreg:QI
|
||||
(minus
|
||||
(match_operand 3 "const_int_operand")
|
||||
(match_operand 2 "int248_register_operand")) 0)))]
|
||||
"(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
|
||||
[(set (match_dup 4) (match_dup 1))
|
||||
(set (match_dup 0)
|
||||
(<crotate>:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))]
|
||||
"operands[4] = gen_reg_rtx (<MODE>mode);")
|
||||
|
||||
;; Rotate of operand 1 whose count is a QImode subtraction of operand 2
;; from constant operand 3 (no subreg around the difference).  Matches
;; only when operand 3 is a multiple of the operand bit size (tested by
;; masking with <MODE_SIZE> * BITS_PER_UNIT - 1), ix86_binary_operator_ok
;; accepts the operands and ix86_pre_reload_split () holds.  It is always
;; split ("&& 1") into the counter rotate (<crotate>) of operand 1 with
;; operand 2, passed through force_reg, as the count.
(define_insn_and_split "*<insn><mode>3_sub_1"
|
||||
[(set (match_operand:SWI 0 "nonimmediate_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "nonimmediate_operand")
|
||||
(minus:QI
|
||||
(match_operand:QI 3 "const_int_operand")
|
||||
(match_operand:QI 2 "nonimmediate_operand"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
|
||||
&& (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(<crotate>:SWI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);")
|
||||
|
||||
;; Split for a rotate of a constant (operand 1) by the QImode difference
;; of constant operand 3 and register operand 2, when operand 3 is a
;; multiple of the operand bit size.  The constant is first copied into
;; a new <MODE>mode register (operand 4), which is then rotated with the
;; counter rotate (<crotate>) by operand 2.
(define_split
|
||||
[(set (match_operand:SWI 0 "register_operand")
|
||||
(any_rotate:SWI
|
||||
(match_operand:SWI 1 "const_int_operand")
|
||||
(minus:QI
|
||||
(match_operand:QI 3 "const_int_operand")
|
||||
(match_operand:QI 2 "register_operand"))))]
|
||||
"(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0"
|
||||
[(set (match_dup 4) (match_dup 1))
|
||||
(set (match_dup 0)
|
||||
(<crotate>:SWI (match_dup 4) (match_dup 2)))]
|
||||
"operands[4] = gen_reg_rtx (<MODE>mode);")
|
||||
|
||||
;; Implement rotation using two double-precision
|
||||
;; shift instructions and a scratch register.
|
||||
|
||||
|
|
118
gcc/testsuite/gcc.target/i386/pr117930.c
Normal file
118
gcc/testsuite/gcc.target/i386/pr117930.c
Normal file
|
@ -0,0 +1,118 @@
|
|||
/* PR target/117930 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */
|
||||
/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */
|
||||
/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */
|
||||
|
||||
/* Left-rotate idiom: X shifted left by T, OR-ed with X shifted right by
   (-T & 31).  Callers below pass counts of the form c - t or c + t.  */
static inline
|
||||
unsigned lrotate (unsigned x, int t)
|
||||
{
|
||||
unsigned tl = x << t;
|
||||
unsigned th = x >> (-t & 31);
|
||||
return tl | th;
|
||||
}
|
||||
|
||||
/* Right-rotate idiom: X shifted right by T, OR-ed with X shifted left by
   (-T & 31).  Callers below pass counts of the form c - t or c + t.  */
static inline
|
||||
unsigned rrotate (unsigned x, int t)
|
||||
{
|
||||
unsigned tl = x >> t;
|
||||
unsigned th = x << (-t & 31);
|
||||
return tl | th;
|
||||
}
|
||||
|
||||
/* Left rotate of X with a count of the form c - t, c = 32.  */
unsigned
|
||||
f1 (unsigned x, int t)
|
||||
{
|
||||
return lrotate (x, 32 - t);
|
||||
}
|
||||
|
||||
/* Left rotate of X with a count of the form c - t, c = 64.  */
unsigned
|
||||
f2 (unsigned x, int t)
|
||||
{
|
||||
return lrotate (x, 64 - t);
|
||||
}
|
||||
|
||||
/* Left rotate of X with a count of the form c + t, c = 32.  */
unsigned
|
||||
f3 (unsigned x, int t)
|
||||
{
|
||||
return lrotate (x, 32 + t);
|
||||
}
|
||||
|
||||
/* Left rotate of X with a count of the form c + t, c = 64.  */
unsigned
|
||||
f4 (unsigned x, int t)
|
||||
{
|
||||
return lrotate (x, 64 + t);
|
||||
}
|
||||
|
||||
/* Right rotate of X with a count of the form c - t, c = 32.  */
unsigned
|
||||
f5 (unsigned x, int t)
|
||||
{
|
||||
return rrotate (x, 32 - t);
|
||||
}
|
||||
|
||||
/* Right rotate of X with a count of the form c - t, c = 64.  */
unsigned
|
||||
f6 (unsigned x, int t)
|
||||
{
|
||||
return rrotate (x, 64 - t);
|
||||
}
|
||||
|
||||
/* Right rotate of X with a count of the form c + t, c = 32.  */
unsigned
|
||||
f7 (unsigned x, int t)
|
||||
{
|
||||
return rrotate (x, 32 + t);
|
||||
}
|
||||
|
||||
/* Right rotate of X with a count of the form c + t, c = 64.  */
unsigned
|
||||
f8 (unsigned x, int t)
|
||||
{
|
||||
return rrotate (x, 64 + t);
|
||||
}
|
||||
|
||||
/* Left rotate of the constant 0xdeadbeefU with count 32 - t.  */
unsigned
|
||||
f9 (int t)
|
||||
{
|
||||
return lrotate (0xdeadbeefU, 32 - t);
|
||||
}
|
||||
|
||||
/* Left rotate of the constant 0xdeadbeefU with count 64 - t.  */
unsigned
|
||||
f10 (int t)
|
||||
{
|
||||
return lrotate (0xdeadbeefU, 64 - t);
|
||||
}
|
||||
|
||||
/* Left rotate of the constant 0xdeadbeefU with count 32 + t.  */
unsigned
|
||||
f11 (int t)
|
||||
{
|
||||
return lrotate (0xdeadbeefU, 32 + t);
|
||||
}
|
||||
|
||||
/* Left rotate of the constant 0xdeadbeefU with count 64 + t.  */
unsigned
|
||||
f12 (int t)
|
||||
{
|
||||
return lrotate (0xdeadbeefU, 64 + t);
|
||||
}
|
||||
|
||||
/* Right rotate of the constant 0xdeadbeefU with count 32 - t.  */
unsigned
|
||||
f13 (int t)
|
||||
{
|
||||
return rrotate (0xdeadbeefU, 32 - t);
|
||||
}
|
||||
|
||||
/* Right rotate of the constant 0xdeadbeefU with count 64 - t.  */
unsigned
|
||||
f14 (int t)
|
||||
{
|
||||
return rrotate (0xdeadbeefU, 64 - t);
|
||||
}
|
||||
|
||||
/* Right rotate of the constant 0xdeadbeefU with count 32 + t.  */
unsigned
|
||||
f15 (int t)
|
||||
{
|
||||
return rrotate (0xdeadbeefU, 32 + t);
|
||||
}
|
||||
|
||||
/* Right rotate of the constant 0xdeadbeefU with count 64 + t.  */
unsigned
|
||||
f16 (int t)
|
||||
{
|
||||
return rrotate (0xdeadbeefU, 64 + t);
|
||||
}
|
Loading…
Add table
Reference in a new issue