Enable more optimization for 32-bit/64-bit shrd/shld with imm shift count.
This patch doesn't handle a variable shift count, since that requires 5 insns to be combined to get the wanted pattern, but the current pass_combine only supports at most 4. This patch doesn't handle 16-bit shrd/shld either. gcc/ChangeLog: PR target/55583 * config/i386/i386.md (*x86_64_shld_1): Rename to .. (x86_64_shld_1): .. this. (*x86_shld_1): Rename to .. (x86_shld_1): .. this. (*x86_64_shrd_1): Rename to .. (x86_64_shrd_1): .. this. (*x86_shrd_1): Rename to .. (x86_shrd_1): .. this. (*x86_64_shld_shrd_1_nozext): New pre_reload splitter. (*x86_shld_shrd_1_nozext): Ditto. (*x86_64_shrd_shld_1_nozext): Ditto. (*x86_shrd_shld_1_nozext): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr55583.c: New test.
This commit is contained in:
parent
af6d747df7
commit
5c5ef2f9ab
2 changed files with 173 additions and 4 deletions
|
@ -12479,7 +12479,7 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "*x86_64_shld_1"
|
||||
(define_insn "x86_64_shld_1"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
|
||||
(ior:DI (ashift:DI (match_dup 0)
|
||||
(match_operand:QI 2 "const_0_to_63_operand"))
|
||||
|
@ -12500,6 +12500,42 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
;; Pre-reload splitter for a DImode double-word shift whose shifted input
;; is not yet tied to the destination.  It matches
;;   operand 0 = (operand 4 << operand 2) | (operand 1 >> operand 3)
;; (the right shift is lshiftrt) together with a clobber of the flags
;; register.  The insn condition requires TARGET_64BIT, the two immediate
;; counts to satisfy operand 3 == 64 - operand 2, and
;; ix86_pre_reload_split () to be true.  The output template is "#", so
;; the insn is meant to be split; the C code below replaces it as follows:
;;   - operand 4 equals operand 0: operand 1 is forced into a register and
;;     x86_64_shld_1 is emitted on operand 0 directly;
;;   - operand 1 equals operand 0: operand 4 is forced into a register and
;;     x86_64_shrd_1 is emitted instead, with the two counts exchanged;
;;   - otherwise: operand 4 is copied into a fresh DImode pseudo,
;;     x86_64_shld_1 is applied to that pseudo, and the pseudo is then
;;     moved into operand 0.
(define_insn_and_split "*x86_64_shld_shrd_1_nozext"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand")
|
||||
(ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
|
||||
(match_operand:QI 2 "const_0_to_63_operand"))
|
||||
(lshiftrt:DI
|
||||
(match_operand:DI 1 "nonimmediate_operand")
|
||||
(match_operand:QI 3 "const_0_to_63_operand"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT
|
||||
&& INTVAL (operands[3]) == 64 - INTVAL (operands[2])
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
if (rtx_equal_p (operands[4], operands[0]))
|
||||
{
|
||||
operands[1] = force_reg (DImode, operands[1]);
|
||||
emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3]));
|
||||
}
|
||||
else if (rtx_equal_p (operands[1], operands[0]))
|
||||
{
|
||||
operands[4] = force_reg (DImode, operands[4]);
|
||||
emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
|
||||
}
|
||||
else
|
||||
{
|
||||
operands[1] = force_reg (DImode, operands[1]);
|
||||
rtx tmp = gen_reg_rtx (DImode);
|
||||
emit_move_insn (tmp, operands[4]);
|
||||
emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3]));
|
||||
emit_move_insn (operands[0], tmp);
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*x86_64_shld_2"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand")
|
||||
(ior:DI (ashift:DI (match_dup 0)
|
||||
|
@ -12543,7 +12579,7 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "*x86_shld_1"
|
||||
(define_insn "x86_shld_1"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
|
||||
(ior:SI (ashift:SI (match_dup 0)
|
||||
(match_operand:QI 2 "const_0_to_31_operand"))
|
||||
|
@ -12564,6 +12600,41 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
;; Pre-reload splitter for an SImode double-word shift whose shifted input
;; is not yet tied to the destination.  It matches
;;   operand 0 = (operand 4 << operand 2) | (operand 1 >> operand 3)
;; (the right shift is lshiftrt) together with a clobber of the flags
;; register.  The insn condition requires the two immediate counts to
;; satisfy operand 3 == 32 - operand 2 and ix86_pre_reload_split () to be
;; true.  The output template is "#", so the insn is meant to be split;
;; the C code below replaces it as follows:
;;   - operand 4 equals operand 0: operand 1 is forced into a register and
;;     x86_shld_1 is emitted on operand 0 directly;
;;   - operand 1 equals operand 0: operand 4 is forced into a register and
;;     x86_shrd_1 is emitted instead, with the two counts exchanged;
;;   - otherwise: operand 4 is copied into a fresh SImode pseudo,
;;     x86_shld_1 is applied to that pseudo, and the pseudo is then moved
;;     into operand 0.
(define_insn_and_split "*x86_shld_shrd_1_nozext"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
(ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
|
||||
(match_operand:QI 2 "const_0_to_31_operand"))
|
||||
(lshiftrt:SI
|
||||
(match_operand:SI 1 "nonimmediate_operand")
|
||||
(match_operand:QI 3 "const_0_to_31_operand"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"INTVAL (operands[3]) == 32 - INTVAL (operands[2])
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
if (rtx_equal_p (operands[4], operands[0]))
|
||||
{
|
||||
operands[1] = force_reg (SImode, operands[1]);
|
||||
emit_insn (gen_x86_shld_1 (operands[0], operands[1], operands[2], operands[3]));
|
||||
}
|
||||
else if (rtx_equal_p (operands[1], operands[0]))
|
||||
{
|
||||
operands[4] = force_reg (SImode, operands[4]);
|
||||
emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
|
||||
}
|
||||
else
|
||||
{
|
||||
operands[1] = force_reg (SImode, operands[1]);
|
||||
rtx tmp = gen_reg_rtx (SImode);
|
||||
emit_move_insn (tmp, operands[4]);
|
||||
emit_insn (gen_x86_shld_1 (tmp, operands[1], operands[2], operands[3]));
|
||||
emit_move_insn (operands[0], tmp);
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*x86_shld_2"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
(ior:SI (ashift:SI (match_dup 0)
|
||||
|
@ -13442,7 +13513,7 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "*x86_64_shrd_1"
|
||||
(define_insn "x86_64_shrd_1"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
|
||||
(ior:DI (lshiftrt:DI (match_dup 0)
|
||||
(match_operand:QI 2 "const_0_to_63_operand"))
|
||||
|
@ -13463,6 +13534,42 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
;; Pre-reload splitter for a DImode double-word shift whose shifted input
;; is not yet tied to the destination.  It matches
;;   operand 0 = (operand 4 >> operand 2) | (operand 1 << operand 3)
;; (the right shift is lshiftrt) together with a clobber of the flags
;; register.  The insn condition requires TARGET_64BIT, the two immediate
;; counts to satisfy operand 3 == 64 - operand 2, and
;; ix86_pre_reload_split () to be true.  The output template is "#", so
;; the insn is meant to be split; the C code below replaces it as follows:
;;   - operand 4 equals operand 0: operand 1 is forced into a register and
;;     x86_64_shrd_1 is emitted on operand 0 directly;
;;   - operand 1 equals operand 0: operand 4 is forced into a register and
;;     x86_64_shld_1 is emitted instead, with the two counts exchanged;
;;   - otherwise: operand 4 is copied into a fresh DImode pseudo,
;;     x86_64_shrd_1 is applied to that pseudo, and the pseudo is then
;;     moved into operand 0.
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand")
|
||||
(ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
|
||||
(match_operand:QI 2 "const_0_to_63_operand"))
|
||||
(ashift:DI
|
||||
(match_operand:DI 1 "nonimmediate_operand")
|
||||
(match_operand:QI 3 "const_0_to_63_operand"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT
|
||||
&& INTVAL (operands[3]) == 64 - INTVAL (operands[2])
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
if (rtx_equal_p (operands[4], operands[0]))
|
||||
{
|
||||
operands[1] = force_reg (DImode, operands[1]);
|
||||
emit_insn (gen_x86_64_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
|
||||
}
|
||||
else if (rtx_equal_p (operands[1], operands[0]))
|
||||
{
|
||||
operands[4] = force_reg (DImode, operands[4]);
|
||||
emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2]));
|
||||
}
|
||||
else
|
||||
{
|
||||
operands[1] = force_reg (DImode, operands[1]);
|
||||
rtx tmp = gen_reg_rtx (DImode);
|
||||
emit_move_insn (tmp, operands[4]);
|
||||
emit_insn (gen_x86_64_shrd_1 (tmp, operands[1], operands[2], operands[3]));
|
||||
emit_move_insn (operands[0], tmp);
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*x86_64_shrd_2"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand")
|
||||
(ior:DI (lshiftrt:DI (match_dup 0)
|
||||
|
@ -13506,7 +13613,7 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
(define_insn "*x86_shrd_1"
|
||||
(define_insn "x86_shrd_1"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
|
||||
(ior:SI (lshiftrt:SI (match_dup 0)
|
||||
(match_operand:QI 2 "const_0_to_31_operand"))
|
||||
|
@ -13527,6 +13634,41 @@
|
|||
(set_attr "amdfam10_decode" "vector")
|
||||
(set_attr "bdver1_decode" "vector")])
|
||||
|
||||
;; Pre-reload splitter for an SImode double-word shift whose shifted input
;; is not yet tied to the destination.  It matches
;;   operand 0 = (operand 4 >> operand 2) | (operand 1 << operand 3)
;; (the right shift is lshiftrt) together with a clobber of the flags
;; register.  The insn condition requires the two immediate counts to
;; satisfy operand 3 == 32 - operand 2 and ix86_pre_reload_split () to be
;; true.  The output template is "#", so the insn is meant to be split;
;; the C code below replaces it as follows:
;;   - operand 4 equals operand 0: operand 1 is forced into a register and
;;     x86_shrd_1 is emitted on operand 0 directly;
;;   - operand 1 equals operand 0: operand 4 is forced into a register and
;;     x86_shld_1 is emitted instead, with the two counts exchanged;
;;   - otherwise: operand 4 is copied into a fresh SImode pseudo,
;;     x86_shrd_1 is applied to that pseudo, and the pseudo is then moved
;;     into operand 0.
(define_insn_and_split "*x86_shrd_shld_1_nozext"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
(ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
|
||||
(match_operand:QI 2 "const_0_to_31_operand"))
|
||||
(ashift:SI
|
||||
(match_operand:SI 1 "nonimmediate_operand")
|
||||
(match_operand:QI 3 "const_0_to_31_operand"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"INTVAL (operands[3]) == 32 - INTVAL (operands[2])
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
if (rtx_equal_p (operands[4], operands[0]))
|
||||
{
|
||||
operands[1] = force_reg (SImode, operands[1]);
|
||||
emit_insn (gen_x86_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
|
||||
}
|
||||
else if (rtx_equal_p (operands[1], operands[0]))
|
||||
{
|
||||
operands[4] = force_reg (SImode, operands[4]);
|
||||
emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2]));
|
||||
}
|
||||
else
|
||||
{
|
||||
operands[1] = force_reg (SImode, operands[1]);
|
||||
rtx tmp = gen_reg_rtx (SImode);
|
||||
emit_move_insn (tmp, operands[4]);
|
||||
emit_insn (gen_x86_shrd_1 (tmp, operands[1], operands[2], operands[3]));
|
||||
emit_move_insn (operands[0], tmp);
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*x86_shrd_2"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
(ior:SI (lshiftrt:SI (match_dup 0)
|
||||
|
|
27
gcc/testsuite/gcc.target/i386/pr55583.c
Normal file
27
gcc/testsuite/gcc.target/i386/pr55583.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Wno-shift-count-overflow" } */
|
||||
/* { dg-final { scan-assembler-times {(?n)shrd[ql]?[\t ]*\$2} 4 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times {(?n)shrdl?[\t ]*\$2} 2 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times {(?n)shldl?[\t ]*\$2} 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times {(?n)shld[ql]?[\t ]*\$2} 2 { target { ! ia32 } } } } */
|
||||
|
||||
/* Unsigned typedefs used for the casts below.  u16 is declared but not
   referenced by any function in this file.  */
typedef unsigned long u64;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned short u16;
|
||||
|
||||
/* Signed globals for the test64r/test32r cases.  e and f are declared but
   not referenced by any function in this file.  */
long a, b;
|
||||
int c, d;
|
||||
short e, f;
|
||||
/* Shift count shared by every test function; the dg-final patterns above
   look for it as the immediate operand $2.  */
const int n = 2;
|
||||
|
||||
/* Signed operands: the value shifted right is first cast to the unsigned
   typedef, and the result is stored back into that same variable.  */
void test64r () { b = ((u64)b >> n) | (a << (64 - n)); }
|
||||
void test32r () { d = ((u32)d >> n) | (c << (32 - n)); }
|
||||
|
||||
/* Unsigned globals for the testu* cases.  ue and uf are declared but not
   referenced by any function in this file.  */
unsigned long ua, ub;
|
||||
unsigned int uc, ud;
|
||||
unsigned short ue, uf;
|
||||
|
||||
/* Unsigned operands: the "l" variants shift the destination left by n and
   OR in the other value shifted right by (width - n); the "r" variants do
   the mirror image.  */
void testu64l () { ub = (ub << n) | (ua >> (64 - n)); }
|
||||
void testu64r () { ub = (ub >> n) | (ua << (64 - n)); }
|
||||
void testu32l () { ud = (ud << n) | (uc >> (32 - n)); }
|
||||
void testu32r () { ud = (ud >> n) | (uc << (32 - n)); }
|
Loading…
Add table
Reference in a new issue