diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f28c2a171d7..85567980aa3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12479,7 +12479,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "*x86_64_shld_1" +(define_insn "x86_64_shld_1" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -12500,6 +12500,42 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn_and_split "*x86_64_shld_shrd_1_nozext" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (lshiftrt:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2])); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) + (define_insn_and_split "*x86_64_shld_2" [(set (match_operand:DI 0 "nonimmediate_operand") (ior:DI (ashift:DI (match_dup 0) @@ -12543,7 +12579,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "*x86_shld_1" +(define_insn "x86_shld_1" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -12564,6 +12600,41 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn_and_split "*x86_shld_shrd_1_nozext" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (lshiftrt:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand")))) + (clobber (reg:CC FLAGS_REG))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_1 (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2])); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shld_1 (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) + (define_insn_and_split "*x86_shld_2" [(set (match_operand:SI 0 "nonimmediate_operand") (ior:SI (ashift:SI (match_dup 0) @@ -13442,7 +13513,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "*x86_64_shrd_1" +(define_insn "x86_64_shrd_1" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) (match_operand:QI 2 "const_0_to_63_operand")) @@ -13463,6 +13534,42 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn_and_split "*x86_64_shrd_shld_1_nozext" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_63_operand")) + (ashift:DI + (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_63_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_1 (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2])); + } + else + { + operands[1] = force_reg (DImode, operands[1]); + rtx tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_64_shrd_1 (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) + (define_insn_and_split "*x86_64_shrd_2" [(set (match_operand:DI 0 "nonimmediate_operand") (ior:DI (lshiftrt:DI (match_dup 0) @@ -13506,7 +13613,7 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) -(define_insn "*x86_shrd_1" +(define_insn "x86_shrd_1" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) (match_operand:QI 2 "const_0_to_31_operand")) @@ -13527,6 +13634,41 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn_and_split "*x86_shrd_shld_1_nozext" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand") + (match_operand:QI 2 "const_0_to_31_operand")) + (ashift:SI + (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "const_0_to_31_operand")))) + (clobber (reg:CC FLAGS_REG))] + "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (rtx_equal_p (operands[4], operands[0])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_1 (operands[0], operands[1], operands[2], operands[3])); + } + else if (rtx_equal_p (operands[1], operands[0])) + { + operands[4] = force_reg (SImode, operands[4]); + emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2])); + } + else + { + operands[1] = force_reg (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[4]); + emit_insn (gen_x86_shrd_1 (tmp, operands[1], operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + DONE; +}) + (define_insn_and_split "*x86_shrd_2" [(set (match_operand:SI 0 "nonimmediate_operand") (ior:SI (lshiftrt:SI (match_dup 0) diff --git a/gcc/testsuite/gcc.target/i386/pr55583.c b/gcc/testsuite/gcc.target/i386/pr55583.c new file mode 100644 index 00000000000..1c128b5d929 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr55583.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wno-shift-count-overflow" } */ +/* { dg-final { scan-assembler-times {(?n)shrd[ql]?[\t ]*\$2} 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times {(?n)shrdl?[\t ]*\$2} 2 { target ia32 } } } */ +/* { dg-final { scan-assembler-times {(?n)shldl?[\t ]*\$2} 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times {(?n)shld[ql]?[\t ]*\$2} 2 { target { ! ia32 } } } } */ + +typedef unsigned long u64; +typedef unsigned int u32; +typedef unsigned short u16; + +long a, b; +int c, d; +short e, f; +const int n = 2; + +void test64r () { b = ((u64)b >> n) | (a << (64 - n)); } +void test32r () { d = ((u32)d >> n) | (c << (32 - n)); } + +unsigned long ua, ub; +unsigned int uc, ud; +unsigned short ue, uf; + +void testu64l () { ub = (ub << n) | (ua >> (64 - n)); } +void testu64r () { ub = (ub >> n) | (ua << (64 - n)); } +void testu32l () { ud = (ud << n) | (uc >> (32 - n)); } +void testu32r () { ud = (ud >> n) | (uc << (32 - n)); }