i386: Rewrite bswaphi2 handling [PR115102]

Introduce the *bswaphi2 instruction pattern and also enable the bswaphi2
expander for non-movbe targets.  The testcase:

unsigned short bswap8 (unsigned short val)
{
  return ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}

now expands through the bswaphi2 named expander.

Rewrite the bswaphi_lowpart insn pattern as bswaphisi2_lowpart in the RTX form
that the combine pass can use to simplify:

Trying 6, 9, 8 -> 10:
    6: r99:SI=bswap(r103:SI)
    9: {r107:SI=r103:SI&0xffffffffffff0000;clobber flags:CC;}
      REG_DEAD r103:SI
      REG_UNUSED flags:CC
    8: {r106:SI=r99:SI 0>>0x10;clobber flags:CC;}
      REG_DEAD r99:SI
      REG_UNUSED flags:CC
   10: {r104:SI=r106:SI|r107:SI;clobber flags:CC;}
      REG_DEAD r107:SI
      REG_DEAD r106:SI
      REG_UNUSED flags:CC

Successfully matched this instruction:
(set (reg:SI 104 [ _8 ])
    (ior:SI (and:SI (reg/v:SI 103 [ val ])
            (const_int -65536 [0xffffffffffff0000]))
        (lshiftrt:SI (bswap:SI (reg/v:SI 103 [ val ]))
            (const_int 16 [0x10]))))
allowing combination of insns 6, 8, 9 and 10

when compiling the following testcase:

unsigned int bswap8 (unsigned int val)
{
  return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}

to produce:

	movl    %edi, %eax
	xchgb   %ah, %al
	ret

The expansion now always goes through a clobberless form of the bswaphi
instruction.  The instruction is conditionally converted to a rotate in
the peephole2 pass.  This significantly simplifies the attributes of the
bswaphisi2_lowpart insn pattern.

	PR target/115102

gcc/ChangeLog:

	* config/i386/i386.md (bswaphi2): Also enable for !TARGET_MOVBE.
	(*bswaphi2): New insn pattern.
	(bswaphisi2_lowpart): Rename from bswaphi_lowpart.  Rewrite
	insn RTX to match the expected form of the combine pass.
	Remove rol{w} alternative and corresponding attributes.
	(bswaphisi2_lowpart peephole2): New peephole2 pattern to
	conditionally convert bswaphisi2_lowpart to rotlhi3_1_slp.
	(bswapsi2): Update expander for rename.
	(rotlhi3_1_slp splitter): Conditionally split to bswaphi2.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr115102.c: New test.
This commit is contained in:
Uros Bizjak 2024-05-30 21:27:42 +02:00
parent 46d931b3dd
commit e715204f20
2 changed files with 60 additions and 27 deletions

View file

@ -17210,9 +17210,7 @@
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
[(parallel [(set (strict_low_part (match_dup 0))
(bswap:HI (match_dup 0)))
(clobber (reg:CC FLAGS_REG))])])
[(set (match_dup 0) (bswap:HI (match_dup 0)))])
;; Rotations through carry flag
(define_insn "rcrsi2"
@ -20730,12 +20728,11 @@
operands[1] = force_reg (SImode, operands[1]);
else
{
rtx x = operands[0];
rtx x = gen_reg_rtx (SImode);
emit_move_insn (x, operands[1]);
emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
DONE;
}
})
@ -20767,7 +20764,11 @@
(define_expand "bswaphi2"
[(set (match_operand:HI 0 "register_operand")
(bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
"TARGET_MOVBE")
""
{
if (!TARGET_MOVBE)
operands[1] = force_reg (HImode, operands[1]);
})
(define_insn "*bswaphi2_movbe"
[(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
@ -20788,33 +20789,55 @@
(set_attr "bdver1_decode" "double,*,*")
(set_attr "mode" "QI,HI,HI")])
;; Clobberless HImode byte swap, enabled only when !TARGET_MOVBE.
;; Operand 1 is tied to operand 0 through the "0" constraint, so the swap
;; happens in place: the template exchanges the high byte (%h0) and the
;; low byte (%b0) of that single register with xchg{b}.
;; NOTE(review): the "Q" constraint is presumably the class of registers
;; whose high byte is addressable, as %h0 requires -- confirm against the
;; i386 constraint definitions.
(define_insn "*bswaphi2"
[(set (match_operand:HI 0 "register_operand" "=Q")
(bswap:HI (match_operand:HI 1 "register_operand" "0")))]
"!TARGET_MOVBE"
"xchg{b}\t{%h0, %b0|%b0, %h0}"
[(set_attr "type" "imov")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "double")
(set_attr "bdver1_decode" "double")
(set_attr "mode" "QI")])
(define_peephole2
[(set (match_operand:HI 0 "general_reg_operand")
(bswap:HI (match_dup 0)))]
"TARGET_MOVBE
&& !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
"!(TARGET_USE_XCHGB ||
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn "bswaphi_lowpart"
[(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
(bswap:HI (match_dup 0)))
(clobber (reg:CC FLAGS_REG))]
(define_insn "bswaphisi2_lowpart"
[(set (match_operand:SI 0 "register_operand" "=Q")
(ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
(const_int -65536))
(lshiftrt:SI (bswap:SI (match_dup 1))
(const_int 16))))]
""
"@
xchg{b}\t{%h0, %b0|%b0, %h0}
rol{w}\t{$8, %0|%0, 8}"
[(set (attr "preferred_for_size")
(cond [(eq_attr "alternative" "0")
(symbol_ref "true")]
(symbol_ref "false")))
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "0")
(symbol_ref "TARGET_USE_XCHGB")]
(symbol_ref "!TARGET_USE_XCHGB")))
(set_attr "length" "2,4")
(set_attr "mode" "QI,HI")])
"xchg{b}\t{%h0, %b0|%b0, %h0}"
[(set_attr "type" "imov")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "double")
(set_attr "bdver1_decode" "double")
(set_attr "mode" "QI")])
;; Peephole2 for the SImode "keep the upper 16 bits, byte-swap the lower
;; 16 bits" RTX shape when source and destination are the same general
;; register.  It applies only when none of TARGET_USE_XCHGB,
;; TARGET_PARTIAL_REG_STALL or optimize_function_for_size_p (cfun) holds
;; and FLAGS_REG is dead at this point.  The replacement is a rotate by 8
;; of the HImode low part written through strict_low_part; unlike the
;; matched insn it clobbers FLAGS_REG, hence the liveness check.
;; The preparation statement narrows operands[0] to its HImode low part
;; so that the match_dups in the replacement refer to the 16-bit register.
(define_peephole2
[(set (match_operand:SI 0 "general_reg_operand")
(ior:SI (and:SI (match_dup 0)
(const_int -65536))
(lshiftrt:SI (bswap:SI (match_dup 0))
(const_int 16))))]
"!(TARGET_USE_XCHGB ||
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (strict_low_part (match_dup 0))
(rotate:HI (match_dup 0) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])]
"operands[0] = gen_lowpart (HImode, operands[0]);")
(define_expand "paritydi2"
[(set (match_operand:DI 0 "register_operand")

View file

@ -0,0 +1,10 @@
/* PR target/115102 */
/* { dg-do compile } */
/* { dg-options "-Os -march=x86-64 -dp" } */
/* Return VAL with its upper 16 bits unchanged and its two low-order
   bytes exchanged.  The swap is deliberately spelled out with masks and
   shifts rather than a builtin: the scan-assembler check in this file
   expects the compiler to recognize this form as bswaphisi2_lowpart.  */
unsigned int bswap8 (unsigned int val)
{
return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}
/* { dg-final { scan-assembler "bswaphisi2_lowpart" } } */