i386: psrlq is not used for PERM<a,{0},1,2,3,4> [PR113871]
Introduce vec_shl_<mode> and vec_shr_<mode> expanders to improve '*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);' and '*a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);' shuffles. The generated code improves from: movzwl 6(%rdi), %eax movzwl 4(%rdi), %edx salq $16, %rax orq %rdx, %rax movzwl 2(%rdi), %edx salq $16, %rax orq %rdx, %rax movq %rax, (%rdi) to: movq (%rdi), %xmm0 psrlq $16, %xmm0 movq %xmm0, (%rdi) and to: movq (%rdi), %xmm0 psllq $16, %xmm0 movq %xmm0, (%rdi) in the second case. The patch handles 32-bit vectors as well and improves generated code from: movd (%rdi), %xmm0 pxor %xmm1, %xmm1 punpcklwd %xmm1, %xmm0 pshuflw $230, %xmm0, %xmm0 movd %xmm0, (%rdi) to: movd (%rdi), %xmm0 psrld $16, %xmm0 movd %xmm0, (%rdi) and to: movd (%rdi), %xmm0 pslld $16, %xmm0 movd %xmm0, (%rdi) PR target/113871 gcc/ChangeLog: * config/i386/mmx.md (V248FI): New mode iterator. (V24FI_32): Ditto. (vec_shl_<V248FI:mode>): New expander. (vec_shl_<V24FI_32:mode>): Ditto. (vec_shr_<V248FI:mode>): Ditto. (vec_shr_<V24FI_32:mode>): Ditto. * config/i386/sse.md (vec_shl_<V_128:mode>): Simplify expander. (vec_shr_<V_128:mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr113871-1a.c: New test. * gcc.target/i386/pr113871-1b.c: New test. * gcc.target/i386/pr113871-2a.c: New test. * gcc.target/i386/pr113871-2b.c: New test. * gcc.target/i386/pr113871-3a.c: New test. * gcc.target/i386/pr113871-3b.c: New test. * gcc.target/i386/pr113871-4a.c: New test.
This commit is contained in:
parent
ea76757612
commit
2c2f57e415
9 changed files with 222 additions and 14 deletions
|
@ -84,6 +84,11 @@
|
|||
;; Iterates over the two-element vector modes V2SF and V2SI.
(define_mode_iterator V2FI [V2SF V2SI])
|
||||
|
||||
;; Iterates over V2SF, V2SI, V4HF and V4HI.
(define_mode_iterator V24FI [V2SF V2SI V4HF V4HI])
|
||||
|
||||
;; The V24FI modes plus V8QI; iterated by the vec_shl/vec_shr expanders
;; that perform the shift on the value viewed as V1DI.
(define_mode_iterator V248FI [V2SF V2SI V4HF V4HI V8QI])
|
||||
|
||||
;; V2HF, V2HI and V4QI; iterated by the vec_shl/vec_shr expanders
;; that perform the shift on the value viewed as V1SI.
(define_mode_iterator V24FI_32 [V2HF V2HI V4QI])
|
||||
|
||||
;; Mapping from integer vector mode to mnemonic suffix
|
||||
(define_mode_attr mmxvecsize
|
||||
[(V8QI "b") (V4QI "b") (V2QI "b")
|
||||
|
@ -3729,6 +3734,70 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Expand vec_shl for the V248FI modes by shifting the value viewed
;; as V1DI with mmx_ashlv1di3.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:V248FI 0 "register_operand")
        (ashift:V1DI
          (match_operand:V248FI 1 "nonimmediate_operand")
          (match_operand:DI 2 "nonmemory_operand")))]
  "TARGET_MMX_WITH_SSE"
{
  /* Fresh V1DI pseudo that receives the shifted value.  */
  rtx shifted = gen_reg_rtx (V1DImode);
  /* Force the input into a register, then view it as V1DI.  */
  rtx src = gen_lowpart (V1DImode, force_reg (<MODE>mode, operands[1]));

  emit_insn (gen_mmx_ashlv1di3 (shifted, src, operands[2]));

  /* Copy the result back to operand 0 in its own vector mode.  */
  rtx result = gen_lowpart (<MODE>mode, shifted);
  emit_move_insn (operands[0], result);
  DONE;
})
|
||||
|
||||
;; Expand vec_shl for the V24FI_32 modes by shifting the value viewed
;; as V1SI with mmx_ashlv1si3.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:V24FI_32 0 "register_operand")
        (ashift:V1SI
          (match_operand:V24FI_32 1 "nonimmediate_operand")
          (match_operand:DI 2 "nonmemory_operand")))]
  "TARGET_SSE2"
{
  /* Fresh V1SI pseudo that receives the shifted value.  */
  rtx shifted = gen_reg_rtx (V1SImode);
  /* Force the input into a register, then view it as V1SI.  */
  rtx src = gen_lowpart (V1SImode, force_reg (<MODE>mode, operands[1]));

  emit_insn (gen_mmx_ashlv1si3 (shifted, src, operands[2]));

  /* Copy the result back to operand 0 in its own vector mode.  */
  rtx result = gen_lowpart (<MODE>mode, shifted);
  emit_move_insn (operands[0], result);
  DONE;
})
|
||||
|
||||
;; Expand vec_shr for the V248FI modes by logically right-shifting the
;; value viewed as V1DI with mmx_lshrv1di3.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:V248FI 0 "register_operand")
        (lshiftrt:V1DI
          (match_operand:V248FI 1 "nonimmediate_operand")
          (match_operand:DI 2 "nonmemory_operand")))]
  "TARGET_MMX_WITH_SSE"
{
  /* Fresh V1DI pseudo that receives the shifted value.  */
  rtx shifted = gen_reg_rtx (V1DImode);
  /* Force the input into a register, then view it as V1DI.  */
  rtx src = gen_lowpart (V1DImode, force_reg (<MODE>mode, operands[1]));

  emit_insn (gen_mmx_lshrv1di3 (shifted, src, operands[2]));

  /* Copy the result back to operand 0 in its own vector mode.  */
  rtx result = gen_lowpart (<MODE>mode, shifted);
  emit_move_insn (operands[0], result);
  DONE;
})
|
||||
|
||||
;; Expand vec_shr for the V24FI_32 modes by logically right-shifting the
;; value viewed as V1SI with mmx_lshrv1si3.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:V24FI_32 0 "register_operand")
        (lshiftrt:V1SI
          (match_operand:V24FI_32 1 "nonimmediate_operand")
          (match_operand:DI 2 "nonmemory_operand")))]
  "TARGET_SSE2"
{
  /* Fresh V1SI pseudo that receives the shifted value.  */
  rtx shifted = gen_reg_rtx (V1SImode);
  /* Force the input into a register, then view it as V1SI.  */
  rtx src = gen_lowpart (V1SImode, force_reg (<MODE>mode, operands[1]));

  emit_insn (gen_mmx_lshrv1si3 (shifted, src, operands[2]));

  /* Copy the result back to operand 0 in its own vector mode.  */
  rtx result = gen_lowpart (<MODE>mode, shifted);
  emit_move_insn (operands[0], result);
  DONE;
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel integral comparisons
|
||||
|
|
|
@ -16498,29 +16498,35 @@
|
|||
"operands[3] = XVECEXP (operands[2], 0, 0);")
|
||||
|
||||
;; NOTE(review): this hunk appears to be rendered without +/- markers, so
;; the removed two-set template and operands[3]/operands[4] setup seem to be
;; interleaved with the added single-set template and op0/op1 body -- confirm
;; against the real diff before reading this as one definition.
(define_expand "vec_shl_<mode>"
|
||||
[(set (match_dup 3)
|
||||
[(set (match_operand:V_128 0 "register_operand")
|
||||
(ashift:V1TI
|
||||
(match_operand:V_128 1 "register_operand")
|
||||
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))
|
||||
(set (match_operand:V_128 0 "register_operand") (match_dup 4))]
|
||||
(match_operand:V_128 1 "nonimmediate_operand")
|
||||
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
operands[1] = gen_lowpart (V1TImode, operands[1]);
|
||||
operands[3] = gen_reg_rtx (V1TImode);
|
||||
operands[4] = gen_lowpart (<MODE>mode, operands[3]);
|
||||
rtx op0 = gen_reg_rtx (V1TImode);
|
||||
rtx op1 = force_reg (<MODE>mode, operands[1]);
|
||||
|
||||
emit_insn (gen_sse2_ashlv1ti3
|
||||
(op0, gen_lowpart (V1TImode, op1), operands[2]));
|
||||
emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; NOTE(review): this hunk appears to be rendered without +/- markers, so
;; the removed two-set template and operands[3]/operands[4] setup seem to be
;; interleaved with the added single-set template and op0/op1 body -- confirm
;; against the real diff before reading this as one definition.
(define_expand "vec_shr_<mode>"
|
||||
[(set (match_dup 3)
|
||||
[(set (match_operand:V_128 0 "register_operand")
|
||||
(lshiftrt:V1TI
|
||||
(match_operand:V_128 1 "register_operand")
|
||||
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))
|
||||
(set (match_operand:V_128 0 "register_operand") (match_dup 4))]
|
||||
(match_operand:V_128 1 "nonimmediate_operand")
|
||||
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
operands[1] = gen_lowpart (V1TImode, operands[1]);
|
||||
operands[3] = gen_reg_rtx (V1TImode);
|
||||
operands[4] = gen_lowpart (<MODE>mode, operands[3]);
|
||||
rtx op0 = gen_reg_rtx (V1TImode);
|
||||
rtx op1 = force_reg (<MODE>mode, operands[1]);
|
||||
|
||||
emit_insn (gen_sse2_lshrv1ti3
|
||||
(op0, gen_lowpart (V1TImode, op1), operands[2]));
|
||||
emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "ashlv1ti3"
|
||||
|
|
19
gcc/testsuite/gcc.target/i386/pr113871-1a.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-1a.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
typedef char vect64 __attribute__((vector_size(8)));
|
||||
|
||||
/* Indices 1..8 select elements 1..7 of *a followed by element 0 of the
   zero vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrlq.  */
void f (vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4, 5, 6, 7, 8);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrlq" } } */
|
||||
|
||||
/* Indices 7..14 select element 7 of the zero vector followed by elements
   0..6 of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects psllq.  */
void g(vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect64){0}, *a, 7, 8, 9, 10, 11, 12, 13, 14);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psllq" } } */
|
19
gcc/testsuite/gcc.target/i386/pr113871-1b.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-1b.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
typedef char vect32 __attribute__((vector_size(4)));
|
||||
|
||||
/* Indices 1..4 select elements 1..3 of *a followed by element 0 of the
   zero vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrld.  */
void f (vect32 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect32){0}, 1, 2, 3, 4);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrld" } } */
|
||||
|
||||
/* Indices 3..6 select element 3 of the zero vector followed by elements
   0..2 of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects pslld.  */
void g(vect32 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect32){0}, *a, 3, 4, 5, 6);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pslld" } } */
|
19
gcc/testsuite/gcc.target/i386/pr113871-2a.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-2a.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
typedef short vect64 __attribute__((vector_size(8)));
|
||||
|
||||
/* Indices 1..4 select elements 1..3 of *a followed by element 0 of the
   zero vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrlq.  */
void f (vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrlq" } } */
|
||||
|
||||
/* Indices 3..6 select element 3 of the zero vector followed by elements
   0..2 of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects psllq.  */
void g(vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psllq" } } */
|
19
gcc/testsuite/gcc.target/i386/pr113871-2b.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-2b.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
typedef short vect32 __attribute__((vector_size(4)));
|
||||
|
||||
/* Indices 1, 2 select element 1 of *a followed by element 0 of the zero
   vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrld.  */
void f (vect32 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect32){0}, 1, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrld" } } */
|
||||
|
||||
/* Indices 1, 2 select element 1 of the zero vector followed by element 0
   of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects pslld.  */
void g(vect32 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect32){0}, *a, 1, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pslld" } } */
|
19
gcc/testsuite/gcc.target/i386/pr113871-3a.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-3a.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
typedef _Float16 vect64 __attribute__((vector_size(8)));
|
||||
|
||||
/* Indices 1..4 select elements 1..3 of *a followed by element 0 of the
   zero vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrlq.  */
void f (vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrlq" } } */
|
||||
|
||||
/* Indices 3..6 select element 3 of the zero vector followed by elements
   0..2 of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects psllq.  */
void g(vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psllq" } } */
|
19
gcc/testsuite/gcc.target/i386/pr113871-3b.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-3b.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
typedef _Float16 vect32 __attribute__((vector_size(4)));
|
||||
|
||||
/* Indices 1, 2 select element 1 of *a followed by element 0 of the zero
   vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrld.  */
void f (vect32 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect32){0}, 1, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrld" } } */
|
||||
|
||||
/* Indices 1, 2 select element 1 of the zero vector followed by element 0
   of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects pslld.  */
void g(vect32 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect32){0}, *a, 1, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pslld" } } */
|
19
gcc/testsuite/gcc.target/i386/pr113871-4a.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr113871-4a.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/113871 */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
typedef int vect64 __attribute__((vector_size(8)));
|
||||
|
||||
/* Indices 1, 2 select element 1 of *a followed by element 0 of the zero
   vector, i.e. *a moved down by one element with a zero appended.
   The dg-final check after this function expects psrlq.  */
void f (vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psrlq" } } */
|
||||
|
||||
/* Indices 1, 2 select element 1 of the zero vector followed by element 0
   of *a, i.e. *a moved up by one element with a zero prepended.
   The dg-final check after this function expects psllq.  */
void g(vect64 *a)
|
||||
{
|
||||
*a = __builtin_shufflevector((vect64){0}, *a, 1, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "psllq" } } */
|
Loading…
Add table
Reference in a new issue