Optimize __builtin_shuffle when it's used to zero the upper bits of the dest. [PR target/94680]
If the second operand of __builtin_shuffle is a const zero vector and the mask keeps the low half of the first operand and fills the rest from the zero vector, the shuffle can be optimized to a single movq/vmovaps, i.e.

foo128:
-	vxorps	%xmm1, %xmm1, %xmm1
-	vmovlhps	%xmm1, %xmm0, %xmm0
+	vmovq	%xmm0, %xmm0
foo256:
-	vxorps	%xmm1, %xmm1, %xmm1
-	vshuff32x4	$0, %ymm1, %ymm0, %ymm0
+	vmovaps	%xmm0, %xmm0
foo512:
-	vxorps	%xmm1, %xmm1, %xmm1
-	vshuff32x4	$68, %zmm1, %zmm0, %zmm0
+	vmovaps	%ymm0, %ymm0

gcc/ChangeLog:

	PR target/94680
	* config/i386/sse.md (ssedoublevecmode): Add attribute for
	V64QI/V32HI/V16SI/V8DI.
	(ssehalfvecmode): Add attribute for V2DI/V2DF.
	(*vec_concatv4si_0): Extend to VI124_128.
	(*vec_concat<mode>_0): New pre-reload splitter.
	* config/i386/predicates.md (movq_parallel): New predicate.

gcc/testsuite/ChangeLog:

	PR target/94680
	* gcc.target/i386/avx-pr94680.c: New test.
	* gcc.target/i386/avx512f-pr94680.c: New test.
	* gcc.target/i386/sse2-pr94680.c: New test.
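For reference, source that produces the foo128/foo256/foo512 assembly above looks roughly like the following (a reconstruction based on the new tests added by this commit, not part of the commit message; the function names simply mirror the assembly labels):

typedef float v4sf __attribute__((vector_size (16)));
typedef float v8sf __attribute__((vector_size (32)));
typedef float v16sf __attribute__((vector_size (64)));
typedef int v4si __attribute__((vector_size (16)));
typedef int v8si __attribute__((vector_size (32)));
typedef int v16si __attribute__((vector_size (64)));

/* Keep the low 64 bits of x, zero the rest.  */
v4sf
foo128 (v4sf x)
{
  return __builtin_shuffle (x, (v4sf) { 0, 0, 0, 0 }, (v4si) { 0, 1, 4, 5 });
}

/* Keep the low 128 bits of x, zero the rest.  */
v8sf
foo256 (v8sf x)
{
  return __builtin_shuffle (x, (v8sf) { 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v8si) { 0, 1, 2, 3, 8, 9, 10, 11 });
}

/* Keep the low 256 bits of x, zero the rest.  */
v16sf
foo512 (v16sf x)
{
  return __builtin_shuffle (x, (v16sf) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v16si) { 0, 1, 2, 3, 4, 5, 6, 7,
				      16, 17, 18, 19, 20, 21, 22, 23 });
}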
parent 0ff3a0f2b9
commit 94de7e225c
5 changed files with 403 additions and 8 deletions
gcc/config/i386/predicates.md
@@ -1535,6 +1535,38 @@
   (and (match_code "mem")
        (match_test "MEM_ALIGN (op) < GET_MODE_BITSIZE (mode)")))
 
+;; Return true if OP is a parallel for an mov{d,q,dqa,ps,pd} vec_select,
+;; where one of the two operands of the vec_concat is const0_operand.
+(define_predicate "movq_parallel"
+  (match_code "parallel")
+{
+  unsigned nelt = XVECLEN (op, 0);
+  unsigned nelt2 = nelt >> 1;
+  unsigned i;
+
+  if (nelt < 2)
+    return false;
+
+  /* Validate that all of the elements are constants,
+     lower halves of permute are lower halves of the first operand,
+     upper halves of permute come from any of the second operand.  */
+  for (i = 0; i < nelt; ++i)
+    {
+      rtx er = XVECEXP (op, 0, i);
+      unsigned HOST_WIDE_INT ei;
+
+      if (!CONST_INT_P (er))
+	return 0;
+      ei = INTVAL (er);
+      if (i < nelt2 && ei != i)
+	return 0;
+      if (i >= nelt2 && (ei < nelt || ei >= nelt << 1))
+	return 0;
+    }
+
+  return 1;
+})
+
 ;; Return true if OP is a vzeroall operation, known to be a PARALLEL.
 (define_predicate "vzeroall_operation"
   (match_code "parallel")
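In other words, movq_parallel accepts a permutation whose low half is the identity (the low elements of the first vec_concat operand stay in place) and whose high half selects only elements of the second operand, which the splitter added below requires to be a zero vector. A minimal standalone sketch of the same index test in plain C (check_movq_mask is a hypothetical name used only for illustration, not GCC code):

#include <stdbool.h>

/* Sketch of the index check movq_parallel performs on the permutation
   mask; the same logic written out directly.  */
static bool
check_movq_mask (const unsigned *mask, unsigned nelt)
{
  unsigned nelt2 = nelt >> 1;

  if (nelt < 2)
    return false;

  for (unsigned i = 0; i < nelt; ++i)
    {
      /* Low half must be the identity permutation of operand 1.  */
      if (i < nelt2 && mask[i] != i)
	return false;
      /* High half must select only elements of operand 2.  */
      if (i >= nelt2 && (mask[i] < nelt || mask[i] >= 2 * nelt))
	return false;
    }

  return true;
}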
gcc/config/i386/sse.md
@@ -811,19 +811,22 @@
 ;; Mapping of vector modes to a vector mode of double size
 (define_mode_attr ssedoublevecmode
-  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
+  [(V64QI "V128QI") (V32HI "V64HI") (V16SI "V32SI") (V8DI "V16DI")
+   (V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
    (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
    (V16SF "V32SF") (V8DF "V16DF")
    (V8SF "V16SF") (V4DF "V8DF")
    (V4SF "V8SF") (V2DF "V4DF")])
 
 ;; Mapping of vector modes to a vector mode of half size
+;; instead of V1DI/V1DF, DI/DF are used for V2DI/V2DF although they are scalar.
 (define_mode_attr ssehalfvecmode
   [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
    (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
-   (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
+   (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V2DI "DI")
    (V16SF "V8SF") (V8DF "V4DF")
    (V8SF "V4SF") (V4DF "V2DF")
-   (V4SF "V2SF")])
+   (V4SF "V2SF") (V2DF "DF")])
 
 (define_mode_attr ssehalfvecmodelower
   [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
@@ -15939,11 +15942,11 @@
    (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
    (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
 
-(define_insn "*vec_concatv4si_0"
-  [(set (match_operand:V4SI 0 "register_operand"       "=v,x")
-	(vec_concat:V4SI
-	  (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
-	  (match_operand:V2SI 2 "const0_operand"       " C,C")))]
+(define_insn "*vec_concat<mode>_0"
+  [(set (match_operand:VI124_128 0 "register_operand"       "=v,x")
+	(vec_concat:VI124_128
+	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm,?!*y")
+	  (match_operand:<ssehalfvecmode> 2 "const0_operand"       " C,C")))]
   "TARGET_SSE2"
   "@
    %vmovq\t{%1, %0|%0, %1}
@@ -22158,6 +22161,24 @@
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*vec_concat<mode>_0_1"
+  [(set (match_operand:V 0 "register_operand")
+	(vec_select:V
+	  (vec_concat:<ssedoublevecmode>
+	    (match_operand:V 1 "nonimmediate_operand")
+	    (match_operand:V 2 "const0_operand"))
+	  (match_parallel 3 "movq_parallel"
+	    [(match_operand 4 "const_int_operand")])))]
+  "ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(vec_concat:V (match_dup 1) (match_dup 5)))]
+{
+  operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+  operands[5] = CONST0_RTX (<ssehalfvecmode>mode);
+})
+
 (define_insn "vcvtph2ps<mask_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_select:V4SF
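Taken together, the new pre-reload splitter rewrites a vec_select of a vec_concat with zero whose mask satisfies movq_parallel into a plain vec_concat of the low half with zero, which can then be matched by existing vec_concat patterns such as the extended *vec_concat<mode>_0 above. For example, a function like the one below is expected to compile at -O2 -msse2 to a single movq instead of zeroing a scratch register and shuffling (a hedged illustration along the lines of the new sse2 test; foo_zero_upper is not a name from the patch):

typedef long long v2di __attribute__((vector_size (16)));

/* Keep the low 64-bit element of x, zero the high element.  */
v2di
foo_zero_upper (v2di x)
{
  return __builtin_shuffle (x, (v2di) { 0, 0 }, (v2di) { 0, 2 });
}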
gcc/testsuite/gcc.target/i386/avx-pr94680.c (new file)
@@ -0,0 +1,107 @@
/* { dg-do compile } */
/* { dg-options "-mavx -mno-avx512f -O2" } */
/* { dg-final { scan-assembler-times {(?n)vmov[a-z0-9]*[ \t]*%xmm[0-9]} 12 } } */
/* { dg-final { scan-assembler-not "pxor" } } */

typedef float v8sf __attribute__((vector_size(32)));
typedef double v4df __attribute__ ((vector_size (32)));
typedef long long v4di __attribute__((vector_size(32)));
typedef int v8si __attribute__((vector_size(32)));
typedef short v16hi __attribute__ ((vector_size (32)));
typedef char v32qi __attribute__ ((vector_size (32)));

v4df
foo_v4df (v4df x)
{
  return __builtin_shuffle (x, (v4df) { 0, 0, 0, 0 }, (v4di) { 0, 1, 4, 5 });
}

v4df
foo_v4df_l (v4df x)
{
  return __builtin_shuffle ((v4df) { 0, 0, 0, 0 }, x, (v4di) { 4, 5, 1, 2 });
}

v4di
foo_v4di (v4di x)
{
  return __builtin_shuffle (x, (v4di) { 0, 0, 0, 0 }, (v4di) { 0, 1, 4, 7 });
}

v4di
foo_v4di_l (v4di x)
{
  return __builtin_shuffle ((v4di) { 0, 0, 0, 0 }, x, (v4di) { 4, 5, 3, 1 });
}

v8sf
foo_v8sf (v8sf x)
{
  return __builtin_shuffle ((v8sf) { 0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v8si) { 8, 9, 10, 11, 0, 1, 2, 3 });
}

v8sf
foo_v8sf_l (v8sf x)
{
  return __builtin_shuffle (x, (v8sf) { 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v8si) { 0, 1, 2, 3, 8, 9, 10, 11 });
}

v8si
foo_v8si (v8si x)
{
  return __builtin_shuffle (x, (v8si) { 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v8si) { 0, 1, 2, 3, 13, 12, 11, 15 });
}

v8si
foo_v8si_l (v8si x)
{
  return __builtin_shuffle ((v8si) { 0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v8si) { 8, 9, 10, 11, 7, 6, 5, 4 });
}

v16hi
foo_v16hi (v16hi x)
{
  return __builtin_shuffle (x, (v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v16hi) { 0, 1, 2, 3, 4, 5, 6, 7,
				      24, 17, 26, 19, 28, 21, 30, 23 });
}

v16hi
foo_v16hi_l (v16hi x)
{
  return __builtin_shuffle ((v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v16hi) { 16, 17, 18, 19, 20, 21, 22, 23,
				      15, 0, 13, 2, 11, 4, 9, 6 });
}

v32qi
foo_v32qi (v32qi x)
{
  return __builtin_shuffle (x, (v32qi) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v32qi) { 0, 1, 2, 3, 4, 5, 6, 7,
				      8, 9, 10, 11, 12, 13, 14, 15,
				      32, 49, 34, 58, 36, 53, 38, 39,
				      40, 60, 42, 43, 63, 45, 46, 47 });
}

v32qi
foo_v32qi_l (v32qi x)
{
  return __builtin_shuffle ((v32qi) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v32qi) { 32, 33, 34, 35, 36, 37, 38, 39,
				      40, 41, 42, 43, 44, 45, 46, 47,
				      31, 0, 29, 2, 27, 4, 25, 6,
				      23, 8, 21, 10, 19, 12, 17, 14 });
}
gcc/testsuite/gcc.target/i386/avx512f-pr94680.c (new file)
@@ -0,0 +1,144 @@
/* { dg-do compile } */
/* { dg-options "-mavx512bw -mavx512vbmi -O2" } */
/* { dg-final { scan-assembler-times {(?n)vmov[a-z0-9]*[ \t]*%ymm[0-9]} 12 } } */
/* { dg-final { scan-assembler-not "pxor" } } */

typedef float v16sf __attribute__((vector_size(64)));
typedef double v8df __attribute__ ((vector_size (64)));
typedef long long v8di __attribute__((vector_size(64)));
typedef int v16si __attribute__((vector_size(64)));
typedef short v32hi __attribute__ ((vector_size (64)));
typedef char v64qi __attribute__ ((vector_size (64)));

v8df
foo_v8df (v8df x)
{
  return __builtin_shuffle (x, (v8df) { 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v8di) { 0, 1, 2, 3, 15, 14, 10, 11 });
}

v8df
foo_v8df_l (v8df x)
{
  return __builtin_shuffle ((v8df) { 0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v8di) { 8, 9, 10, 11, 0, 1, 2, 3 });
}

v8di
foo_v8di (v8di x)
{
  return __builtin_shuffle (x, (v8di) { 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v8di) { 0, 1, 2, 3, 8, 9, 10, 11 });
}

v8di
foo_v8di_l (v8di x)
{
  return __builtin_shuffle ((v8di) { 0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v8di) { 8, 9, 10, 11, 7, 6, 5, 4 });
}

v16sf
foo_v16sf (v16sf x)
{
  return __builtin_shuffle (x, (v16sf) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v16si) { 0, 1, 2, 3, 4, 5, 6, 7,
				      16, 17, 18, 19, 20, 21, 22, 23 });
}

v16sf
foo_v16sf_l (v16sf x)
{
  return __builtin_shuffle ((v16sf) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v16si) { 16, 17, 18, 19, 20, 21, 22, 23,
				      0, 15, 2, 13, 4, 11, 6, 9 });
}

v16si
foo_v16si (v16si x)
{
  return __builtin_shuffle (x, (v16si) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v16si) { 0, 1, 2, 3, 4, 5, 6, 7,
				      31, 30, 29, 28, 20, 21, 22, 23 });
}

v16si
foo_v16si_l (v16si x)
{
  return __builtin_shuffle ((v16si) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v16si) { 16, 17, 18, 19, 20, 21, 22, 23,
				      15, 0, 13, 2, 11, 4, 9, 6 });
}

v32hi
foo_v32hi (v32hi x)
{
  return __builtin_shuffle (x, (v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v32hi) { 0, 1, 2, 3, 4, 5, 6, 7,
				      8, 9, 10, 11, 12, 13, 14, 15,
				      63, 33, 61, 35, 59, 37, 57, 39,
				      55, 41, 53, 43, 51, 45, 49, 47 });
}

v32hi
foo_v32hi_l (v32hi x)
{
  return __builtin_shuffle ((v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v32hi) { 32, 33, 34, 35, 36, 37, 38, 39,
				      40, 41, 42, 43, 44, 45, 46, 47,
				      31, 0, 29, 2, 27, 4, 25, 6,
				      23, 8, 21, 10, 19, 12, 17, 14 });
}

v64qi
foo_v64qi (v64qi x)
{
  return __builtin_shuffle (x, (v64qi) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v64qi) {0, 1, 2, 3, 4, 5, 6, 7,
				     8, 9, 10, 11, 12, 13, 14, 15,
				     16, 17, 18, 19, 20, 21, 22, 23,
				     24, 25, 26, 27, 28, 29, 30, 31,
				     64, 127, 66, 125, 68, 123, 70, 121,
				     72, 119, 74, 117, 76, 115, 78, 113,
				     80, 111, 82, 109, 84, 107, 86, 105,
				     88, 103, 90, 101, 92, 99, 94, 97 });
}

v64qi
foo_v64qi_l (v64qi x)
{
  return __builtin_shuffle ((v64qi) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v64qi) { 64, 65, 66, 67, 68, 69, 70, 71,
				      72, 73, 74, 75, 76, 77, 78, 79,
				      80, 81, 82, 83, 84, 85, 86, 87,
				      88, 89, 90, 91, 92, 93, 94, 95,
				      0, 63, 2, 61, 4, 59, 6, 57,
				      8, 55, 10, 53, 12, 51, 14, 49,
				      16, 47, 18, 45, 20, 43, 22, 41,
				      24, 39, 26, 37, 28, 35, 30, 33 });
}
gcc/testsuite/gcc.target/i386/sse2-pr94680.c (new file)
@@ -0,0 +1,91 @@
/* { dg-do compile } */
/* { dg-options "-msse2 -mno-sse4.1 -O2" } */
/* { dg-final { scan-assembler-times {(?n)(?:mov|psrldq).*%xmm[0-9]} 12 } } */
/* { dg-final { scan-assembler-not "pxor" } } */

typedef float v4sf __attribute__((vector_size(16)));
typedef double v2df __attribute__ ((vector_size (16)));
typedef long long v2di __attribute__((vector_size(16)));
typedef int v4si __attribute__((vector_size(16)));
typedef short v8hi __attribute__ ((vector_size (16)));
typedef char v16qi __attribute__ ((vector_size (16)));

v2df
foo_v2df (v2df x)
{
  return __builtin_shuffle (x, (v2df) { 0, 0 }, (v2di) {0, 2});
}

v2df
foo_v2df_l (v2df x)
{
  return __builtin_shuffle ((v2df) { 0, 0 }, x, (v2di) {3, 1});
}

v2di
foo_v2di (v2di x)
{
  return __builtin_shuffle (x, (v2di) { 0, 0 }, (v2di) {0, 3});
}

v2di
foo_v2di_l (v2di x)
{
  return __builtin_shuffle ((v2di) { 0, 0 }, x, (v2di) {3, 0});
}

v4sf
foo_v4sf (v4sf x)
{
  return __builtin_shuffle (x, (v4sf) { 0, 0, 0, 0 }, (v4si) {0, 1, 4, 5});
}

v4sf
foo_v4sf_l (v4sf x)
{
  return __builtin_shuffle ((v4sf) { 0, 0, 0, 0 }, x, (v4si) {4, 5, 3, 1});
}

v4si
foo_v4si (v4si x)
{
  return __builtin_shuffle (x, (v4si) { 0, 0, 0, 0 }, (v4si) {0, 1, 6, 7});
}

v4si
foo_v4si_l (v4si x)
{
  return __builtin_shuffle ((v4si) { 0, 0, 0, 0 }, x, (v4si) {4, 5, 1, 2});
}

v8hi
foo_v8hi (v8hi x)
{
  return __builtin_shuffle (x, (v8hi) { 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v8hi) { 0, 1, 2, 3, 8, 12, 10, 13 });
}

v8hi
foo_v8hi_l (v8hi x)
{
  return __builtin_shuffle ((v8hi) { 0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v8hi) { 8, 9, 10, 11, 7, 6, 5, 4 });
}

v16qi
foo_v16qi (v16qi x)
{
  return __builtin_shuffle (x, (v16qi) { 0, 0, 0, 0, 0, 0, 0, 0,
					 0, 0, 0, 0, 0, 0, 0, 0 },
			    (v16qi) {0, 1, 2, 3, 4, 5, 6, 7,
				     16, 24, 18, 26, 20, 28, 22, 30 });
}

v16qi
foo_v16qi_l (v16qi x)
{
  return __builtin_shuffle ((v16qi) { 0, 0, 0, 0, 0, 0, 0, 0,
				      0, 0, 0, 0, 0, 0, 0, 0 }, x,
			    (v16qi) { 16, 17, 18, 19, 20, 21, 22, 23,
				      15, 0, 13, 2, 11, 4, 9, 6 });
}