i386-cpuid.h (bit_SSE4_1): New.
2007-05-22 H.J. Lu <hongjiu.lu@intel.com> * gcc.dg/i386-cpuid.h (bit_SSE4_1): New. (bit_SSE4_2): Likewise. (bit_POPCNT): Likewise. * gcc.target/i386/i386.exp (check_effective_target_sse4): New. Check if assembler supports SSE4 instructions. * gcc.target/i386/sse4_1-blendpd.c: New file. * gcc.target/i386/sse4_1-blendps.c: Likewise. * gcc.target/i386/sse4_1-blendvpd.c: Likewise. * gcc.target/i386/sse4_1-blendvps.c: Likewise. * gcc.target/i386/sse4_1-check.h: Likewise. * gcc.target/i386/sse4_1-dppd-1.c: Likewise. * gcc.target/i386/sse4_1-dppd-2.c: Likewise. * gcc.target/i386/sse4_1-dpps-1.c: Likewise. * gcc.target/i386/sse4_1-dpps-2.c: Likewise. * gcc.target/i386/sse4_1-extractps.c: Likewise. * gcc.target/i386/sse4_1-insertps-1.c: Likewise. * gcc.target/i386/sse4_1-insertps-2.c: Likewise. * gcc.target/i386/sse4_1-movntdqa.c: Likewise. * gcc.target/i386/sse4_1-mpsadbw.c: Likewise. * gcc.target/i386/sse4_1-packusdw.c: Likewise. * gcc.target/i386/sse4_1-pblendvb.c: Likewise. * gcc.target/i386/sse4_1-pblendw.c: Likewise. * gcc.target/i386/sse4_1-pcmpeqq.c: Likewise. * gcc.target/i386/sse4_1-pextrb.c: Likewise. * gcc.target/i386/sse4_1-pextrd.c: Likewise. * gcc.target/i386/sse4_1-pextrq.c: Likewise. * gcc.target/i386/sse4_1-pextrw.c: Likewise. * gcc.target/i386/sse4_1-phminposuw.c: Likewise. * gcc.target/i386/sse4_1-pinsrb.c: Likewise. * gcc.target/i386/sse4_1-pinsrd.c: Likewise. * gcc.target/i386/sse4_1-pinsrq.c: Likewise. * gcc.target/i386/sse4_1-pmaxsb.c: Likewise. * gcc.target/i386/sse4_1-pmaxsd.c: Likewise. * gcc.target/i386/sse4_1-pmaxud.c: Likewise. * gcc.target/i386/sse4_1-pmaxuw.c: Likewise. * gcc.target/i386/sse4_1-pminsb.c: Likewise. * gcc.target/i386/sse4_1-pminsd.c: Likewise. * gcc.target/i386/sse4_1-pminud.c: Likewise. * gcc.target/i386/sse4_1-pminuw.c: Likewise. * gcc.target/i386/sse4_1-pmovsxbd.c: Likewise. * gcc.target/i386/sse4_1-pmovsxbq.c: Likewise. * gcc.target/i386/sse4_1-pmovsxbw.c: Likewise. * gcc.target/i386/sse4_1-pmovsxdq.c: Likewise. 
* gcc.target/i386/sse4_1-pmovsxwd.c: Likewise. * gcc.target/i386/sse4_1-pmovsxwq.c: Likewise. * gcc.target/i386/sse4_1-pmovzxbd.c: Likewise. * gcc.target/i386/sse4_1-pmovzxbq.c: Likewise. * gcc.target/i386/sse4_1-pmovzxbw.c: Likewise. * gcc.target/i386/sse4_1-pmovzxdq.c: Likewise. * gcc.target/i386/sse4_1-pmovzxwd.c: Likewise. * gcc.target/i386/sse4_1-pmovzxwq.c: Likewise. * gcc.target/i386/sse4_1-pmuldq.c: Likewise. * gcc.target/i386/sse4_1-pmulld.c: Likewise. * gcc.target/i386/sse4_1-ptest-1.c: Likewise. * gcc.target/i386/sse4_1-ptest-2.c: Likewise. * gcc.target/i386/sse4_1-ptest-3.c: Likewise. * gcc.target/i386/sse4_1-round.h: Likewise. * gcc.target/i386/sse4_1-roundpd-1.c: Likewise. * gcc.target/i386/sse4_1-roundpd-2.c: Likewise. * gcc.target/i386/sse4_1-roundpd-3.c: Likewise. * gcc.target/i386/sse4_1-roundps-1.c: Likewise. * gcc.target/i386/sse4_1-roundps-2.c: Likewise. * gcc.target/i386/sse4_1-roundps-3.c: Likewise. * gcc.target/i386/sse4_1-roundsd-1.c: Likewise. * gcc.target/i386/sse4_1-roundsd-2.c: Likewise. * gcc.target/i386/sse4_1-roundsd-3.c: Likewise. * gcc.target/i386/sse4_1-roundsd-4.c: Likewise. * gcc.target/i386/sse4_1-roundss-1.c: Likewise. * gcc.target/i386/sse4_1-roundss-2.c: Likewise. * gcc.target/i386/sse4_1-roundss-3.c: Likewise. * gcc.target/i386/sse4_1-roundss-4.c: Likewise. From-SVN: r124947
This commit is contained in:
parent
9a5cee0228
commit
056299563b
69 changed files with 3383 additions and 0 deletions
|
@ -1,3 +1,79 @@
|
|||
2007-05-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* gcc.dg/i386-cpuid.h (bit_SSE4_1): New.
|
||||
(bit_SSE4_2): Likewise.
|
||||
(bit_POPCNT): Likewise.
|
||||
|
||||
* gcc.target/i386/i386.exp (check_effective_target_sse4): New.
|
||||
Check if assembler supports SSE4 instructions.
|
||||
|
||||
* gcc.target/i386/sse4_1-blendpd.c: New file.
|
||||
* gcc.target/i386/sse4_1-blendps.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-blendvpd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-blendvps.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-check.h: Likewise.
|
||||
* gcc.target/i386/sse4_1-dppd-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-dppd-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-dpps-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-dpps-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-extractps.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-insertps-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-insertps-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-movntdqa.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-mpsadbw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-packusdw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pblendvb.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pblendw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pcmpeqq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pextrb.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pextrd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pextrq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pextrw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-phminposuw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pinsrb.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pinsrd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pinsrq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmaxsb.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmaxsd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmaxud.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmaxuw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pminsb.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pminsd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pminud.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pminuw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovsxbd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovsxbq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovsxbw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovsxdq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovsxwd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovsxwq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovzxbd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovzxbq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovzxbw.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovzxdq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovzxwd.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmovzxwq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmuldq.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-pmulld.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-ptest-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-ptest-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-ptest-3.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-round.h: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundpd-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundpd-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundpd-3.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundps-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundps-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundps-3.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundsd-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundsd-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundsd-3.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundsd-4.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundss-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundss-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundss-3.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-roundss-4.c: Likewise.
|
||||
|
||||
2007-05-22 Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>
|
||||
|
||||
PR fortran/31627
|
||||
|
|
|
@ -5,6 +5,9 @@
|
|||
/* %ecx */
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
#define bit_SSE4_2 (1 << 20)
|
||||
#define bit_POPCNT (1 << 23)
|
||||
|
||||
/* %edx */
|
||||
#define bit_CMOV (1 << 15)
|
||||
|
|
|
@ -37,6 +37,20 @@ proc check_effective_target_ssse3 { } {
|
|||
} "-O2 -mssse3" ]
|
||||
}
|
||||
|
||||
# Return 1 if sse4 instructions can be compiled.
|
||||
# Effective-target check selected by "dg-require-effective-target sse4"
# in the SSE4.1 tests.  It passes a small C snippet that calls
# __builtin_ia32_pmulld128 to check_no_compiler_messages, asking for an
# "object" build with "-O2 -msse4.1", and returns that helper's result
# unchanged.  "sse4.1" is the first argument given to the helper --
# presumably the name the result is recorded under; confirm against
# check_no_compiler_messages.
proc check_effective_target_sse4 { } {
|
||||
return [check_no_compiler_messages sse4.1 object {
|
||||
typedef long long __m128i __attribute__ ((__vector_size__ (16)));
|
||||
typedef int __v4si __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__m128i _mm_mullo_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X,
|
||||
(__v4si)__Y);
|
||||
|
||||
}
|
||||
} "-O2 -msse4.1" ]
|
||||
}
|
||||
|
||||
# Return 1 if sse4a instructions can be compiled.
|
||||
proc check_effective_target_sse4a { } {
|
||||
return [check_no_compiler_messages sse4a object {
|
||||
|
|
81
gcc/testsuite/gcc.target/i386/sse4_1-blendpd.c
Normal file
81
gcc/testsuite/gcc.target/i386/sse4_1-blendpd.c
Normal file
|
@ -0,0 +1,81 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 0x03
|
||||
#endif
|
||||
|
||||
static void
|
||||
init_blendpd (double *src1, double *src2)
|
||||
{
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM * 2; i++)
|
||||
{
|
||||
src1[i] = i * i * sign;
|
||||
src2[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
check_blendpd (__m128d *dst, double *src1, double *src2)
|
||||
{
|
||||
double tmp[2];
|
||||
int j;
|
||||
|
||||
memcpy (&tmp[0], src1, sizeof (tmp));
|
||||
|
||||
for(j = 0; j < 2; j++)
|
||||
if ((MASK & (1 << j)))
|
||||
tmp[j] = src2[j];
|
||||
|
||||
return memcmp (dst, &tmp[0], sizeof (tmp));
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
__m128d x, y;
|
||||
union
|
||||
{
|
||||
__m128d x[NUM];
|
||||
double d[NUM * 2];
|
||||
} dst, src1, src2;
|
||||
union
|
||||
{
|
||||
__m128d x;
|
||||
double d[2];
|
||||
} src3;
|
||||
int i;
|
||||
|
||||
init_blendpd (src1.d, src2.d);
|
||||
|
||||
/* Check blendpd imm8, m128, xmm */
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
dst.x[i] = _mm_blend_pd (src1.x[i], src2.x[i], MASK);
|
||||
if (check_blendpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2]))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check blendpd imm8, xmm, xmm */
|
||||
src3.x = _mm_setzero_pd ();
|
||||
|
||||
x = _mm_blend_pd (dst.x[2], src3.x, MASK);
|
||||
y = _mm_blend_pd (src3.x, dst.x[2], MASK);
|
||||
|
||||
if (check_blendpd (&x, &dst.d[4], &src3.d[0]))
|
||||
abort ();
|
||||
|
||||
if (check_blendpd (&y, &src3.d[0], &dst.d[4]))
|
||||
abort ();
|
||||
}
|
78
gcc/testsuite/gcc.target/i386/sse4_1-blendps.c
Normal file
78
gcc/testsuite/gcc.target/i386/sse4_1-blendps.c
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 0x0f
|
||||
#endif
|
||||
|
||||
static void
|
||||
init_blendps (float *src1, float *src2)
|
||||
{
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM * 4; i++)
|
||||
{
|
||||
src1[i] = i * i * sign;
|
||||
src2[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
check_blendps (__m128 *dst, float *src1, float *src2)
|
||||
{
|
||||
float tmp[4];
|
||||
int j;
|
||||
|
||||
memcpy (&tmp[0], src1, sizeof (tmp));
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((MASK & (1 << j)))
|
||||
tmp[j] = src2[j];
|
||||
|
||||
return memcmp (dst, &tmp[0], sizeof (tmp));
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
__m128 x, y;
|
||||
union
|
||||
{
|
||||
__m128 x[NUM];
|
||||
float f[NUM * 4];
|
||||
} dst, src1, src2;
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} src3;
|
||||
int i;
|
||||
|
||||
init_blendps (src1.f, src2.f);
|
||||
|
||||
/* Check blendps imm8, m128, xmm */
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
|
||||
if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check blendps imm8, xmm, xmm */
|
||||
x = _mm_blend_ps (dst.x[2], src3.x, MASK);
|
||||
y = _mm_blend_ps (src3.x, dst.x[2], MASK);
|
||||
|
||||
if (check_blendps (&x, &dst.f[8], &src3.f[0]))
|
||||
abort ();
|
||||
|
||||
if (check_blendps (&y, &src3.f[0], &dst.f[8]))
|
||||
abort ();
|
||||
}
|
65
gcc/testsuite/gcc.target/i386/sse4_1-blendvpd.c
Normal file
65
gcc/testsuite/gcc.target/i386/sse4_1-blendvpd.c
Normal file
|
@ -0,0 +1,65 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
static void
|
||||
init_blendvpd (double *src1, double *src2, double *mask)
|
||||
{
|
||||
int i, msk, sign = 1;
|
||||
|
||||
msk = -1;
|
||||
for (i = 0; i < NUM * 2; i++)
|
||||
{
|
||||
if((i % 2) == 0)
|
||||
msk++;
|
||||
src1[i] = i* (i + 1) * sign;
|
||||
src2[i] = (i + 20) * sign;
|
||||
mask[i] = (i + 120) * i;
|
||||
if( (msk & (1 << (i % 2))))
|
||||
mask[i] = -mask[i];
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
check_blendvpd (__m128d *dst, double *src1, double *src2,
|
||||
double *mask)
|
||||
{
|
||||
double tmp[2];
|
||||
int j;
|
||||
|
||||
memcpy (&tmp[0], src1, sizeof (tmp));
|
||||
for (j = 0; j < 2; j++)
|
||||
if (mask [j] < 0.0)
|
||||
tmp[j] = src2[j];
|
||||
|
||||
return memcmp (dst, &tmp[0], sizeof (tmp));
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128d x[NUM];
|
||||
double d[NUM * 2];
|
||||
} dst, src1, src2, mask;
|
||||
int i;
|
||||
|
||||
init_blendvpd (src1.d, src2.d, mask.d);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
dst.x[i] = _mm_blendv_pd (src1.x[i], src2.x[i], mask.x[i]);
|
||||
if (check_blendvpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2],
|
||||
&mask.d[i * 2]))
|
||||
abort ();
|
||||
}
|
||||
}
|
65
gcc/testsuite/gcc.target/i386/sse4_1-blendvps.c
Normal file
65
gcc/testsuite/gcc.target/i386/sse4_1-blendvps.c
Normal file
|
@ -0,0 +1,65 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
static void
|
||||
init_blendvps (float *src1, float *src2, float *mask)
|
||||
{
|
||||
int i, msk, sign = 1;
|
||||
|
||||
msk = -1;
|
||||
for (i = 0; i < NUM * 4; i++)
|
||||
{
|
||||
if((i % 4) == 0)
|
||||
msk++;
|
||||
src1[i] = i* (i + 1) * sign;
|
||||
src2[i] = (i + 20) * sign;
|
||||
mask[i] = (i + 120) * i;
|
||||
if( (msk & (1 << (i % 4))))
|
||||
mask[i] = -mask[i];
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
check_blendvps (__m128 *dst, float *src1, float *src2,
|
||||
float *mask)
|
||||
{
|
||||
float tmp[4];
|
||||
int j;
|
||||
|
||||
memcpy (&tmp[0], src1, sizeof (tmp));
|
||||
for (j = 0; j < 4; j++)
|
||||
if (mask [j] < 0.0)
|
||||
tmp[j] = src2[j];
|
||||
|
||||
return memcmp (dst, &tmp[0], sizeof (tmp));
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x[NUM];
|
||||
float f[NUM * 4];
|
||||
} dst, src1, src2, mask;
|
||||
int i;
|
||||
|
||||
init_blendvps (src1.f, src2.f, mask.f);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
dst.x[i] = _mm_blendv_ps (src1.x[i], src2.x[i], mask.x[i]);
|
||||
if (check_blendvps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4],
|
||||
&mask.f[i * 4]))
|
||||
abort ();
|
||||
}
|
||||
}
|
22
gcc/testsuite/gcc.target/i386/sse4_1-check.h
Normal file
22
gcc/testsuite/gcc.target/i386/sse4_1-check.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../auto-host.h"
|
||||
|
||||
#include "../../gcc.dg/i386-cpuid.h"
|
||||
|
||||
static void sse4_1_test (void);
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned long cpu_facilities;
|
||||
|
||||
cpu_facilities = i386_cpuid_ecx ();
|
||||
|
||||
/* Run SSE4.1 test only if host has SSE4.1 support. */
|
||||
if ((cpu_facilities & bit_SSE4_1))
|
||||
sse4_1_test ();
|
||||
|
||||
exit (0);
|
||||
}
|
63
gcc/testsuite/gcc.target/i386/sse4_1-dppd-1.c
Normal file
63
gcc/testsuite/gcc.target/i386/sse4_1-dppd-1.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define lmskN 0x00
|
||||
#define lmsk0 0x01
|
||||
#define lmsk1 0x02
|
||||
#define lmsk01 0x03
|
||||
|
||||
#define hmskA 0x30
|
||||
#define hmsk0 0x10
|
||||
#define hmsk1 0x20
|
||||
#define hmsk01 0x30
|
||||
#define hmskN 0x00
|
||||
|
||||
#ifndef HIMASK
|
||||
#define HIMASK hmskA
|
||||
#endif
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128d x;
|
||||
double d[2];
|
||||
} val1, val2, res[4];
|
||||
int masks[4];
|
||||
int i, j;
|
||||
|
||||
val1.d[0] = 2.;
|
||||
val1.d[1] = 3.;
|
||||
|
||||
val2.d[0] = 10.;
|
||||
val2.d[1] = 100.;
|
||||
|
||||
res[0].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmskN);
|
||||
res[1].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmsk0);
|
||||
res[2].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmsk1);
|
||||
res[3].x = _mm_dp_pd (val1.x, val2.x, HIMASK | lmsk01);
|
||||
|
||||
masks[0] = HIMASK | lmskN;
|
||||
masks[1] = HIMASK | lmsk0;
|
||||
masks[2] = HIMASK | lmsk1;
|
||||
masks[3] = HIMASK | lmsk01;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
double tmp = 0.;
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
if (HIMASK & (0x10 << j))
|
||||
tmp = tmp + (val1.d[j] * val2.d[j]);
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
if ((masks[i] & (1 << j)) && res[i].d[j] != tmp)
|
||||
abort ();
|
||||
}
|
||||
}
|
64
gcc/testsuite/gcc.target/i386/sse4_1-dppd-2.c
Normal file
64
gcc/testsuite/gcc.target/i386/sse4_1-dppd-2.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define lmskN 0x00
|
||||
#define lmsk0 0x01
|
||||
#define lmsk1 0x02
|
||||
#define lmsk01 0x03
|
||||
|
||||
#define hmskA 0x30
|
||||
#define hmsk0 0x10
|
||||
#define hmsk1 0x20
|
||||
#define hmsk01 0x30
|
||||
#define hmskN 0x00
|
||||
|
||||
#ifndef HIMASK
|
||||
#define HIMASK hmskA
|
||||
#endif
|
||||
|
||||
#ifndef LOMASK
|
||||
#define LOMASK lmsk01
|
||||
#endif
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128d x;
|
||||
double d[2];
|
||||
} val1[4], val2[4], res[4], chk[4];
|
||||
int i, j;
|
||||
double tmp;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
val1[i].d [0] = 2.;
|
||||
val1[i].d [1] = 3.;
|
||||
|
||||
val2[i].d [0] = 10.;
|
||||
val2[i].d [1] = 100.;
|
||||
|
||||
tmp = 0.;
|
||||
for (j = 0; j < 2; j++)
|
||||
if ((HIMASK & (0x10 << j)))
|
||||
tmp += val1[i].d [j] * val2[i].d [j];
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
if ((LOMASK & (1 << j)))
|
||||
chk[i].d[j] = tmp;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
res[i].x = _mm_dp_pd (val1[i].x, val2[i].x, HIMASK | LOMASK);
|
||||
if (memcmp (&res[i], &chk[i], sizeof (chk[i])))
|
||||
abort ();
|
||||
}
|
||||
}
|
106
gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c
Normal file
106
gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c
Normal file
|
@ -0,0 +1,106 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define lmskN 0x00
|
||||
#define lmsk0 0x01
|
||||
#define lmsk1 0x02
|
||||
#define lmsk2 0x04
|
||||
#define lmsk3 0x08
|
||||
#define lmsk01 0x03
|
||||
#define lmsk02 0x05
|
||||
#define lmsk03 0x09
|
||||
#define lmsk12 0x06
|
||||
#define lmsk13 0x0A
|
||||
#define lmsk23 0x0C
|
||||
#define lmskA 0x0F
|
||||
|
||||
#define hmskN 0x00
|
||||
#define hmskA 0xF0
|
||||
#define hmsk0 0x10
|
||||
#define hmsk1 0x20
|
||||
#define hmsk2 0x40
|
||||
#define hmsk3 0x80
|
||||
#define hmsk01 0x30
|
||||
#define hmsk02 0x50
|
||||
#define hmsk03 0x90
|
||||
#define hmsk12 0x60
|
||||
#define hmsk13 0xA0
|
||||
#define hmsk23 0xC0
|
||||
|
||||
#ifndef HIMASK
|
||||
#define HIMASK hmskA
|
||||
#endif
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} val1, val2, res[16];
|
||||
int masks[16];
|
||||
int i, j;
|
||||
|
||||
val1.f[0] = 2.;
|
||||
val1.f[1] = 3.;
|
||||
val1.f[2] = 4.;
|
||||
val1.f[3] = 5.;
|
||||
|
||||
val2.f[0] = 10.;
|
||||
val2.f[1] = 100.;
|
||||
val2.f[2] = 1000.;
|
||||
val2.f[3] = 10000.;
|
||||
|
||||
res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0);
|
||||
res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1);
|
||||
res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2);
|
||||
res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3);
|
||||
res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01);
|
||||
res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02);
|
||||
res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03);
|
||||
res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12);
|
||||
res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13);
|
||||
res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23);
|
||||
res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0));
|
||||
res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1));
|
||||
res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2));
|
||||
res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3));
|
||||
res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN);
|
||||
res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA);
|
||||
|
||||
masks[0] = HIMASK | lmsk0;
|
||||
masks[1] = HIMASK | lmsk1;
|
||||
masks[2] = HIMASK | lmsk2;
|
||||
masks[3] = HIMASK | lmsk3;
|
||||
masks[4] = HIMASK | lmsk01;
|
||||
masks[5] = HIMASK | lmsk02;
|
||||
masks[6] = HIMASK | lmsk03;
|
||||
masks[7] = HIMASK | lmsk12;
|
||||
masks[8] = HIMASK | lmsk13;
|
||||
masks[9] = HIMASK | lmsk23;
|
||||
masks[10] = HIMASK | (0x0F & ~lmsk0);
|
||||
masks[11] = HIMASK | (0x0F & ~lmsk1);
|
||||
masks[12] = HIMASK | (0x0F & ~lmsk2);
|
||||
masks[13] = HIMASK | (0x0F & ~lmsk3);
|
||||
masks[14] = HIMASK | lmskN;
|
||||
masks[15] = HIMASK | lmskA;
|
||||
|
||||
for (i = 0; i <= 15; i++)
|
||||
{
|
||||
float tmp = 0.;
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((HIMASK & (0x10 << j)))
|
||||
tmp += val1.f[j] * val2.f[j];
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((masks[i] & (1 << j)) && res[i].f[j] != tmp)
|
||||
abort ();
|
||||
}
|
||||
}
|
83
gcc/testsuite/gcc.target/i386/sse4_1-dpps-2.c
Normal file
83
gcc/testsuite/gcc.target/i386/sse4_1-dpps-2.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define lmskN 0x00
|
||||
#define lmsk0 0x01
|
||||
#define lmsk1 0x02
|
||||
#define lmsk2 0x04
|
||||
#define lmsk3 0x08
|
||||
#define lmsk01 0x03
|
||||
#define lmsk02 0x05
|
||||
#define lmsk03 0x09
|
||||
#define lmsk12 0x06
|
||||
#define lmsk13 0x0A
|
||||
#define lmsk23 0x0C
|
||||
#define lmskA 0x0F
|
||||
|
||||
#define hmskN 0x00
|
||||
#define hmskA 0xF0
|
||||
#define hmsk0 0x10
|
||||
#define hmsk1 0x20
|
||||
#define hmsk2 0x40
|
||||
#define hmsk3 0x80
|
||||
#define hmsk01 0x30
|
||||
#define hmsk02 0x50
|
||||
#define hmsk03 0x90
|
||||
#define hmsk12 0x60
|
||||
#define hmsk13 0xA0
|
||||
#define hmsk23 0xC0
|
||||
|
||||
#ifndef HIMASK
|
||||
#define HIMASK hmskA
|
||||
#endif
|
||||
|
||||
#ifndef LOMASK
|
||||
#define LOMASK lmskA
|
||||
#endif
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} val1[16], val2[16], res[16], chk[16];
|
||||
int i,j;
|
||||
float tmp;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
val1[i].f[0] = 2.;
|
||||
val1[i].f[1] = 3.;
|
||||
val1[i].f[2] = 4.;
|
||||
val1[i].f[3] = 5.;
|
||||
|
||||
val2[i].f[0] = 10.;
|
||||
val2[i].f[1] = 100.;
|
||||
val2[i].f[2] = 1000.;
|
||||
val2[i].f[3] = 10000.;
|
||||
|
||||
tmp = 0.;
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((HIMASK & (0x10 << j)))
|
||||
tmp += val1[i].f [j] * val2[i].f [j];
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((LOMASK & (1 << j)))
|
||||
chk[i].f[j] = tmp;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
res[i].x = _mm_dp_ps (val1[i].x, val2[i].x, HIMASK | LOMASK);
|
||||
if (memcmp (&res[i], &chk[i], sizeof (chk[i])))
|
||||
abort ();
|
||||
}
|
||||
}
|
64
gcc/testsuite/gcc.target/i386/sse4_1-extractps.c
Normal file
64
gcc/testsuite/gcc.target/i386/sse4_1-extractps.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
int masks[4];
|
||||
|
||||
#define msk0 0x00
|
||||
#define msk1 0x01
|
||||
#define msk2 0x02
|
||||
#define msk3 0x03
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} val1, val2;
|
||||
union
|
||||
{
|
||||
int i;
|
||||
float f;
|
||||
} res[4];
|
||||
float resm[4];
|
||||
int i;
|
||||
|
||||
val1.f[0] = 10.;
|
||||
val1.f[1] = 2.;
|
||||
val1.f[2] = 3.;
|
||||
val1.f[3] = 40.;
|
||||
|
||||
val2.f[0] = 77.;
|
||||
val2.f[1] = 21.;
|
||||
val2.f[2] = 34.;
|
||||
val2.f[3] = 49.;
|
||||
|
||||
res[0].i = _mm_extract_ps (val1.x, msk0);
|
||||
res[1].i = _mm_extract_ps (val1.x, msk1);
|
||||
res[2].i = _mm_extract_ps (val1.x, msk2);
|
||||
res[3].i = _mm_extract_ps (val1.x, msk3);
|
||||
|
||||
_MM_EXTRACT_FLOAT (resm[0], val2.x, msk0);
|
||||
_MM_EXTRACT_FLOAT (resm[1], val2.x, msk1);
|
||||
_MM_EXTRACT_FLOAT (resm[2], val2.x, msk2);
|
||||
_MM_EXTRACT_FLOAT (resm[3], val2.x, msk3);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
|
||||
for( i=0; i < 4; i++ )
|
||||
{
|
||||
if (res[i].f != val1.f[masks[i]])
|
||||
abort ();
|
||||
if (resm[i] != val2.f[masks[i]])
|
||||
abort ();
|
||||
}
|
||||
}
|
71
gcc/testsuite/gcc.target/i386/sse4_1-insertps-1.c
Normal file
71
gcc/testsuite/gcc.target/i386/sse4_1-insertps-1.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define msk0 0x01
|
||||
#define msk1 0x10
|
||||
#define msk2 0x29
|
||||
#define msk3 0x30
|
||||
|
||||
#define msk4 0xFC
|
||||
#define msk5 0x05
|
||||
#define msk6 0x0A
|
||||
#define msk7 0x0F
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} res[8], val1, val2, tmp;
|
||||
int masks[8];
|
||||
int i, j;
|
||||
|
||||
val2.f[0] = 55.0;
|
||||
val2.f[1] = 55.0;
|
||||
val2.f[2] = 55.0;
|
||||
val2.f[3] = 55.0;
|
||||
|
||||
val1.f[0] = 1.;
|
||||
val1.f[1] = 2.;
|
||||
val1.f[2] = 3.;
|
||||
val1.f[3] = 4.;
|
||||
|
||||
res[0].x = _mm_insert_ps (val2.x, val1.x, msk0);
|
||||
res[1].x = _mm_insert_ps (val2.x, val1.x, msk1);
|
||||
res[2].x = _mm_insert_ps (val2.x, val1.x, msk2);
|
||||
res[3].x = _mm_insert_ps (val2.x, val1.x, msk3);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4);
|
||||
|
||||
masks[4] = msk4;
|
||||
masks[5] = msk4;
|
||||
masks[6] = msk4;
|
||||
masks[7] = msk4;
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
tmp = val2;
|
||||
tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6];
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
if (masks[i] & (0x1 << j))
|
||||
tmp.f[j] = 0.f;
|
||||
|
||||
if (memcmp (&res[i], &tmp, sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
}
|
44
gcc/testsuite/gcc.target/i386/sse4_1-insertps-2.c
Normal file
44
gcc/testsuite/gcc.target/i386/sse4_1-insertps-2.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128 x;
|
||||
float f[4];
|
||||
} vals[4], val;
|
||||
int i, j;
|
||||
|
||||
val.f[0]= 1.;
|
||||
val.f[1]= 2.;
|
||||
val.f[2]= 3.;
|
||||
val.f[3]= 4.;
|
||||
|
||||
vals[0].x = _MM_PICK_OUT_PS (val.x, 0);
|
||||
vals[1].x = _MM_PICK_OUT_PS (val.x, 1);
|
||||
vals[2].x = _MM_PICK_OUT_PS (val.x, 2);
|
||||
vals[3].x = _MM_PICK_OUT_PS (val.x, 3);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((j != 0 && vals[i].f[j] != 0)
|
||||
|| (j == 0 && vals[i].f[j] != val.f[i]))
|
||||
abort ();
|
||||
|
||||
if (_MM_MK_INSERTPS_NDX(0, 0, 0x1) != 0x01
|
||||
|| _MM_MK_INSERTPS_NDX(0, 1, 0x2) != 0x12
|
||||
|| _MM_MK_INSERTPS_NDX(0, 2, 0x3) != 0x23
|
||||
|| _MM_MK_INSERTPS_NDX(0, 3, 0x4) != 0x34
|
||||
|| _MM_MK_INSERTPS_NDX(1, 0, 0x5) != 0x45
|
||||
|| _MM_MK_INSERTPS_NDX(1, 1, 0x6) != 0x56
|
||||
|| _MM_MK_INSERTPS_NDX(2, 2, 0x7) != 0xA7
|
||||
|| _MM_MK_INSERTPS_NDX(3, 3, 0x8) != 0xF8)
|
||||
abort ();
|
||||
}
|
43
gcc/testsuite/gcc.target/i386/sse4_1-movntdqa.c
Normal file
43
gcc/testsuite/gcc.target/i386/sse4_1-movntdqa.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
static void
|
||||
init_movntdqa (int *src)
|
||||
{
|
||||
int i, j, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
src[i * 4 + j] = j * i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM];
|
||||
int i[NUM * 4];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
init_movntdqa (src.i);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
dst.x[i] = _mm_stream_load_si128 (&src.x[i]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (memcmp (&dst.x[i], &src.x[i], sizeof(src.x[i])))
|
||||
abort ();
|
||||
}
|
122
gcc/testsuite/gcc.target/i386/sse4_1-mpsadbw.c
Normal file
122
gcc/testsuite/gcc.target/i386/sse4_1-mpsadbw.c
Normal file
|
@ -0,0 +1,122 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define msk0 0xC0
|
||||
#define msk1 0x01
|
||||
#define msk2 0xF2
|
||||
#define msk3 0x03
|
||||
#define msk4 0x84
|
||||
#define msk5 0x05
|
||||
#define msk6 0xE6
|
||||
#define msk7 0x67
|
||||
|
||||
static __m128i
|
||||
compute_mpsadbw (unsigned char *v1, unsigned char *v2, int mask)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned short s[8];
|
||||
} ret;
|
||||
unsigned char s[4];
|
||||
int i, j;
|
||||
int offs1, offs2;
|
||||
|
||||
offs2 = 4 * (mask & 3);
|
||||
for (i = 0; i < 4; i++)
|
||||
s[i] = v2[offs2 + i];
|
||||
|
||||
offs1 = 4 * ((mask & 4) >> 2);
|
||||
for (j = 0; j < 8; j++)
|
||||
{
|
||||
ret.s[j] = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
ret.s[j] += abs (v1[offs1 + j + i] - s[i]);
|
||||
}
|
||||
|
||||
return ret.x;
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned int i[4];
|
||||
unsigned char c[16];
|
||||
} val1, val2, val3 [8];
|
||||
__m128i res[8], tmp;
|
||||
unsigned char masks[8];
|
||||
int i;
|
||||
|
||||
val1.i[0] = 0x35251505;
|
||||
val1.i[1] = 0x75655545;
|
||||
val1.i[2] = 0xB5A59585;
|
||||
val1.i[3] = 0xF5E5D5C5;
|
||||
|
||||
val2.i[0] = 0x31211101;
|
||||
val2.i[1] = 0x71615141;
|
||||
val2.i[2] = 0xB1A19181;
|
||||
val2.i[3] = 0xF1E1D1C1;
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
switch (i % 3)
|
||||
{
|
||||
case 1:
|
||||
val3[i].i[0] = 0xF1E1D1C1;
|
||||
val3[i].i[1] = 0xB1A19181;
|
||||
val3[i].i[2] = 0x71615141;
|
||||
val3[i].i[3] = 0x31211101;
|
||||
break;
|
||||
default:
|
||||
val3[i].x = val2.x;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check mpsadbw imm8, xmm, xmm. */
|
||||
res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0);
|
||||
res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1);
|
||||
res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2);
|
||||
res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3);
|
||||
res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4);
|
||||
res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5);
|
||||
res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6);
|
||||
res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
masks[4] = msk4;
|
||||
masks[5] = msk5;
|
||||
masks[6] = msk6;
|
||||
masks[7] = msk7;
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
tmp = compute_mpsadbw (val1.c, val2.c, masks[i]);
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check mpsadbw imm8, m128, xmm. */
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
|
||||
masks[i] = msk4;
|
||||
}
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]);
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
}
|
65
gcc/testsuite/gcc.target/i386/sse4_1-packusdw.c
Normal file
65
gcc/testsuite/gcc.target/i386/sse4_1-packusdw.c
Normal file
|
@ -0,0 +1,65 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
/* Saturate IVAL to the unsigned 16-bit range: negative values become 0,
   values above 0xffff become 0xffff, anything else is returned
   unchanged.  */
static unsigned short
int_to_ushort (int iVal)
{
  if (iVal < 0)
    return 0;

  if (iVal > 0xffff)
    return 0xffff;

  return (unsigned short) iVal;
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
} src1, src2;
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
unsigned short s[NUM * 2];
|
||||
} dst;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_packus_epi32 (src1.x [i / 4], src2.x [i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i ++)
|
||||
{
|
||||
int dstIndex;
|
||||
unsigned short sVal;
|
||||
|
||||
sVal = int_to_ushort (src1.i[i]);
|
||||
dstIndex = (i % 4) + (i / 4) * 8;
|
||||
if (sVal != dst.s[dstIndex])
|
||||
abort ();
|
||||
|
||||
sVal = int_to_ushort (src2.i[i]);
|
||||
dstIndex += 4;
|
||||
if (sVal != dst.s[dstIndex])
|
||||
abort ();
|
||||
}
|
||||
}
|
62
gcc/testsuite/gcc.target/i386/sse4_1-pblendvb.c
Normal file
62
gcc/testsuite/gcc.target/i386/sse4_1-pblendvb.c
Normal file
|
@ -0,0 +1,62 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
static void
|
||||
init_pblendvb (unsigned char *src1, unsigned char *src2,
|
||||
unsigned char *mask)
|
||||
{
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM * 16; i++)
|
||||
{
|
||||
src1[i] = i* i * sign;
|
||||
src2[i] = (i + 20) * sign;
|
||||
mask[i] = (i % 3) + ((i * (14 + sign))
|
||||
^ (src1[i] | src2[i] | (i*3)));
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
check_pblendvb (__m128i *dst, unsigned char *src1,
|
||||
unsigned char *src2, unsigned char *mask)
|
||||
{
|
||||
unsigned char tmp[16];
|
||||
int j;
|
||||
|
||||
memcpy (&tmp[0], src1, sizeof (tmp));
|
||||
for (j = 0; j < 16; j++)
|
||||
if (mask [j] & 0x80)
|
||||
tmp[j] = src2[j];
|
||||
|
||||
return memcmp (dst, &tmp[0], sizeof (tmp));
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM];
|
||||
unsigned char c[NUM * 16];
|
||||
} dst, src1, src2, mask;
|
||||
int i;
|
||||
|
||||
init_pblendvb (src1.c, src2.c, mask.c);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
dst.x[i] = _mm_blendv_epi8 (src1.x[i], src2.x[i], mask.x[i]);
|
||||
if (check_pblendvb (&dst.x[i], &src1.c[i * 16], &src2.c[i * 16],
|
||||
&mask.c[i * 16]))
|
||||
abort ();
|
||||
}
|
||||
}
|
80
gcc/testsuite/gcc.target/i386/sse4_1-pblendw.c
Normal file
80
gcc/testsuite/gcc.target/i386/sse4_1-pblendw.c
Normal file
|
@ -0,0 +1,80 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 20
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 0x0f
|
||||
#endif
|
||||
|
||||
static void
|
||||
init_pblendw (short *src1, short *src2)
|
||||
{
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM * 8; i++)
|
||||
{
|
||||
src1[i] = i * i * sign;
|
||||
src2[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
check_pblendw (__m128i *dst, short *src1, short *src2)
|
||||
{
|
||||
short tmp[8];
|
||||
int j;
|
||||
|
||||
memcpy (&tmp[0], src1, sizeof (tmp));
|
||||
for (j = 0; j < 8; j++)
|
||||
if ((MASK & (1 << j)))
|
||||
tmp[j] = src2[j];
|
||||
|
||||
return memcmp (dst, &tmp[0], sizeof (tmp));
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
__m128i x, y;
|
||||
union
|
||||
{
|
||||
__m128i x[NUM];
|
||||
short s[NUM * 8];
|
||||
} dst, src1, src2;
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
short s[8];
|
||||
} src3;
|
||||
int i;
|
||||
|
||||
init_pblendw (src1.s, src2.s);
|
||||
|
||||
/* Check pblendw imm8, m128, xmm */
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
|
||||
if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check pblendw imm8, xmm, xmm */
|
||||
src3.x = _mm_setzero_si128 ();
|
||||
|
||||
x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
|
||||
y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
|
||||
|
||||
if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
|
||||
abort ();
|
||||
|
||||
if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
|
||||
abort ();
|
||||
}
|
38
gcc/testsuite/gcc.target/i386/sse4_1-pcmpeqq.c
Normal file
38
gcc/testsuite/gcc.target/i386/sse4_1-pcmpeqq.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign=1;
|
||||
long long is_eq;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.ll[i] = i * i * sign;
|
||||
src2.ll[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cmpeq_epi64(src1.x [i / 2], src2.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
is_eq = src1.ll[i] == src2.ll[i] ? 0xffffffffffffffffLL : 0LL;
|
||||
if (is_eq != dst.ll[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
80
gcc/testsuite/gcc.target/i386/sse4_1-pextrb.c
Normal file
80
gcc/testsuite/gcc.target/i386/sse4_1-pextrb.c
Normal file
|
@ -0,0 +1,80 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define msk0 0
|
||||
#define msk1 1
|
||||
#define msk2 2
|
||||
#define msk3 3
|
||||
#define msk4 4
|
||||
#define msk5 5
|
||||
#define msk6 6
|
||||
#define msk7 7
|
||||
#define msk8 8
|
||||
#define msk9 9
|
||||
#define msk10 10
|
||||
#define msk11 11
|
||||
#define msk12 12
|
||||
#define msk13 13
|
||||
#define msk14 14
|
||||
#define msk15 15
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
int i[4];
|
||||
char c[16];
|
||||
} val1;
|
||||
int res[16], masks[16];
|
||||
int i;
|
||||
|
||||
val1.i[0] = 0x04030201;
|
||||
val1.i[1] = 0x08070605;
|
||||
val1.i[2] = 0x0C0B0A09;
|
||||
val1.i[3] = 0x100F0E0D;
|
||||
|
||||
res[0] = _mm_extract_epi8 (val1.x, msk0);
|
||||
res[1] = _mm_extract_epi8 (val1.x, msk1);
|
||||
res[2] = _mm_extract_epi8 (val1.x, msk2);
|
||||
res[3] = _mm_extract_epi8 (val1.x, msk3);
|
||||
res[4] = _mm_extract_epi8 (val1.x, msk4);
|
||||
res[5] = _mm_extract_epi8 (val1.x, msk5);
|
||||
res[6] = _mm_extract_epi8 (val1.x, msk6);
|
||||
res[7] = _mm_extract_epi8 (val1.x, msk7);
|
||||
res[8] = _mm_extract_epi8 (val1.x, msk8);
|
||||
res[9] = _mm_extract_epi8 (val1.x, msk9);
|
||||
res[10] = _mm_extract_epi8 (val1.x, msk10);
|
||||
res[11] = _mm_extract_epi8 (val1.x, msk11);
|
||||
res[12] = _mm_extract_epi8 (val1.x, msk12);
|
||||
res[13] = _mm_extract_epi8 (val1.x, msk13);
|
||||
res[14] = _mm_extract_epi8 (val1.x, msk14);
|
||||
res[15] = _mm_extract_epi8 (val1.x, msk15);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
masks[4] = msk4;
|
||||
masks[5] = msk5;
|
||||
masks[6] = msk6;
|
||||
masks[7] = msk7;
|
||||
masks[8] = msk8;
|
||||
masks[9] = msk9;
|
||||
masks[10] = msk10;
|
||||
masks[11] = msk11;
|
||||
masks[12] = msk12;
|
||||
masks[13] = msk13;
|
||||
masks[14] = msk14;
|
||||
masks[15] = msk15;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
if (res[i] != val1.c [masks[i]])
|
||||
abort ();
|
||||
}
|
43
gcc/testsuite/gcc.target/i386/sse4_1-pextrd.c
Normal file
43
gcc/testsuite/gcc.target/i386/sse4_1-pextrd.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define msk0 0
|
||||
#define msk1 1
|
||||
#define msk2 2
|
||||
#define msk3 3
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
int i[4];
|
||||
} val1;
|
||||
int res[4], masks[4];
|
||||
int i;
|
||||
|
||||
val1.i[0] = 0x04030201;
|
||||
val1.i[1] = 0x08070605;
|
||||
val1.i[2] = 0x0C0B0A09;
|
||||
val1.i[3] = 0x100F0E0D;
|
||||
|
||||
res[0] = _mm_extract_epi32 (val1.x, msk0);
|
||||
res[1] = _mm_extract_epi32 (val1.x, msk1);
|
||||
res[2] = _mm_extract_epi32 (val1.x, msk2);
|
||||
res[3] = _mm_extract_epi32 (val1.x, msk3);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (res[i] != val1.i [masks[i]])
|
||||
abort ();
|
||||
}
|
36
gcc/testsuite/gcc.target/i386/sse4_1-pextrq.c
Normal file
36
gcc/testsuite/gcc.target/i386/sse4_1-pextrq.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define msk0 0
|
||||
#define msk1 1
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
long long ll[2];
|
||||
} val1;
|
||||
long long res[2];
|
||||
int masks[2];
|
||||
int i;
|
||||
|
||||
val1.ll[0] = 0x0807060504030201LL;
|
||||
val1.ll[1] = 0x100F0E0D0C0B0A09LL;
|
||||
|
||||
res[0] = _mm_extract_epi64 (val1.x, msk0);
|
||||
res[1] = _mm_extract_epi64 (val1.x, msk1);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
if (res[i] != val1.ll [masks[i]])
|
||||
abort ();
|
||||
}
|
56
gcc/testsuite/gcc.target/i386/sse4_1-pextrw.c
Normal file
56
gcc/testsuite/gcc.target/i386/sse4_1-pextrw.c
Normal file
|
@ -0,0 +1,56 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define msk0 0
|
||||
#define msk1 1
|
||||
#define msk2 2
|
||||
#define msk3 3
|
||||
#define msk4 4
|
||||
#define msk5 5
|
||||
#define msk6 6
|
||||
#define msk7 7
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
int i[4];
|
||||
short s[8];
|
||||
} val1;
|
||||
int res[8], masks[8];
|
||||
int i;
|
||||
|
||||
val1.i[0] = 0x04030201;
|
||||
val1.i[1] = 0x08070605;
|
||||
val1.i[2] = 0x0C0B0A09;
|
||||
val1.i[3] = 0x100F0E0D;
|
||||
|
||||
res[0] = _mm_extract_epi16 (val1.x, msk0);
|
||||
res[1] = _mm_extract_epi16 (val1.x, msk1);
|
||||
res[2] = _mm_extract_epi16 (val1.x, msk2);
|
||||
res[3] = _mm_extract_epi16 (val1.x, msk3);
|
||||
res[4] = _mm_extract_epi16 (val1.x, msk4);
|
||||
res[5] = _mm_extract_epi16 (val1.x, msk5);
|
||||
res[6] = _mm_extract_epi16 (val1.x, msk6);
|
||||
res[7] = _mm_extract_epi16 (val1.x, msk7);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
masks[4] = msk4;
|
||||
masks[5] = msk5;
|
||||
masks[6] = msk6;
|
||||
masks[7] = msk7;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
if (res[i] != val1.s [masks[i]])
|
||||
abort ();
|
||||
}
|
49
gcc/testsuite/gcc.target/i386/sse4_1-phminposuw.c
Normal file
49
gcc/testsuite/gcc.target/i386/sse4_1-phminposuw.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM/8];
|
||||
unsigned short s[NUM];
|
||||
} src;
|
||||
unsigned short minVal[NUM/8];
|
||||
int minInd[NUM/8];
|
||||
unsigned short minValScalar, minIndScalar;
|
||||
int i, j, res;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
src.s[i] = i * i / (i + i / 3.14 + 1.0);
|
||||
|
||||
for (i = 0, j = 0; i < NUM; i += 8, j++)
|
||||
{
|
||||
res = _mm_cvtsi128_si32 (_mm_minpos_epu16 (src.x [i/8]));
|
||||
minVal[j] = res & 0xffff;
|
||||
minInd[j] = (res >> 16) & 0x3;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 8)
|
||||
{
|
||||
minValScalar = src.s[i];
|
||||
minIndScalar = 0;
|
||||
|
||||
for (j = i + 1; j < i + 8; j++)
|
||||
if (minValScalar > src.s[j])
|
||||
{
|
||||
minValScalar = src.s[j];
|
||||
minIndScalar = j - i;
|
||||
}
|
||||
|
||||
if (minValScalar != minVal[i/8] && minIndScalar != minInd[i/8])
|
||||
abort ();
|
||||
}
|
||||
}
|
102
gcc/testsuite/gcc.target/i386/sse4_1-pinsrb.c
Normal file
102
gcc/testsuite/gcc.target/i386/sse4_1-pinsrb.c
Normal file
|
@ -0,0 +1,102 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define msk0 0x00
|
||||
#define msk1 0x01
|
||||
#define msk2 0x02
|
||||
#define msk3 0x03
|
||||
#define msk4 0x04
|
||||
#define msk5 0x05
|
||||
#define msk6 0x06
|
||||
#define msk7 0x07
|
||||
#define msk8 0x08
|
||||
#define msk9 0x09
|
||||
#define mskA 0x0A
|
||||
#define mskB 0x0B
|
||||
#define mskC 0x0C
|
||||
#define mskD 0x0D
|
||||
#define mskE 0x0E
|
||||
#define mskF 0x0F
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned int i[4];
|
||||
unsigned char c[16];
|
||||
} res [16], val, tmp;
|
||||
int masks[16];
|
||||
unsigned char ins[4] = { 3, 4, 5, 6 };
|
||||
int i;
|
||||
|
||||
val.i[0] = 0x35251505;
|
||||
val.i[1] = 0x75655545;
|
||||
val.i[2] = 0xB5A59585;
|
||||
val.i[3] = 0xF5E5D5C5;
|
||||
|
||||
/* Check pinsrb imm8, r32, xmm. */
|
||||
res[0].x = _mm_insert_epi8 (val.x, ins[0], msk0);
|
||||
res[1].x = _mm_insert_epi8 (val.x, ins[0], msk1);
|
||||
res[2].x = _mm_insert_epi8 (val.x, ins[0], msk2);
|
||||
res[3].x = _mm_insert_epi8 (val.x, ins[0], msk3);
|
||||
res[4].x = _mm_insert_epi8 (val.x, ins[0], msk4);
|
||||
res[5].x = _mm_insert_epi8 (val.x, ins[0], msk5);
|
||||
res[6].x = _mm_insert_epi8 (val.x, ins[0], msk6);
|
||||
res[7].x = _mm_insert_epi8 (val.x, ins[0], msk7);
|
||||
res[8].x = _mm_insert_epi8 (val.x, ins[0], msk8);
|
||||
res[9].x = _mm_insert_epi8 (val.x, ins[0], msk9);
|
||||
res[10].x = _mm_insert_epi8 (val.x, ins[0], mskA);
|
||||
res[11].x = _mm_insert_epi8 (val.x, ins[0], mskB);
|
||||
res[12].x = _mm_insert_epi8 (val.x, ins[0], mskC);
|
||||
res[13].x = _mm_insert_epi8 (val.x, ins[0], mskD);
|
||||
res[14].x = _mm_insert_epi8 (val.x, ins[0], mskE);
|
||||
res[15].x = _mm_insert_epi8 (val.x, ins[0], mskF);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
masks[4] = msk4;
|
||||
masks[5] = msk5;
|
||||
masks[6] = msk6;
|
||||
masks[7] = msk7;
|
||||
masks[8] = msk8;
|
||||
masks[9] = msk9;
|
||||
masks[10] = mskA;
|
||||
masks[11] = mskB;
|
||||
masks[12] = mskC;
|
||||
masks[13] = mskD;
|
||||
masks[14] = mskE;
|
||||
masks[15] = mskF;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
tmp.x = val.x;
|
||||
tmp.c[masks[i]] = ins[0];
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check pinsrb imm8, m8, xmm. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
res[i].x = _mm_insert_epi8 (val.x, ins[i % 4], msk0);
|
||||
masks[i] = msk0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
tmp.x = val.x;
|
||||
tmp.c[masks[i]] = ins[i % 4];
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
}
|
65
gcc/testsuite/gcc.target/i386/sse4_1-pinsrd.c
Normal file
65
gcc/testsuite/gcc.target/i386/sse4_1-pinsrd.c
Normal file
|
@ -0,0 +1,65 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define msk0 0x00
|
||||
#define msk1 0x01
|
||||
#define msk2 0x02
|
||||
#define msk3 0x03
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned int i[4];
|
||||
} res [4], val, tmp;
|
||||
static unsigned int ins[4] = { 3, 4, 5, 6 };
|
||||
int masks[4];
|
||||
int i;
|
||||
|
||||
val.i[0] = 55;
|
||||
val.i[1] = 55;
|
||||
val.i[2] = 55;
|
||||
val.i[3] = 55;
|
||||
|
||||
/* Check pinsrd imm8, r32, xmm. */
|
||||
res[0].x = _mm_insert_epi32 (val.x, ins[0], msk0);
|
||||
res[1].x = _mm_insert_epi32 (val.x, ins[0], msk1);
|
||||
res[2].x = _mm_insert_epi32 (val.x, ins[0], msk2);
|
||||
res[3].x = _mm_insert_epi32 (val.x, ins[0], msk3);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
masks[2] = msk2;
|
||||
masks[3] = msk3;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
tmp.x = val.x;
|
||||
tmp.i[masks[i]] = ins[0];
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check pinsrd imm8, m32, xmm. */
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
res[i].x = _mm_insert_epi32 (val.x, ins[i], msk0);
|
||||
masks[i] = msk0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
tmp.x = val.x;
|
||||
tmp.i[masks[i]] = ins[i];
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
}
|
58
gcc/testsuite/gcc.target/i386/sse4_1-pinsrq.c
Normal file
58
gcc/testsuite/gcc.target/i386/sse4_1-pinsrq.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#define msk0 0x00
|
||||
#define msk1 0x01
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned long long ll[2];
|
||||
} res [4], val, tmp;
|
||||
int masks[4];
|
||||
static unsigned long long ins[2] =
|
||||
{ 0xAABBAABBAABBAABBLL, 0xCCDDCCDDCCDDCCDDLL };
|
||||
int i;
|
||||
|
||||
val.ll[0] = 0x0807060504030201LL;
|
||||
val.ll[1] = 0x100F0E0D0C0B0A09LL;
|
||||
|
||||
/* Check pinsrq imm8, r64, xmm. */
|
||||
res[0].x = _mm_insert_epi64 (val.x, ins[0], msk0);
|
||||
res[1].x = _mm_insert_epi64 (val.x, ins[0], msk1);
|
||||
|
||||
masks[0] = msk0;
|
||||
masks[1] = msk1;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
tmp.x = val.x;
|
||||
tmp.ll[masks[i]] = ins[0];
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check pinsrq imm8, m64, xmm. */
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
res[i].x = _mm_insert_epi64 (val.x, ins[i], msk0);
|
||||
masks[i] = msk0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
tmp.x = val.x;
|
||||
tmp.ll[masks[i]] = ins[i];
|
||||
if (memcmp (&tmp, &res[i], sizeof (tmp)))
|
||||
abort ();
|
||||
}
|
||||
}
|
38
gcc/testsuite/gcc.target/i386/sse4_1-pmaxsb.c
Normal file
38
gcc/testsuite/gcc.target/i386/sse4_1-pmaxsb.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 1024
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 16];
|
||||
char i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
char max;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 16)
|
||||
dst.x[i / 16] = _mm_max_epi8 (src1.x[i / 16], src2.x[i / 16]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
max = src1.i[i] <= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (max != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
38
gcc/testsuite/gcc.target/i386/sse4_1-pmaxsd.c
Normal file
38
gcc/testsuite/gcc.target/i386/sse4_1-pmaxsd.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
int max;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_max_epi32 (src1.x[i / 4], src2.x[i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
max = src1.i[i] <= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (max != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
39
gcc/testsuite/gcc.target/i386/sse4_1-pmaxud.c
Normal file
39
gcc/testsuite/gcc.target/i386/sse4_1-pmaxud.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
unsigned int i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i;
|
||||
unsigned int max;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i;
|
||||
src2.i[i] = i + 20;
|
||||
if ((i % 4))
|
||||
src2.i[i] |= 0x80000000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_max_epu32 (src1.x[i / 4], src2.x[i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
max = src1.i[i] <= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (max != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
39
gcc/testsuite/gcc.target/i386/sse4_1-pmaxuw.c
Normal file
39
gcc/testsuite/gcc.target/i386/sse4_1-pmaxuw.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 8];
|
||||
unsigned short i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i;
|
||||
unsigned short max;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i;
|
||||
src2.i[i] = i + 20;
|
||||
if ((i % 8))
|
||||
src2.i[i] |= 0x8000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 8)
|
||||
dst.x[i / 8] = _mm_max_epu16 (src1.x[i / 8], src2.x[i / 8]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
max = src1.i[i] <= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (max != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
38
gcc/testsuite/gcc.target/i386/sse4_1-pminsb.c
Normal file
38
gcc/testsuite/gcc.target/i386/sse4_1-pminsb.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 1024
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 16];
|
||||
char i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
char min;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 16)
|
||||
dst.x[i / 16] = _mm_min_epi8 (src1.x[i / 16], src2.x[i / 16]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (min != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
38
gcc/testsuite/gcc.target/i386/sse4_1-pminsd.c
Normal file
38
gcc/testsuite/gcc.target/i386/sse4_1-pminsd.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
int min;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_min_epi32 (src1.x[i / 4], src2.x[i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (min != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
39
gcc/testsuite/gcc.target/i386/sse4_1-pminud.c
Normal file
39
gcc/testsuite/gcc.target/i386/sse4_1-pminud.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
unsigned int i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i;
|
||||
unsigned int min;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i;
|
||||
src2.i[i] = i + 20;
|
||||
if ((i % 4))
|
||||
src2.i[i] |= 0x80000000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_min_epu32 (src1.x[i / 4], src2.x[i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (min != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
39
gcc/testsuite/gcc.target/i386/sse4_1-pminuw.c
Normal file
39
gcc/testsuite/gcc.target/i386/sse4_1-pminuw.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 8];
|
||||
unsigned short i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i;
|
||||
unsigned short min;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i;
|
||||
src2.i[i] = i + 20;
|
||||
if ((i % 8))
|
||||
src2.i[i] |= 0x8000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 8)
|
||||
dst.x[i / 8] = _mm_min_epu16 (src1.x[i / 8], src2.x[i / 8]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i];
|
||||
if (min != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbd.c
Normal file
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbd.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
char c[NUM * 4];
|
||||
} dst, src;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.c[(i % 4) + (i / 4) * 16] = i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
|
||||
abort ();
|
||||
}
|
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbq.c
Normal file
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbq.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
char c[NUM * 8];
|
||||
} dst, src;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.c[(i % 2) + (i / 2) * 16] = i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cvtepi8_epi64 (src.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
|
||||
abort ();
|
||||
}
|
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbw.c
Normal file
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxbw.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 8];
|
||||
short s[NUM];
|
||||
char c[NUM * 2];
|
||||
} dst, src;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.c[(i % 8) + (i / 8) * 16] = i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 8)
|
||||
dst.x [i / 8] = _mm_cvtepi8_epi16 (src.x [i / 8]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
|
||||
abort ();
|
||||
}
|
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxdq.c
Normal file
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxdq.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
int i[NUM * 2];
|
||||
} dst, src;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.i[(i % 2) + (i / 2) * 4] = i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cvtepi32_epi64 (src.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
|
||||
abort ();
|
||||
}
|
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwd.c
Normal file
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwd.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
short s[NUM * 2];
|
||||
} dst, src;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.s[(i % 4) + (i / 4) * 8] = i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x [i / 4] = _mm_cvtepi16_epi32 (src.x [i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
|
||||
abort ();
|
||||
}
|
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwq.c
Normal file
34
gcc/testsuite/gcc.target/i386/sse4_1-pmovsxwq.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
short s[NUM * 4];
|
||||
} dst, src;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.s[(i % 2) + (i / 2) * 8] = i * i * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cvtepi16_epi64 (src.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
|
||||
abort ();
|
||||
}
|
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbd.c
Normal file
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbd.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
unsigned int i[NUM];
|
||||
unsigned char c[NUM * 4];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.c[(i % 4) + (i / 4) * 16] = i * i;
|
||||
if ((i % 4))
|
||||
src.c[(i % 4) + (i / 4) * 16] |= 0x80;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x [i / 4] = _mm_cvtepu8_epi32 (src.x [i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
|
||||
abort ();
|
||||
}
|
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbq.c
Normal file
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbq.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
unsigned long long ll[NUM];
|
||||
unsigned char c[NUM * 8];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.c[(i % 2) + (i / 2) * 16] = i * i;
|
||||
if ((i % 2))
|
||||
src.c[(i % 2) + (i / 2) * 16] |= 0x80;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cvtepu8_epi64 (src.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
|
||||
abort ();
|
||||
}
|
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbw.c
Normal file
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxbw.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 8];
|
||||
unsigned short s[NUM];
|
||||
unsigned char c[NUM * 2];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.c[(i % 8) + (i / 8) * 16] = i * i;
|
||||
if ((i % 4))
|
||||
src.c[(i % 8) + (i / 8) * 16] |= 0x80;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 8)
|
||||
dst.x [i / 8] = _mm_cvtepu8_epi16 (src.x [i / 8]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
|
||||
abort ();
|
||||
}
|
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxdq.c
Normal file
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxdq.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
unsigned long long ll[NUM];
|
||||
unsigned int i[NUM * 2];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.i[(i % 2) + (i / 2) * 4] = i * i;
|
||||
if ((i % 2))
|
||||
src.i[(i % 2) + (i / 2) * 4] |= 0x80000000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cvtepu32_epi64 (src.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
|
||||
abort ();
|
||||
}
|
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwd.c
Normal file
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwd.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
unsigned int i[NUM];
|
||||
unsigned short s[NUM * 2];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.s[(i % 4) + (i / 4) * 8] = i * i;
|
||||
if ((i % 4))
|
||||
src.s[(i % 4) + (i / 4) * 8] |= 0x8000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x [i / 4] = _mm_cvtepu16_epi32 (src.x [i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
|
||||
abort ();
|
||||
}
|
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwq.c
Normal file
35
gcc/testsuite/gcc.target/i386/sse4_1-pmovzxwq.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 128
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
unsigned long long ll[NUM];
|
||||
unsigned short s[NUM * 4];
|
||||
} dst, src;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src.s[(i % 2) + (i / 2) * 8] = i * i;
|
||||
if ((i % 2))
|
||||
src.s[(i % 2) + (i / 2) * 8] |= 0x8000;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cvtepu16_epi64 (src.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
|
||||
abort ();
|
||||
}
|
43
gcc/testsuite/gcc.target/i386/sse4_1-pmuldq.c
Normal file
43
gcc/testsuite/gcc.target/i386/sse4_1-pmuldq.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
} dst;
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
int i[NUM * 2];
|
||||
} src1, src2;
|
||||
int i, sign = 1;
|
||||
long long value;
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x[i / 2] = _mm_mul_epi32 (src1.x[i / 2], src2.x[i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
value = (long long) src1.i[i * 2] * (long long) src2.i[i * 2];
|
||||
if (value != dst.ll[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
38
gcc/testsuite/gcc.target/i386/sse4_1-pmulld.c
Normal file
38
gcc/testsuite/gcc.target/i386/sse4_1-pmulld.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
int value;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_mullo_epi32 (src1.x[i / 4], src2.x[i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
value = src1.i[i] * src2.i[i];
|
||||
if (value != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
109
gcc/testsuite/gcc.target/i386/sse4_1-ptest-1.c
Normal file
109
gcc/testsuite/gcc.target/i386/sse4_1-ptest-1.c
Normal file
|
@ -0,0 +1,109 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
static int
|
||||
make_ptestz (__m128i m, __m128i v)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned char c[16];
|
||||
} val, mask;
|
||||
int i, z;
|
||||
|
||||
mask.x = m;
|
||||
val.x = v;
|
||||
|
||||
z = 1;
|
||||
for (i = 0; i < 16; i++)
|
||||
if ((mask.c[i] & val.c[i]))
|
||||
{
|
||||
z = 0;
|
||||
break;
|
||||
}
|
||||
return z;
|
||||
}
|
||||
|
||||
static int
|
||||
make_ptestc (__m128i m, __m128i v)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned char c[16];
|
||||
} val, mask;
|
||||
int i, c;
|
||||
|
||||
mask.x = m;
|
||||
val.x = v;
|
||||
|
||||
c = 1;
|
||||
for (i = 0; i < 16; i++)
|
||||
if ((val.c[i] & ~mask.c[i]))
|
||||
{
|
||||
c = 0;
|
||||
break;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned int i[4];
|
||||
} val[4];
|
||||
int i, j, l;
|
||||
int res[32];
|
||||
|
||||
val[0].i[0] = 0x11111111;
|
||||
val[0].i[1] = 0x00000000;
|
||||
val[0].i[2] = 0x00000000;
|
||||
val[0].i[3] = 0x11111111;
|
||||
|
||||
val[1].i[0] = 0x00000000;
|
||||
val[1].i[1] = 0x11111111;
|
||||
val[1].i[2] = 0x11111111;
|
||||
val[1].i[3] = 0x00000000;
|
||||
|
||||
val[2].i[0] = 0;
|
||||
val[2].i[1] = 0;
|
||||
val[2].i[2] = 0;
|
||||
val[2].i[3] = 0;
|
||||
|
||||
val[3].i[0] = 0xffffffff;
|
||||
val[3].i[1] = 0xffffffff;
|
||||
val[3].i[2] = 0xffffffff;
|
||||
val[3].i[3] = 0xffffffff;
|
||||
|
||||
l = 0;
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
res[l++] = _mm_testz_si128 (val[j].x, val[i].x);
|
||||
res[l++] = _mm_testc_si128 (val[j].x, val[i].x);
|
||||
}
|
||||
|
||||
l = 0;
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
if (res[l++] != make_ptestz (val[j].x, val[i].x))
|
||||
abort ();
|
||||
if (res[l++] != make_ptestc (val[j].x, val[i].x))
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (res[2] != _mm_testz_si128 (val[1].x, val[0].x))
|
||||
abort ();
|
||||
|
||||
if (res[3] != _mm_testc_si128 (val[1].x, val[0].x))
|
||||
abort ();
|
||||
}
|
88
gcc/testsuite/gcc.target/i386/sse4_1-ptest-2.c
Normal file
88
gcc/testsuite/gcc.target/i386/sse4_1-ptest-2.c
Normal file
|
@ -0,0 +1,88 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
static int
|
||||
make_ptestnzc (__m128i m, __m128i v)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned char c[16];
|
||||
} val, mask;
|
||||
int i, z, c;
|
||||
|
||||
mask.x = m;
|
||||
val.x = v;
|
||||
|
||||
z = c = 1;
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if ((mask.c[i] & val.c[i]))
|
||||
z = 0;
|
||||
if ((~mask.c[i] & val.c[i]))
|
||||
c = 0;
|
||||
}
|
||||
|
||||
return (z == 0 && c == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned int i[4];
|
||||
} val[4];
|
||||
int i, j, l;
|
||||
int res[32];
|
||||
|
||||
val[0].i[0] = 0x11111111;
|
||||
val[0].i[1] = 0x00000000;
|
||||
val[0].i[2] = 0x00000000;
|
||||
val[0].i[3] = 0x11111111;
|
||||
|
||||
val[1].i[0] = 0x00000000;
|
||||
val[1].i[1] = 0x11111111;
|
||||
val[1].i[2] = 0x11111111;
|
||||
val[1].i[3] = 0x00000000;
|
||||
|
||||
val[2].i[0] = 0;
|
||||
val[2].i[1] = 0;
|
||||
val[2].i[2] = 0;
|
||||
val[2].i[3] = 0;
|
||||
|
||||
val[3].i[0] = 0xffffffff;
|
||||
val[3].i[1] = 0xffffffff;
|
||||
val[3].i[2] = 0xffffffff;
|
||||
val[3].i[3] = 0xffffffff;
|
||||
|
||||
l = 0;
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
res[l++] = _mm_testnzc_si128 (val[j].x, val[i].x);
|
||||
res[l++] = _mm_testnzc_si128 (val[j].x, val[i].x);
|
||||
}
|
||||
|
||||
l = 0;
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
if (res[l++] != make_ptestnzc (val[j].x, val[i].x))
|
||||
abort ();
|
||||
if (res[l++] != make_ptestnzc (val[j].x, val[i].x))
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (res[2] != _mm_testnzc_si128 (val[1].x, val[0].x))
|
||||
abort ();
|
||||
|
||||
if (res[3] != _mm_testnzc_si128 (val[1].x, val[0].x))
|
||||
abort ();
|
||||
}
|
77
gcc/testsuite/gcc.target/i386/sse4_1-ptest-3.c
Normal file
77
gcc/testsuite/gcc.target/i386/sse4_1-ptest-3.c
Normal file
|
@ -0,0 +1,77 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x;
|
||||
unsigned int i[4];
|
||||
} val[4];
|
||||
int correct_zeros[4];
|
||||
int correct_ones[4];
|
||||
int correct_mixed[4];
|
||||
int zeros[4];
|
||||
int ones[4];
|
||||
int mixed[4];
|
||||
int i;
|
||||
__m128i v;
|
||||
|
||||
val[0].i[0] = 0x11111111;
|
||||
val[0].i[1] = 0x00000000;
|
||||
val[0].i[2] = 0x00000000;
|
||||
val[0].i[3] = 0x11111111;
|
||||
correct_zeros[0] = 0;
|
||||
correct_ones[0] = 0;
|
||||
correct_mixed[0] = 1;
|
||||
|
||||
val[1].i[0] = 0x00000000;
|
||||
val[1].i[1] = 0x11111111;
|
||||
val[1].i[2] = 0x11111111;
|
||||
val[1].i[3] = 0x00000000;
|
||||
correct_zeros[1] = 0;
|
||||
correct_ones[1] = 0;
|
||||
correct_mixed[1] = 1;
|
||||
|
||||
val[2].i[0] = 0;
|
||||
val[2].i[1] = 0;
|
||||
val[2].i[2] = 0;
|
||||
val[2].i[3] = 0;
|
||||
correct_zeros[2] = 1;
|
||||
correct_ones[2] = 0;
|
||||
correct_mixed[2] = 0;
|
||||
|
||||
val[3].i[0] = 0xffffffff;
|
||||
val[3].i[1] = 0xffffffff;
|
||||
val[3].i[2] = 0xffffffff;
|
||||
val[3].i[3] = 0xffffffff;
|
||||
correct_zeros[3] = 0;
|
||||
correct_ones[3] = 1;
|
||||
correct_mixed[3] = 0;
|
||||
|
||||
for (i=0; i < 4; i++)
|
||||
zeros[i] = _mm_test_all_zeros (val[i].x, val[i].x);
|
||||
|
||||
for( i=0; i < 4; i++ )
|
||||
ones[i] = _mm_test_all_ones (val[i].x);
|
||||
|
||||
v = _mm_cmpeq_epi32 (val[0].x, val[0].x);
|
||||
for( i=0; i < 4; i++ )
|
||||
mixed[i] = _mm_test_mix_ones_zeros (val[i].x, v);
|
||||
|
||||
for( i=0; i < 4; i++ )
|
||||
{
|
||||
if (zeros[i] != correct_zeros[i])
|
||||
abort ();
|
||||
if (ones[i] != correct_ones[i])
|
||||
abort ();
|
||||
if (mixed[i] != correct_mixed[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
95
gcc/testsuite/gcc.target/i386/sse4_1-round.h
Normal file
95
gcc/testsuite/gcc.target/i386/sse4_1-round.h
Normal file
|
@ -0,0 +1,95 @@
|
|||
#include <smmintrin.h>
|
||||
#include <math.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
init_round (FP_T *src)
|
||||
{
|
||||
int i, sign = 1;
|
||||
FP_T f = rand ();
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src[i] = (i + 1)* f * M_PI * sign;
|
||||
if (i < (NUM / 2))
|
||||
{
|
||||
if ((i % 6) == 0)
|
||||
f = f * src[i];
|
||||
}
|
||||
else if (i == (NUM / 2))
|
||||
f = rand ();
|
||||
else if ((i % 6) == 0)
|
||||
f = 1 / (f * (i + 1) * src[i] * M_PI *sign);
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reference rounding: round F to an integral value with the x87 frndint
   instruction and return the result.

   TYPE selects how the x87 control word is set up for the operation:
     - if bit 2 of TYPE is set, the saved control word is reloaded
       unmodified (mask 0xFFFF, nothing ORed in);
     - otherwise the bits in 0x0C3F are cleared, then 0x003F and the low two
       bits of TYPE shifted to bit 10 are ORed in.
   NOTE(review): 0x0C00 is presumably the rounding-control field and 0x003F
   the exception mask bits -- confirm against the x87 control word layout.

   The saved control word is restored before returning.  The asm statements
   are separate and depend on being emitted in this order.  */
static FP_T
|
||||
do_round (FP_T f, int type)
|
||||
{
|
||||
short saved_cw, new_cw, clr_mask;
|
||||
FP_T ret;
|
||||
|
||||
/* Turn TYPE into the bits to OR into the control word and pick the mask
   of bits to keep from the saved one.  */
if ((type & 4))
|
||||
{
|
||||
type = 0;
|
||||
clr_mask = 0xFFFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = 0x003F | ((type & 3) << 10);
|
||||
clr_mask = ~0x0C3F;
|
||||
}
|
||||
|
||||
/* Load F with the size suffix supplied by the including file.  */
__asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
|
||||
|
||||
/* Save the current control word, then load the modified copy.  */
__asm__ ("fstcw %0" : "=m" (*&saved_cw));
|
||||
new_cw = saved_cw & clr_mask;
|
||||
new_cw |= type;
|
||||
__asm__ ("fldcw %0" : : "m" (*&new_cw));
|
||||
|
||||
/* Round, store the result into RET, then put the saved control word
   back.  */
__asm__ ("frndint\n"
|
||||
"fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
|
||||
__asm__ ("fldcw %0" : : "m" (*&saved_cw));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
int i;
|
||||
FP_T f;
|
||||
union
|
||||
{
|
||||
VEC_T x[NUM / LOOP_INCREMENT];
|
||||
FP_T f[NUM];
|
||||
} dst, src;
|
||||
|
||||
init_round (src.f);
|
||||
|
||||
for (i = 0; i < NUM / LOOP_INCREMENT; i++)
|
||||
dst.x[i] = ROUND_INTRIN (src.x[i], ROUND_MODE);
|
||||
|
||||
for (i = 0; i < NUM; i += CHECK_LOOP_INCREMENT)
|
||||
{
|
||||
f = do_round (src.f[i], CHECK_ROUND_MODE);
|
||||
if (f != dst.f[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (_MM_FROUND_TO_NEAREST_INT != 0x00
|
||||
|| _MM_FROUND_TO_NEG_INF != 0x01
|
||||
|| _MM_FROUND_TO_POS_INF != 0x02
|
||||
|| _MM_FROUND_TO_ZERO != 0x03
|
||||
|| _MM_FROUND_CUR_DIRECTION != 0x04
|
||||
|| _MM_FROUND_RAISE_EXC != 0x00
|
||||
|| _MM_FROUND_NO_EXC != 0x08
|
||||
|| _MM_FROUND_NINT != 0x00
|
||||
|| _MM_FROUND_FLOOR != 0x01
|
||||
|| _MM_FROUND_CEIL != 0x02
|
||||
|| _MM_FROUND_TRUNC != 0x03
|
||||
|| _MM_FROUND_RINT != 0x04
|
||||
|| _MM_FROUND_NEARBYINT != 0x0C)
|
||||
abort ();
|
||||
}
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundpd-1.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundpd-1.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128d
|
||||
#define FP_T double
|
||||
#define ASM_SUFFIX "l"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_ceil_pd(x)
|
||||
#define ROUND_MODE _MM_FROUND_CEIL
|
||||
#define CHECK_ROUND_MODE 0x02
|
||||
|
||||
#define LOOP_INCREMENT 2
|
||||
#define CHECK_LOOP_INCREMENT 1
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundpd-2.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundpd-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128d
|
||||
#define FP_T double
|
||||
#define ASM_SUFFIX "l"
|
||||
|
||||
#define ROUND_INTRIN _mm_round_pd
|
||||
#define ROUND_MODE _MM_FROUND_NINT
|
||||
#define CHECK_ROUND_MODE 0x00
|
||||
|
||||
#define LOOP_INCREMENT 2
|
||||
#define CHECK_LOOP_INCREMENT 1
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundpd-3.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundpd-3.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128d
|
||||
#define FP_T double
|
||||
#define ASM_SUFFIX "l"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_floor_pd(x)
|
||||
#define ROUND_MODE _MM_FROUND_FLOOR
|
||||
#define CHECK_ROUND_MODE 0x01
|
||||
|
||||
#define LOOP_INCREMENT 2
|
||||
#define CHECK_LOOP_INCREMENT 1
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundps-1.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundps-1.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128
|
||||
#define FP_T float
|
||||
#define ASM_SUFFIX "s"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_ceil_ps(x)
|
||||
#define ROUND_MODE _MM_FROUND_CEIL
|
||||
#define CHECK_ROUND_MODE 0x02
|
||||
|
||||
#define LOOP_INCREMENT 4
|
||||
#define CHECK_LOOP_INCREMENT 1
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundps-2.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundps-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128
|
||||
#define FP_T float
|
||||
#define ASM_SUFFIX "s"
|
||||
|
||||
#define ROUND_INTRIN _mm_round_ps
|
||||
#define ROUND_MODE _MM_FROUND_NINT
|
||||
#define CHECK_ROUND_MODE 0x00
|
||||
|
||||
#define LOOP_INCREMENT 4
|
||||
#define CHECK_LOOP_INCREMENT 1
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundps-3.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundps-3.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128
|
||||
#define FP_T float
|
||||
#define ASM_SUFFIX "s"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_floor_ps(x)
|
||||
#define ROUND_MODE _MM_FROUND_FLOOR
|
||||
#define CHECK_ROUND_MODE 0x01
|
||||
|
||||
#define LOOP_INCREMENT 4
|
||||
#define CHECK_LOOP_INCREMENT 1
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-1.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-1.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128d
|
||||
#define FP_T double
|
||||
#define ASM_SUFFIX "l"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_ceil_sd(x, x)
|
||||
#define ROUND_MODE _MM_FROUND_CEIL
|
||||
#define CHECK_ROUND_MODE 0x02
|
||||
|
||||
#define LOOP_INCREMENT 2
|
||||
#define CHECK_LOOP_INCREMENT 2
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-2.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128d
|
||||
#define FP_T double
|
||||
#define ASM_SUFFIX "l"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_round_sd(x, x, mode)
|
||||
#define ROUND_MODE _MM_FROUND_NINT
|
||||
#define CHECK_ROUND_MODE 0x00
|
||||
|
||||
#define LOOP_INCREMENT 2
|
||||
#define CHECK_LOOP_INCREMENT 2
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-3.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-3.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128d
|
||||
#define FP_T double
|
||||
#define ASM_SUFFIX "l"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_floor_sd(x, x)
|
||||
#define ROUND_MODE _MM_FROUND_FLOOR
|
||||
#define CHECK_ROUND_MODE 0x01
|
||||
|
||||
#define LOOP_INCREMENT 2
|
||||
#define CHECK_LOOP_INCREMENT 2
|
||||
|
||||
#include "sse4_1-round.h"
|
91
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-4.c
Normal file
91
gcc/testsuite/gcc.target/i386/sse4_1-roundsd-4.c
Normal file
|
@ -0,0 +1,91 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
init_round (double *src)
|
||||
{
|
||||
int i, sign = 1;
|
||||
double d = rand ();
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src[i] = (i + 1)* d * M_PI * sign;
|
||||
if (i < (NUM / 2))
|
||||
{
|
||||
if ((i % 6) == 0)
|
||||
d = d * src[i];
|
||||
}
|
||||
else if (i == (NUM / 2))
|
||||
d = rand ();
|
||||
else if ((i % 6) == 0)
|
||||
d = 1 / (d * (i + 1) * src[i] * M_PI *sign);
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reference rounding: round the double F to an integral value with the x87
   frndint instruction and return the result.

   TYPE selects how the x87 control word is set up for the operation:
     - if bit 2 of TYPE is set, the saved control word is reloaded
       unmodified (mask 0xFFFF, nothing ORed in);
     - otherwise the bits in 0x0C3F are cleared, then 0x003F and the low two
       bits of TYPE shifted to bit 10 are ORed in.
   NOTE(review): 0x0C00 is presumably the rounding-control field and 0x003F
   the exception mask bits -- confirm against the x87 control word layout.

   The saved control word is restored before returning.  The asm statements
   are separate and depend on being emitted in this order.  */
static double
|
||||
do_round (double f, int type)
|
||||
{
|
||||
short saved_cw, new_cw, clr_mask;
|
||||
double ret;
|
||||
|
||||
/* Turn TYPE into the bits to OR into the control word and pick the mask
   of bits to keep from the saved one.  */
if ((type & 4))
|
||||
{
|
||||
type = 0;
|
||||
clr_mask = 0xFFFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = 0x003F | ((type & 3) << 10);
|
||||
clr_mask = ~0x0C3F;
|
||||
}
|
||||
|
||||
/* Load F.  */
__asm__ ("fldl %0" : : "m" (*&f));
|
||||
|
||||
/* Save the current control word, then load the modified copy.  */
__asm__ ("fstcw %0" : "=m" (*&saved_cw));
|
||||
new_cw = saved_cw & clr_mask;
|
||||
new_cw |= type;
|
||||
__asm__ ("fldcw %0" : : "m" (*&new_cw));
|
||||
|
||||
/* Round, store the result into RET, then put the saved control word
   back.  */
__asm__ ("frndint\n"
|
||||
"fstpl %0\n" : "=m" (*&ret));
|
||||
__asm__ ("fldcw %0" : : "m" (*&saved_cw));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
int i;
|
||||
double f;
|
||||
union
|
||||
{
|
||||
__m128d x[NUM / 2];
|
||||
double d[NUM];
|
||||
} dst, src;
|
||||
|
||||
init_round (src.d);
|
||||
memset (&dst, 0, NUM * sizeof(double));
|
||||
|
||||
for (i = 0; i < NUM / 2 ; i++)
|
||||
dst.x[i] = _mm_round_sd (dst.x[i], src.x[i], _MM_FROUND_TRUNC);
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
{
|
||||
if (dst.d[i + 1] != 0.0)
|
||||
abort ();
|
||||
|
||||
f = do_round (src.d[i], 0x03);
|
||||
if (f != dst.d[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundss-1.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundss-1.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#define VEC_T __m128
|
||||
#define FP_T float
|
||||
#define ASM_SUFFIX "s"
|
||||
|
||||
#define ROUND_INTRIN(x, mode) _mm_ceil_ss(x, x)
|
||||
#define ROUND_MODE _MM_FROUND_CEIL
|
||||
#define CHECK_ROUND_MODE 0x02
|
||||
|
||||
#define LOOP_INCREMENT 4
|
||||
#define CHECK_LOOP_INCREMENT 4
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundss-2.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundss-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
/* Settings for the shared test body in "sse4_1-round.h", which this
   file includes after these definitions.  NOTE(review): that header is
   not visible here, so the roles given below are inferred from the
   macro names -- confirm against the header.  */
/* Presumably the vector type the shared body operates on.  */
#define VEC_T __m128
|
||||
/* Presumably the scalar element type of VEC_T.  */
#define FP_T float
|
||||
/* Presumably appended to an x87 mnemonic to select the operand size
   -- TODO confirm.  */
#define ASM_SUFFIX "s"
|
||||
|
||||
/* Rounding operation under test: _mm_round_ss with X as both vector
   operands and MODE passed through as the rounding argument.  */
#define ROUND_INTRIN(x, mode) _mm_round_ss(x, x, mode)
|
||||
/* Value supplied as the MODE argument of ROUND_INTRIN.  */
#define ROUND_MODE _MM_FROUND_NINT
|
||||
/* Presumably the rounding type used to compute the reference value
   -- TODO confirm.  */
#define CHECK_ROUND_MODE 0x00
|
||||
|
||||
/* Presumably the element strides of the rounding loop and of the
   checking loop -- TODO confirm.  */
#define LOOP_INCREMENT 4
|
||||
#define CHECK_LOOP_INCREMENT 4
|
||||
|
||||
#include "sse4_1-round.h"
|
18
gcc/testsuite/gcc.target/i386/sse4_1-roundss-3.c
Normal file
18
gcc/testsuite/gcc.target/i386/sse4_1-roundss-3.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
/* Settings for the shared test body in "sse4_1-round.h", which this
   file includes after these definitions.  NOTE(review): that header is
   not visible here, so the roles given below are inferred from the
   macro names -- confirm against the header.  */
/* Presumably the vector type the shared body operates on.  */
#define VEC_T __m128
|
||||
/* Presumably the scalar element type of VEC_T.  */
#define FP_T float
|
||||
/* Presumably appended to an x87 mnemonic to select the operand size
   -- TODO confirm.  */
#define ASM_SUFFIX "s"
|
||||
|
||||
/* Rounding operation under test.  MODE is ignored: this always expands
   to _mm_floor_ss with X as both operands.  */
#define ROUND_INTRIN(x, mode) _mm_floor_ss(x, x)
|
||||
/* Value supplied as the MODE argument; unused by ROUND_INTRIN above.  */
#define ROUND_MODE _MM_FROUND_FLOOR
|
||||
/* Presumably the rounding type used to compute the reference value
   -- TODO confirm.  */
#define CHECK_ROUND_MODE 0x01
|
||||
|
||||
/* Presumably the element strides of the rounding loop and of the
   checking loop -- TODO confirm.  */
#define LOOP_INCREMENT 4
|
||||
#define CHECK_LOOP_INCREMENT 4
|
||||
|
||||
#include "sse4_1-round.h"
|
106
gcc/testsuite/gcc.target/i386/sse4_1-roundss-4.c
Normal file
106
gcc/testsuite/gcc.target/i386/sse4_1-roundss-4.c
Normal file
|
@ -0,0 +1,106 @@
|
|||
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
init_round (float *src)
|
||||
{
|
||||
int i, sign = 1;
|
||||
float f = rand ();
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src[i] = (i + 1)* f * M_PI * sign;
|
||||
if (i < (NUM / 2))
|
||||
{
|
||||
if ((i % 6) == 0)
|
||||
f = f * src[i];
|
||||
}
|
||||
else if (i == (NUM / 2))
|
||||
f = rand ();
|
||||
else if ((i % 6) == 0)
|
||||
f = 1 / (f * (i + 1) * src[i] * M_PI *sign);
|
||||
sign = -sign;
|
||||
}
|
||||
}
|
||||
|
||||
/* Round F to an integral value with the x87 FRNDINT instruction and
   return the result.

   TYPE selects the rounding direction:
     - if bit 2 (0x04) is set, the x87 control word is left unchanged,
       so whatever rounding direction is currently active is used;
     - otherwise the low two bits of TYPE are placed in bits 10-11 of
       the control word (its rounding-control field) and bits 0-5 (the
       exception masks) are set while the rounding is performed.

   The caller's control word is restored before returning.  */
static float
do_round (float f, int type)
{
  /* Unsigned, so the 16-bit masks below are represented exactly;
     0xFFFF does not fit in a signed short.  */
  unsigned short saved_cw, new_cw, clr_mask;
  float ret;

  if ((type & 4))
    {
      /* Keep every control-word bit as it is.  */
      type = 0;
      clr_mask = 0xFFFF;
    }
  else
    {
      /* Clear bits 0-5 and 10-11, then install the new values.  */
      type = 0x003F | ((type & 3) << 10);
      clr_mask = ~0x0C3F;
    }

  __asm__ __volatile__ ("fstcw %0" : "=m" (saved_cw));
  new_cw = (saved_cw & clr_mask) | type;

  /* Switch control word, round, and switch back inside one asm
     statement so the compiler cannot separate or reorder the steps.
     The "t" constraints tell the compiler that the value enters and
     leaves in st(0), instead of using the x87 stack behind its back.  */
  __asm__ __volatile__ ("fldcw %2\n\t"
                        "frndint\n\t"
                        "fldcw %3"
                        : "=t" (ret)
                        : "0" (f), "m" (new_cw), "m" (saved_cw));
  return ret;
}
|
||||
|
||||
static void
|
||||
sse4_1_test (void)
|
||||
{
|
||||
int i, j;
|
||||
float f;
|
||||
union
|
||||
{
|
||||
__m128 x[NUM / 4];
|
||||
float f[NUM];
|
||||
} dst, src;
|
||||
|
||||
init_round (src.f);
|
||||
memset (&dst, 0, NUM * sizeof(float));
|
||||
|
||||
for (i = 0; i < NUM / 4 ; i++)
|
||||
dst.x[i] = _mm_round_ss (dst.x[i], src.x[i], _MM_FROUND_RINT);
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
{
|
||||
for (j = 0; j < 3; j++)
|
||||
if (dst.f[i + j + 1] != 0.0)
|
||||
abort ();
|
||||
|
||||
f = do_round (src.f[i], 0x04);
|
||||
if (f != dst.f[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM / 4 ; i++)
|
||||
dst.x[i] = _mm_round_ss (dst.x[i], src.x[i], _MM_FROUND_NEARBYINT);
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
{
|
||||
for (j = 0; j < 3; j++)
|
||||
if (dst.f[i + j + 1] != 0.0)
|
||||
abort ();
|
||||
|
||||
f = do_round (src.f[i], 0x0c);
|
||||
if (f != dst.f[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue