rs6000: Support more SSE4 "cmp", "mul", "pack" intrinsics
Function signatures and decorations match gcc/config/i386/smmintrin.h. Also, copy tests for: - _mm_cmpeq_epi64 - _mm_mullo_epi32, _mm_mul_epi32 - _mm_packus_epi32 - _mm_cmpgt_epi64 (SSE4.2) from gcc/testsuite/gcc.target/i386. 2021-10-11 Paul A. Clarke <pc@us.ibm.com> gcc * config/rs6000/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64, _mm_mullo_epi32, _mm_mul_epi32, _mm_packus_epi32): New. * config/rs6000/nmmintrin.h: Copy from i386, tweak to suit. gcc/testsuite * gcc.target/powerpc/pr78102.c: Copy from gcc.target/i386, adjust dg directives to suit. * gcc.target/powerpc/sse4_1-packusdw.c: Same. * gcc.target/powerpc/sse4_1-pcmpeqq.c: Same. * gcc.target/powerpc/sse4_1-pmuldq.c: Same. * gcc.target/powerpc/sse4_1-pmulld.c: Same. * gcc.target/powerpc/sse4_2-pcmpgtq.c: Same. * gcc.target/powerpc/sse4_2-check.h: Copy from gcc.target/i386, tweak to suit.
This commit is contained in:
parent
285d75a454
commit
29fb1e831b
9 changed files with 384 additions and 0 deletions
40
gcc/config/rs6000/nmmintrin.h
Normal file
40
gcc/config/rs6000/nmmintrin.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64le.
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets. */
|
||||
#endif
|
||||
|
||||
#ifndef _NMMINTRIN_H_INCLUDED
|
||||
#define _NMMINTRIN_H_INCLUDED
|
||||
|
||||
/* We just include SSE4.1 header file. */
|
||||
#include <smmintrin.h>
|
||||
|
||||
#endif /* _NMMINTRIN_H_INCLUDED */
|
|
@ -274,6 +274,15 @@ _mm_floor_ss (__m128 __A, __m128 __B)
|
|||
return __r;
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
/* Compare the two 64-bit integer elements of __X and __Y for equality
   using vec_cmpeq on the __v2di view of each operand, and hand the
   resulting mask back as an __m128i.  Only provided when _ARCH_PWR8 is
   defined.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
  const __v2di __lhs = (__v2di) __X;
  const __v2di __rhs = (__v2di) __Y;

  return (__m128i) vec_cmpeq (__lhs, __rhs);
}
#endif
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epi8 (__m128i __X, __m128i __Y)
|
||||
|
@ -332,6 +341,22 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
|
|||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) vec_mul ((__v4su) __X, (__v4su) __Y);
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
/* Multiply the even-numbered signed 32-bit elements of __X and __Y
   with vec_mule and return the widened products as an __m128i.  Only
   provided when _ARCH_PWR8 is defined.

   Declared "extern __inline" like the other gnu_inline intrinsics in
   this header: with __gnu_inline__, omitting "extern" would also emit
   an out-of-line external definition in every translation unit that
   includes the header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mule ((__v4si) __X, (__v4si) __Y);
}
#endif
|
||||
|
||||
__inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi16 (__m128i __A)
|
||||
{
|
||||
return (__m128i) vec_unpackh ((__v16qi) __A);
|
||||
|
@ -495,4 +520,20 @@ _mm_minpos_epu16 (__m128i __A)
|
|||
return __r.__m;
|
||||
}
|
||||
|
||||
__inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_packus_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) vec_packsu ((__v4si) __X, (__v4si) __Y);
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
/* Compare the two signed 64-bit elements of __X against those of __Y
   with vec_cmpgt and return the resulting mask as an __m128i.  Only
   provided when _ARCH_PWR8 is defined.

   Declared "extern __inline" like the other gnu_inline intrinsics in
   this header: with __gnu_inline__, omitting "extern" would also emit
   an out-of-line external definition in every translation unit that
   includes the header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpgt ((__v2di) __X, (__v2di) __Y);
}
#endif
|
||||
|
||||
#endif
|
||||
|
|
23
gcc/testsuite/gcc.target/powerpc/pr78102.c
Normal file
23
gcc/testsuite/gcc.target/powerpc/pr78102.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
/* { dg-options "-O2 -mpower8-vector" } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* _mm_cmpeq_epi64 is only declared when _ARCH_PWR8 is defined, so this
   test must be compiled for Power8 vector support rather than plain
   VSX.  */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
/* Compile-only probe: 64-bit element equality expressed through the
   _mm_cmpeq_epi64 intrinsic.  */
__m128i
|
||||
foo (const __m128i x, const __m128i y)
|
||||
{
|
||||
return _mm_cmpeq_epi64 (x, y);
|
||||
}
|
||||
|
||||
/* Compile-only probe: the same comparison written with the GNU vector
   `==' operator on __v2di operands.  */
__v2di
|
||||
bar (const __v2di x, const __v2di y)
|
||||
{
|
||||
return x == y;
|
||||
}
|
||||
|
||||
/* Compile-only probe: element-wise inequality written with the GNU
   vector `!=' operator on __v2di operands.  */
__v2di
|
||||
baz (const __v2di x, const __v2di y)
|
||||
{
|
||||
return x != y;
|
||||
}
|
73
gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c
Normal file
73
gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c
Normal file
|
@ -0,0 +1,73 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mvsx" } */
|
||||
/* { dg-require-effective-target powerpc_vsx_hw } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
/* Reference model for unsigned saturation: clamp IVAL to the range
   [0, 0xffff] and return it as an unsigned short.  */
static unsigned short
int_to_ushort (int iVal)
{
  if (iVal < 0)
    return 0;

  if (iVal > 0xffff)
    return 0xffff;

  return (unsigned short) iVal;
}
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
} src1, src2;
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
unsigned short s[NUM * 2];
|
||||
} dst;
|
||||
int i, sign = 1;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_packus_epi32 (src1.x [i / 4], src2.x [i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i ++)
|
||||
{
|
||||
int dstIndex;
|
||||
unsigned short sVal;
|
||||
|
||||
sVal = int_to_ushort (src1.i[i]);
|
||||
dstIndex = (i % 4) + (i / 4) * 8;
|
||||
if (sVal != dst.s[dstIndex])
|
||||
abort ();
|
||||
|
||||
sVal = int_to_ushort (src2.i[i]);
|
||||
dstIndex += 4;
|
||||
if (sVal != dst.s[dstIndex])
|
||||
abort ();
|
||||
}
|
||||
}
|
46
gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c
Normal file
46
gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mpower8-vector" } */
|
||||
/* { dg-require-effective-target p8vector_hw } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign=1;
|
||||
long long is_eq;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.ll[i] = i * i * sign;
|
||||
src2.ll[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x [i / 2] = _mm_cmpeq_epi64(src1.x [i / 2], src2.x [i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
is_eq = src1.ll[i] == src2.ll[i] ? 0xffffffffffffffffLL : 0LL;
|
||||
if (is_eq != dst.ll[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
51
gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c
Normal file
51
gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mpower8-vector" } */
|
||||
/* { dg-require-effective-target p8vector_hw } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
} dst;
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
int i[NUM * 2];
|
||||
} src1, src2;
|
||||
int i, sign = 1;
|
||||
long long value;
|
||||
|
||||
for (i = 0; i < NUM * 2; i += 2)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x[i / 2] = _mm_mul_epi32 (src1.x[i / 2], src2.x[i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
value = (long long) src1.i[i * 2] * (long long) src2.i[i * 2];
|
||||
if (value != dst.ll[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
46
gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c
Normal file
46
gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mvsx" } */
|
||||
/* { dg-require-effective-target powerpc_vsx_hw } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 4];
|
||||
int i[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
int value;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.i[i] = i * i * sign;
|
||||
src2.i[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 4)
|
||||
dst.x[i / 4] = _mm_mullo_epi32 (src1.x[i / 4], src2.x[i / 4]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
value = src1.i[i] * src2.i[i];
|
||||
if (value != dst.i[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
18
gcc/testsuite/gcc.target/powerpc/sse4_2-check.h
Normal file
18
gcc/testsuite/gcc.target/powerpc/sse4_2-check.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
#define NO_WARN_X86_INTRINSICS 1
|
||||
|
||||
static void sse4_2_test (void);
|
||||
|
||||
/* Out-of-line (noinline) trampoline that runs the sse4_2_test function
   supplied by the including test file.  */
static void
|
||||
__attribute__ ((noinline))
|
||||
do_test (void)
|
||||
{
|
||||
sse4_2_test ();
|
||||
}
|
||||
|
||||
/* Test entry point: run the test via do_test and return 0.  A failing
   test is expected to terminate the program itself before the return
   is reached.  */
int
|
||||
main ()
|
||||
{
|
||||
do_test ();
|
||||
|
||||
return 0;
|
||||
}
|
46
gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c
Normal file
46
gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O2 -mpower8-vector" } */
/* { dg-require-effective-target p8vector_hw } */
/* _mm_cmpgt_epi64 is only declared when _ARCH_PWR8 is defined, so this
   test must be built and run with Power8 vector support rather than
   plain VSX.  */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_2-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_2_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <nmmintrin.h>
|
||||
|
||||
#define NUM 64
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union
|
||||
{
|
||||
__m128i x[NUM / 2];
|
||||
long long ll[NUM];
|
||||
} dst, src1, src2;
|
||||
int i, sign = 1;
|
||||
long long is_eq;
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
src1.ll[i] = i * i * sign;
|
||||
src2.ll[i] = (i + 20) * sign;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM; i += 2)
|
||||
dst.x[i / 2] = _mm_cmpgt_epi64 (src1.x[i / 2], src2.x[i / 2]);
|
||||
|
||||
for (i = 0; i < NUM; i++)
|
||||
{
|
||||
is_eq = src1.ll[i] > src2.ll[i] ? 0xFFFFFFFFFFFFFFFFLL : 0LL;
|
||||
if (is_eq != dst.ll[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue