AVX Programming Reference (December, 2008)
gcc/ 2009-01-07 H.J. Lu <hongjiu.lu@intel.com> AVX Programming Reference (December, 2008) * config/i386/avxintrin.h (_mm256_stream_si256): New. (_mm256_stream_pd): Likewise. (_mm256_stream_ps): Likewise. * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256, IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256. (ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI. (bdesc_special_args): Add __builtin_ia32_movntdq256, __builtin_ia32_movntpd256 and __builtin_ia32_movntps256. (ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI. (ix86_expand_special_args_builtin): Likewise. * config/i386/sse.md (AVXMODEDI): New. (avx_movnt<mode>): Likewise. (avx_movnt<mode>): Likewise. (<sse>_movnt<mode>): Remove AVX support. (sse2_movntv2di): Likewise. gcc/testsuite/ 2009-01-07 H.J. Lu <hongjiu.lu@intel.com> AVX Programming Reference (December, 2008) * gcc.target/i386/avx-vmovntdq-256-1.c: New. * gcc.target/i386/avx-vmovntpd-256-1.c: Likewise. * gcc.target/i386/avx-vmovntps-256-1.c: Likewise. * gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte. * gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise. From-SVN: r143157
This commit is contained in:
parent
8877b5a9bd
commit
65b82caa2b
10 changed files with 173 additions and 6 deletions
|
@ -1,3 +1,24 @@
|
|||
2009-01-07 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
AVX Programming Reference (December, 2008)
|
||||
* config/i386/avxintrin.h (_mm256_stream_si256): New.
|
||||
(_mm256_stream_pd): Likewise.
|
||||
(_mm256_stream_ps): Likewise.
|
||||
|
||||
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256,
|
||||
IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256.
|
||||
(ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI.
|
||||
(bdesc_special_args): Add __builtin_ia32_movntdq256,
|
||||
__builtin_ia32_movntpd256 and __builtin_ia32_movntps256.
|
||||
(ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI.
|
||||
(ix86_expand_special_args_builtin): Likewise.
|
||||
|
||||
* config/i386/sse.md (AVXMODEDI): New.
|
||||
(avx_movnt<mode>): Likewise.
|
||||
(avx_movnt<mode>): Likewise.
|
||||
(<sse>_movnt<mode>): Remove AVX support.
|
||||
(sse2_movntv2di): Likewise.
|
||||
|
||||
2009-01-07 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/38751
|
||||
|
|
|
@ -968,6 +968,24 @@ _mm256_lddqu_si256 (__m256i const *__P)
|
|||
return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_stream_si256 (__m256i *__A, __m256i __B)
|
||||
{
|
||||
__builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_stream_pd (double *__A, __m256d __B)
|
||||
{
|
||||
__builtin_ia32_movntpd256 (__A, (__v4df)__B);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_stream_ps (float *__P, __m256 __A)
|
||||
{
|
||||
__builtin_ia32_movntps256 (__P, (__v8sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_rcp_ps (__m256 __A)
|
||||
{
|
||||
|
|
|
@ -19942,6 +19942,9 @@ enum ix86_builtins
|
|||
IX86_BUILTIN_STOREUPD256,
|
||||
IX86_BUILTIN_STOREUPS256,
|
||||
IX86_BUILTIN_LDDQU256,
|
||||
IX86_BUILTIN_MOVNTDQ256,
|
||||
IX86_BUILTIN_MOVNTPD256,
|
||||
IX86_BUILTIN_MOVNTPS256,
|
||||
IX86_BUILTIN_LOADDQU256,
|
||||
IX86_BUILTIN_STOREDQU256,
|
||||
IX86_BUILTIN_MASKLOADPD,
|
||||
|
@ -20413,6 +20416,7 @@ enum ix86_special_builtin_type
|
|||
V2DF_FTYPE_PCV2DF_V2DF,
|
||||
V2DI_FTYPE_PV2DI,
|
||||
VOID_FTYPE_PV2SF_V4SF,
|
||||
VOID_FTYPE_PV4DI_V4DI,
|
||||
VOID_FTYPE_PV2DI_V2DI,
|
||||
VOID_FTYPE_PCHAR_V32QI,
|
||||
VOID_FTYPE_PCHAR_V16QI,
|
||||
|
@ -20652,6 +20656,10 @@ static const struct builtin_description bdesc_special_args[] =
|
|||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
|
||||
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
|
||||
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
|
||||
|
@ -22183,6 +22191,11 @@ ix86_init_mmx_sse_builtins (void)
|
|||
V8SI_type_node, V4SI_type_node,
|
||||
integer_type_node,
|
||||
NULL_TREE);
|
||||
tree pv4di_type_node = build_pointer_type (V4DI_type_node);
|
||||
tree void_ftype_pv4di_v4di
|
||||
= build_function_type_list (void_type_node,
|
||||
pv4di_type_node, V4DI_type_node,
|
||||
NULL_TREE);
|
||||
tree v8sf_ftype_v8sf_v4sf_int
|
||||
= build_function_type_list (V8SF_type_node,
|
||||
V8SF_type_node, V4SF_type_node,
|
||||
|
@ -22369,6 +22382,9 @@ ix86_init_mmx_sse_builtins (void)
|
|||
case VOID_FTYPE_PV2SF_V4SF:
|
||||
type = void_ftype_pv2sf_v4sf;
|
||||
break;
|
||||
case VOID_FTYPE_PV4DI_V4DI:
|
||||
type = void_ftype_pv4di_v4di;
|
||||
break;
|
||||
case VOID_FTYPE_PV2DI_V2DI:
|
||||
type = void_ftype_pv2di_v2di;
|
||||
break;
|
||||
|
@ -24215,6 +24231,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
|||
memory = 0;
|
||||
break;
|
||||
case VOID_FTYPE_PV2SF_V4SF:
|
||||
case VOID_FTYPE_PV4DI_V4DI:
|
||||
case VOID_FTYPE_PV2DI_V2DI:
|
||||
case VOID_FTYPE_PCHAR_V32QI:
|
||||
case VOID_FTYPE_PCHAR_V16QI:
|
||||
|
|
|
@ -35,6 +35,9 @@
|
|||
;; All QI vector modes handled by AVX
|
||||
(define_mode_iterator AVXMODEQI [V32QI V16QI])
|
||||
|
||||
;; All DI vector modes handled by AVX
|
||||
(define_mode_iterator AVXMODEDI [V4DI V2DI])
|
||||
|
||||
;; All vector modes handled by AVX
|
||||
(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
|
||||
|
||||
|
@ -383,26 +386,46 @@
|
|||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "avx_movnt<mode>"
|
||||
[(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
|
||||
(unspec:AVXMODEF2P
|
||||
[(match_operand:AVXMODEF2P 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||||
"vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "<sse>_movnt<mode>"
|
||||
[(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
|
||||
(unspec:SSEMODEF2P
|
||||
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||||
"%vmovntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||||
"movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "avx_movnt<mode>"
|
||||
[(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
|
||||
(unspec:AVXMODEDI
|
||||
[(match_operand:AVXMODEDI 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_AVX"
|
||||
"vmovntdq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<avxvecmode>")])
|
||||
|
||||
(define_insn "sse2_movntv2di"
|
||||
[(set (match_operand:V2DI 0 "memory_operand" "=m")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE2"
|
||||
"%vmovntdq\t{%1, %0|%0, %1}"
|
||||
"movntdq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse2_movntsi"
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
2009-01-07 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
AVX Programming Reference (December, 2008)
|
||||
* gcc.target/i386/avx-vmovntdq-256-1.c: New.
|
||||
* gcc.target/i386/avx-vmovntpd-256-1.c: Likewise.
|
||||
* gcc.target/i386/avx-vmovntps-256-1.c: Likewise.
|
||||
|
||||
* gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte.
|
||||
* gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise.
|
||||
|
||||
2009-01-06 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/38220
|
||||
|
|
27
gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c
Normal file
27
gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
static void
|
||||
__attribute__((noinline))
|
||||
test (__m256i *p, __m256i s)
|
||||
{
|
||||
return _mm256_stream_si256 (p, s);
|
||||
}
|
||||
|
||||
static void
|
||||
avx_test (void)
|
||||
{
|
||||
union256i_d u;
|
||||
int e[8] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1};
|
||||
|
||||
u.x = _mm256_set_epi32 (2434, 6845, 3789, 4683,
|
||||
4623, 2236, 8295, 1084);
|
||||
|
||||
test ((__m256i *)e, u.x);
|
||||
|
||||
if (check_union256i_d (u, e))
|
||||
abort ();
|
||||
}
|
25
gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c
Normal file
25
gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
static void
|
||||
__attribute__((noinline))
|
||||
test (double *p, __m256d s)
|
||||
{
|
||||
return _mm256_stream_pd (p, s);
|
||||
}
|
||||
|
||||
static void
|
||||
avx_test (void)
|
||||
{
|
||||
union256d u;
|
||||
double e[4] __attribute__ ((aligned(32))) = {1,1,1,1};
|
||||
|
||||
u.x = _mm256_set_pd (2134.3343, 1234.635654, -13443.35, 43.35345);
|
||||
test (e, u.x);
|
||||
|
||||
if (check_union256d (u, e))
|
||||
abort ();
|
||||
}
|
26
gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c
Normal file
26
gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
static void
|
||||
__attribute__((noinline))
|
||||
test (float *p, __m256 s)
|
||||
{
|
||||
return _mm256_stream_ps (p, s);
|
||||
}
|
||||
|
||||
static void
|
||||
avx_test (void)
|
||||
{
|
||||
union256 u;
|
||||
float e[8] __attribute__ ((aligned(32)));
|
||||
|
||||
u.x = _mm256_set_ps (24.43, 68.346, -43.35, 546.46,
|
||||
46.9, -2.78, 82.9, -0.4);
|
||||
test (e, u.x);
|
||||
|
||||
if (check_union256 (u, e))
|
||||
abort ();
|
||||
}
|
|
@ -24,7 +24,7 @@ static void
|
|||
TEST (void)
|
||||
{
|
||||
union128i_d u;
|
||||
int e[4] __attribute__ ((aligned(32)));
|
||||
int e[4] __attribute__ ((aligned(16)));
|
||||
|
||||
u.x = _mm_set_epi32 (21, 34, 334, 8567);
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ static void
|
|||
TEST (void)
|
||||
{
|
||||
union128d u;
|
||||
double e[2] __attribute__ ((aligned(32)));
|
||||
double e[2] __attribute__ ((aligned(16)));
|
||||
|
||||
u.x = _mm_set_pd (2134.3343,1234.635654);
|
||||
test (e, u.x);
|
||||
|
|
Loading…
Add table
Reference in a new issue