AVX Programming Reference (December, 2008)

gcc/

2009-01-07  H.J. Lu  <hongjiu.lu@intel.com>

	AVX Programming Reference (December, 2008)
	* config/i386/avxintrin.h (_mm256_stream_si256): New.
	(_mm256_stream_pd): Likewise.
	(_mm256_stream_ps): Likewise.

	* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256,
	IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256.
	(ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI.
	(bdesc_special_args): Add __builtin_ia32_movntdq256,
	__builtin_ia32_movntpd256 and __builtin_ia32_movntps256.
	(ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI.
	(ix86_expand_special_args_builtin): Likewise.

	* config/i386/sse.md (AVXMODEDI): New.
	(avx_movnt<mode>): Likewise.
	(avx_movnt<mode>): Likewise.
	(<sse>_movnt<mode>): Remove AVX support.
	(sse2_movntv2di): Likewise.

gcc/testsuite/

2009-01-07  H.J. Lu  <hongjiu.lu@intel.com>

	AVX Programming Reference (December, 2008)
	* gcc.target/i386/avx-vmovntdq-256-1.c: New.
	* gcc.target/i386/avx-vmovntpd-256-1.c: Likewise.
	* gcc.target/i386/avx-vmovntps-256-1.c: Likewise.

	* gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte.
	* gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise.

From-SVN: r143157
This commit is contained in:
H.J. Lu 2009-01-07 14:56:14 +00:00 committed by H.J. Lu
parent 8877b5a9bd
commit 65b82caa2b
10 changed files with 173 additions and 6 deletions

View file

@ -1,3 +1,24 @@
2009-01-07 H.J. Lu <hongjiu.lu@intel.com>
AVX Programming Reference (December, 2008)
* config/i386/avxintrin.h (_mm256_stream_si256): New.
(_mm256_stream_pd): Likewise.
(_mm256_stream_ps): Likewise.
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256,
IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256.
(ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI.
(bdesc_special_args): Add __builtin_ia32_movntdq256,
__builtin_ia32_movntpd256 and __builtin_ia32_movntps256.
(ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI.
(ix86_expand_special_args_builtin): Likewise.
* config/i386/sse.md (AVXMODEDI): New.
(avx_movnt<mode>): Likewise.
(avx_movnt<mode>): Likewise.
(<sse>_movnt<mode>): Remove AVX support.
(sse2_movntv2di): Likewise.
2009-01-07 Richard Guenther <rguenther@suse.de>
PR middle-end/38751

View file

@ -968,6 +968,24 @@ _mm256_lddqu_si256 (__m256i const *__P)
return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_si256 (__m256i *__A, __m256i __B)
{
__builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_pd (double *__A, __m256d __B)
{
__builtin_ia32_movntpd256 (__A, (__v4df)__B);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_ps (float *__P, __m256 __A)
{
__builtin_ia32_movntps256 (__P, (__v8sf)__A);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{

View file

@ -19942,6 +19942,9 @@ enum ix86_builtins
IX86_BUILTIN_STOREUPD256,
IX86_BUILTIN_STOREUPS256,
IX86_BUILTIN_LDDQU256,
IX86_BUILTIN_MOVNTDQ256,
IX86_BUILTIN_MOVNTPD256,
IX86_BUILTIN_MOVNTPS256,
IX86_BUILTIN_LOADDQU256,
IX86_BUILTIN_STOREDQU256,
IX86_BUILTIN_MASKLOADPD,
@ -20413,6 +20416,7 @@ enum ix86_special_builtin_type
V2DF_FTYPE_PCV2DF_V2DF,
V2DI_FTYPE_PV2DI,
VOID_FTYPE_PV2SF_V4SF,
VOID_FTYPE_PV4DI_V4DI,
VOID_FTYPE_PV2DI_V2DI,
VOID_FTYPE_PCHAR_V32QI,
VOID_FTYPE_PCHAR_V16QI,
@ -20652,6 +20656,10 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
@ -22183,6 +22191,11 @@ ix86_init_mmx_sse_builtins (void)
V8SI_type_node, V4SI_type_node,
integer_type_node,
NULL_TREE);
tree pv4di_type_node = build_pointer_type (V4DI_type_node);
tree void_ftype_pv4di_v4di
= build_function_type_list (void_type_node,
pv4di_type_node, V4DI_type_node,
NULL_TREE);
tree v8sf_ftype_v8sf_v4sf_int
= build_function_type_list (V8SF_type_node,
V8SF_type_node, V4SF_type_node,
@ -22369,6 +22382,9 @@ ix86_init_mmx_sse_builtins (void)
case VOID_FTYPE_PV2SF_V4SF:
type = void_ftype_pv2sf_v4sf;
break;
case VOID_FTYPE_PV4DI_V4DI:
type = void_ftype_pv4di_v4di;
break;
case VOID_FTYPE_PV2DI_V2DI:
type = void_ftype_pv2di_v2di;
break;
@ -24215,6 +24231,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
memory = 0;
break;
case VOID_FTYPE_PV2SF_V4SF:
case VOID_FTYPE_PV4DI_V4DI:
case VOID_FTYPE_PV2DI_V2DI:
case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:

View file

@ -35,6 +35,9 @@
;; All QI vector modes handled by AVX
(define_mode_iterator AVXMODEQI [V32QI V16QI])
;; All DI vector modes handled by AVX
(define_mode_iterator AVXMODEDI [V4DI V2DI])
;; All vector modes handled by AVX
(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
@ -383,26 +386,46 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "avx_movnt<mode>"
[(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
"vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
(define_insn "<sse>_movnt<mode>"
[(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
"%vmovntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
"movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn "avx_movnt<mode>"
[(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
(unspec:AVXMODEDI
[(match_operand:AVXMODEDI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_AVX"
"vmovntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "mode" "<avxvecmode>")])
(define_insn "sse2_movntv2di"
[(set (match_operand:V2DI 0 "memory_operand" "=m")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"%vmovntdq\t{%1, %0|%0, %1}"
"movntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse2_movntsi"

View file

@ -1,3 +1,13 @@
2009-01-07 H.J. Lu <hongjiu.lu@intel.com>
AVX Programming Reference (December, 2008)
* gcc.target/i386/avx-vmovntdq-256-1.c: New.
* gcc.target/i386/avx-vmovntpd-256-1.c: Likewise.
* gcc.target/i386/avx-vmovntps-256-1.c: Likewise.
* gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte.
* gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise.
2009-01-06 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/38220

View file

@ -0,0 +1,27 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
static void
__attribute__((noinline))
test (__m256i *p, __m256i s)
{
return _mm256_stream_si256 (p, s);
}
static void
avx_test (void)
{
union256i_d u;
int e[8] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1};
u.x = _mm256_set_epi32 (2434, 6845, 3789, 4683,
4623, 2236, 8295, 1084);
test ((__m256i *)e, u.x);
if (check_union256i_d (u, e))
abort ();
}

View file

@ -0,0 +1,25 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
static void
__attribute__((noinline))
test (double *p, __m256d s)
{
return _mm256_stream_pd (p, s);
}
static void
avx_test (void)
{
union256d u;
double e[4] __attribute__ ((aligned(32))) = {1,1,1,1};
u.x = _mm256_set_pd (2134.3343, 1234.635654, -13443.35, 43.35345);
test (e, u.x);
if (check_union256d (u, e))
abort ();
}

View file

@ -0,0 +1,26 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
static void
__attribute__((noinline))
test (float *p, __m256 s)
{
return _mm256_stream_ps (p, s);
}
static void
avx_test (void)
{
union256 u;
float e[8] __attribute__ ((aligned(32)));
u.x = _mm256_set_ps (24.43, 68.346, -43.35, 546.46,
46.9, -2.78, 82.9, -0.4);
test (e, u.x);
if (check_union256 (u, e))
abort ();
}

View file

@ -24,7 +24,7 @@ static void
TEST (void)
{
union128i_d u;
int e[4] __attribute__ ((aligned(32)));
int e[4] __attribute__ ((aligned(16)));
u.x = _mm_set_epi32 (21, 34, 334, 8567);

View file

@ -24,7 +24,7 @@ static void
TEST (void)
{
union128d u;
double e[2] __attribute__ ((aligned(32)));
double e[2] __attribute__ ((aligned(16)));
u.x = _mm_set_pd (2134.3343,1234.635654);
test (e, u.x);