[Patch AArch64] Use 128-bit vectors when autovectorizing 16-bit float types
gcc/
	* config/aarch64/aarch64.c (aarch64_simd_container_mode): Handle HFmode.

gcc/testsuite/
	* gcc.target/aarch64/vect_fp16_1.c: New.

From-SVN: r245429
This commit is contained in:
parent
f6cc254a28
commit
b719f884b4
4 changed files with 43 additions and 0 deletions
|
@@ -1,3 +1,8 @@
|
|||
2017-02-14 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.c (aarch64_simd_container_mode): Handle
|
||||
HFmode.
|
||||
|
||||
2017-02-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
PR rtl-optimization/68664
|
||||
|
|
|
@@ -10845,6 +10845,8 @@ aarch64_simd_container_mode (machine_mode mode, unsigned width)
|
|||
return V2DFmode;
|
||||
case SFmode:
|
||||
return V4SFmode;
|
||||
case HFmode:
|
||||
return V8HFmode;
|
||||
case SImode:
|
||||
return V4SImode;
|
||||
case HImode:
|
||||
|
@@ -10861,6 +10863,8 @@ aarch64_simd_container_mode (machine_mode mode, unsigned width)
|
|||
{
|
||||
case SFmode:
|
||||
return V2SFmode;
|
||||
case HFmode:
|
||||
return V4HFmode;
|
||||
case SImode:
|
||||
return V2SImode;
|
||||
case HImode:
|
||||
|
|
|
@@ -1,3 +1,7 @@
|
|||
2017-02-14 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/vect_fp16_1.c: New.
|
||||
|
||||
2017-02-14 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
|
||||
|
||||
* gcc.dg/gimplefe-25.c: New test.
|
||||
|
|
30
gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c
Normal file
30
gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c
Normal file
|
@@ -0,0 +1,30 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fno-vect-cost-model" } */
|
||||
|
||||
/* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16
|
||||
types. */
|
||||
|
||||
/* Enable ARMv8.2-A+fp16 so we have access to the vector instructions. */
|
||||
#pragma GCC target ("arch=armv8.2-a+fp16")
|
||||
|
||||
/* Element-wise sum over 256 _Float16 values: a[i] = b[i] + c[i].
   Each pointer parameter is __restrict__-qualified and carries an
   __aligned__ (16) attribute.  Per the closing comment of this test, this
   function accounts for one of the two expected "8h" FADD instructions.
   NOTE(review): the function is declared to return _Float16 but contains
   no return statement; the file is compile-only (dg-do compile) and nothing
   in it calls this function, so the result is never read here.  */
_Float16
|
||||
sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
|
||||
_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
|
||||
_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
a[i] = b[i] + c[i];
|
||||
}
|
||||
|
||||
/* Element-wise sum over 256 __fp16 values: a[i] = b[i] + c[i].
   Each pointer parameter is __restrict__-qualified and carries an
   __aligned__ (16) attribute.  Per the closing comment of this test, this
   function accounts for one of the two expected "8h" FADD instructions.
   NOTE(review): the return type is _Float16 although the operands are
   __fp16, and there is no return statement; the file is compile-only
   (dg-do compile) and nothing in it calls this function, so the result is
   never read here.  Confirm the return type is intentional.  */
_Float16
|
||||
sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
|
||||
__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
|
||||
__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
a[i] = b[i] + c[i];
|
||||
}
|
||||
|
||||
/* Two FADD operations on "8h" data widths, one from sum_Float16, one from
|
||||
sum_fp16. */
|
||||
/* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */
|
Loading…
Add table
Reference in a new issue