[AArch64] Support zero-extended move to FP register
The popcount expansion uses SIMD instructions acting on 64-bit values.
As a result, a popcount of a 32-bit integer requires zero-extension before
moving the zero-extended value into an FP register.  This patch adds
support for zero-extended int->FP moves to avoid the redundant uxtw.
Similarly, add support for 32-bit zero-extending load->FP register
and 32-bit zero-extending FP->FP and FP->int moves.
Add a missing 'fp' arch attribute to the related 8/16-bit pattern and
fix an incorrect type attribute.

To complete zero-extended load support, add a new alternative to
load_pair_zero_extendsidi2_aarch64 to support LDP into FP registers too.

int f (int a)
{
  return __builtin_popcount (a);
}

Before:
	uxtw	x0, w0
	fmov	d0, x0
	cnt	v0.8b, v0.8b
	addv	b0, v0.8b
	fmov	w0, s0
	ret

After:
	fmov	s0, w0
	cnt	v0.8b, v0.8b
	addv	b0, v0.8b
	fmov	w0, s0
	ret

Passes regress & bootstrap on AArch64.

gcc/
	* config/aarch64/aarch64.md (zero_extendsidi2_aarch64): Add alternatives
	to zero-extend between int and floating-point registers.
	(load_pair_zero_extendsidi2_aarch64): Add alternative for zero-extended
	ldp into floating-point registers.  Add type and arch attributes.
	(zero_extend<SHORT:mode><GPI:mode>2_aarch64): Add arch attribute.
	Use f_loads for type attribute.

testsuite/
	* gcc.target/aarch64/popcnt.c: Test zero-extended popcount.
	* gcc.target/aarch64/vec_zeroextend.c: Test zero-extended vectors.

From-SVN: r265079
This commit is contained in:
parent
4dc003fffa
commit
0cfc095c8d
5 changed files with 68 additions and 13 deletions
|
@ -1,3 +1,12 @@
|
|||
2018-10-12 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.md (zero_extendsidi2_aarch64): Add alternatives
|
||||
to zero-extend between int and floating-point registers.
|
||||
(load_pair_zero_extendsidi2_aarch64): Add alternative for zero-extended
|
||||
ldp into floating-point registers. Add type and arch attributes.
|
||||
(zero_extend<SHORT:mode><GPI:mode>2_aarch64): Add arch attribute.
|
||||
Use f_loads for type attribute.
|
||||
|
||||
2018-10-11 Martin Sebor <msebor@redhat.com>
|
||||
|
||||
* doc/extend.texi (attribute packed): Correct typos.
|
||||
|
|
|
@ -1520,26 +1520,34 @@
|
|||
)
|
||||
|
||||
(define_insn "*zero_extendsidi2_aarch64"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w")
|
||||
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
|
||||
""
|
||||
"@
|
||||
uxtw\t%0, %w1
|
||||
ldr\t%w0, %1"
|
||||
[(set_attr "type" "extend,load_4")]
|
||||
ldr\t%w0, %1
|
||||
fmov\t%s0, %w1
|
||||
ldr\t%s0, %1
|
||||
fmov\t%w0, %s1
|
||||
fmov\t%s0, %s1"
|
||||
[(set_attr "type" "extend,load_4,f_mcr,f_loads,f_mrc,fmov")
|
||||
(set_attr "arch" "*,*,fp,fp,fp,fp")]
|
||||
)
|
||||
|
||||
(define_insn "*load_pair_zero_extendsidi2_aarch64"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
|
||||
(set (match_operand:DI 2 "register_operand" "=r")
|
||||
(zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,w")
|
||||
(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")))
|
||||
(set (match_operand:DI 2 "register_operand" "=r,w")
|
||||
(zero_extend:DI (match_operand:SI 3 "memory_operand" "m,m")))]
|
||||
"rtx_equal_p (XEXP (operands[3], 0),
|
||||
plus_constant (Pmode,
|
||||
XEXP (operands[1], 0),
|
||||
GET_MODE_SIZE (SImode)))"
|
||||
"ldp\\t%w0, %w2, %1"
|
||||
[(set_attr "type" "load_8")]
|
||||
"@
|
||||
ldp\t%w0, %w2, %1
|
||||
ldp\t%s0, %s2, %1"
|
||||
[(set_attr "type" "load_8,neon_load1_2reg")
|
||||
(set_attr "arch" "*,fp")]
|
||||
)
|
||||
|
||||
(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
|
||||
|
@ -1566,7 +1574,8 @@
|
|||
and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
|
||||
ldr<SHORT:size>\t%w0, %1
|
||||
ldr\t%<SHORT:size>0, %1"
|
||||
[(set_attr "type" "logic_imm,load_4,load_4")]
|
||||
[(set_attr "type" "logic_imm,load_4,f_loads")
|
||||
(set_attr "arch" "*,*,fp")]
|
||||
)
|
||||
|
||||
(define_expand "<optab>qihi2"
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2018-10-12 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* gcc.target/aarch64/popcnt.c: Test zero-extended popcount.
|
||||
* gcc.target/aarch64/vec_zeroextend.c: Test zero-extended vectors.
|
||||
|
||||
2018-10-11 Will Schmidt <will_schmidt@vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/fold-vec-insert-char-p8.c: New.
|
||||
|
|
|
@ -19,5 +19,16 @@ foo2 (long long x)
|
|||
return __builtin_popcountll (x);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "popcount" } } */
|
||||
/* { dg-final { scan-assembler-times "cnt\t" 3 } } */
|
||||
int
|
||||
foo3 (int *p)
|
||||
{
|
||||
return __builtin_popcount (*p);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {popcount} } } */
|
||||
/* { dg-final { scan-assembler-times {cnt\t} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {fmov\ts} 1 {target lp64} } } */
|
||||
/* { dg-final { scan-assembler-times {fmov\td} 2 {target lp64} } } */
|
||||
/* { dg-final { scan-assembler-times {fmov\ts} 2 {target ilp32} } } */
|
||||
/* { dg-final { scan-assembler-times {fmov\td} 1 {target ilp32} } } */
|
||||
/* { dg-final { scan-assembler-times {ldr\ts} 1 } } */
|
||||
|
|
21
gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
#define vector __attribute__((vector_size(16) ))
|
||||
|
||||
vector unsigned long long
|
||||
f1(vector unsigned long long b, vector unsigned int a)
|
||||
{
|
||||
b[0] = a[0];
|
||||
return b;
|
||||
}
|
||||
|
||||
unsigned long long
|
||||
f2(vector unsigned int a)
|
||||
{
|
||||
return a[0];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {fmov} 2 } } */
|
||||
/* { dg-final { scan-assembler-not {umov} } } */
|
||||
/* { dg-final { scan-assembler-not {uxtw} } } */
|
Loading…
Add table
Reference in a new issue