AArch64: Undo vec_widen_<sur>shiftl optabs [PR106346]
In GCC 11 we implemented the vectorizer optab for widening left shifts; however, this optab is only supported for uniform shift constants. At the moment GCC still has two loop vectorization strategies (classical loop and SLP-based loop vec) and the optab is implemented as a scalar pattern. This means that when we apply it to a non-uniform constant inside a loop we only find out during SLP build that the constants aren't uniform. At this point it's too late and we lose SLP entirely. Over the years I've tried various options but none of them work well: 1. Dissolving patterns during SLP build (problematic, also dissolves them for non-SLP). 2. Optionally ignoring patterns for SLP build (problematic, ends up interfering with relevancy detection). 3. Relaxing the constraint on SLP build to allow non-constant values and dissolving them after SLP build using an SLP pattern (problematic, ends up breaking shift reassociation). As a result we've concluded that for now this pattern should just be removed and formed during RTL. The plan is to move this to an SLP-only pattern once we remove classical loop vectorization support from GCC, at which time we can also properly support SVE's Top and Bottom variants. This removes the optab and reworks the RTL to recognize both the vector variant and the intrinsics variant. It also simplifies all these patterns. gcc/ChangeLog: PR target/106346 * config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>, vec_widen_<sur>shiftl_hi_<mode>): Remove. (aarch64_<sur>shll<mode>_internal): Renamed to... (aarch64_<su>shll<mode>): .. This. (aarch64_<sur>shll2<mode>_internal): Renamed to... (aarch64_<su>shll2<mode>): .. This. (aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new optabs. * config/aarch64/constraints.md (D2, DL): New. * config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New. gcc/testsuite/ChangeLog: PR target/106346 * gcc.target/aarch64/pr98772.c: Adjust assembly. * gcc.target/aarch64/vect-widen-shift.c: New test.
This commit is contained in:
parent
6b80071a4d
commit
451391a647
5 changed files with 113 additions and 82 deletions
|
@ -6387,107 +6387,69 @@
|
|||
[(set_attr "type" "neon_sat_shift_reg<q>")]
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
||||
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
|
||||
(match_operand:SI 2
|
||||
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
|
||||
VSHLL))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
||||
emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
|
||||
p, operands[2]));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
|
||||
(match_operand:SI 2
|
||||
"immediate_operand" "i")]
|
||||
VSHLL))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
||||
emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
|
||||
p, operands[2]));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
;; vshll_n
|
||||
|
||||
(define_insn "aarch64_<sur>shll<mode>_internal"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
||||
(unspec:<VWIDE> [(vec_select:<VHALF>
|
||||
(match_operand:VQW 1 "register_operand" "w")
|
||||
(match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
|
||||
(match_operand:SI 3
|
||||
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
|
||||
VSHLL))]
|
||||
(define_insn "aarch64_<su>shll<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
|
||||
(match_operand:VD_BHSI 1 "register_operand"))
|
||||
(match_operand:<VWIDE> 2
|
||||
"aarch64_simd_shll_imm_vec")))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
|
||||
return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
|
||||
else
|
||||
return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
|
||||
{@ [cons: =0, 1, 2]
|
||||
[w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
|
||||
[w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
|
||||
}
|
||||
[(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_<sur>shll2<mode>_internal"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
||||
(unspec:<VWIDE> [(vec_select:<VHALF>
|
||||
(match_operand:VQW 1 "register_operand" "w")
|
||||
(match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
|
||||
(match_operand:SI 3
|
||||
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
|
||||
VSHLL))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
|
||||
return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
|
||||
else
|
||||
return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
|
||||
}
|
||||
[(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_<sur>shll_n<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
||||
(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
|
||||
(define_expand "aarch64_<sur>shll_n<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
|
||||
(match_operand:SI 2
|
||||
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
|
||||
VSHLL))]
|
||||
"aarch64_simd_shift_imm_bitsize_<ve_mode>")]
|
||||
VSHLL))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
|
||||
return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
||||
else
|
||||
return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
||||
rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
|
||||
emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
;; vshll_high_n
|
||||
|
||||
(define_insn "aarch64_<sur>shll2_n<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
||||
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
VSHLL))]
|
||||
(define_insn "aarch64_<su>shll2<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
|
||||
(vec_select:<VHALF>
|
||||
(match_operand:VQW 1 "register_operand")
|
||||
(match_operand:VQW 2 "vect_par_cnst_hi_half")))
|
||||
(match_operand:<VWIDE> 3
|
||||
"aarch64_simd_shll_imm_vec")))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
|
||||
return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
||||
else
|
||||
return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
||||
{@ [cons: =0, 1, 2, 3]
|
||||
[w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
|
||||
[w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
|
||||
}
|
||||
[(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
(define_expand "aarch64_<sur>shll2_n<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
|
||||
(match_operand:SI 2
|
||||
"aarch64_simd_shift_imm_bitsize_<ve_mode>")]
|
||||
VSHLL))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
|
||||
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
||||
emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
;; vrshr_n
|
||||
|
||||
(define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
|
||||
|
|
|
@ -468,6 +468,20 @@
|
|||
GET_MODE_UNIT_BITSIZE (mode) - 1,
|
||||
GET_MODE_UNIT_BITSIZE (mode) - 1)")))
|
||||
|
||||
(define_constraint "D2"
|
||||
"@internal
|
||||
A constraint that matches vector of immediates that is bits(mode)/2."
|
||||
(and (match_code "const,const_vector")
|
||||
(match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)")))
|
||||
|
||||
(define_constraint "DL"
|
||||
"@internal
|
||||
A constraint that matches vector of immediates for left shift long.
|
||||
That is immediates between 0 to (bits(mode)/2)-1."
|
||||
(and (match_code "const,const_vector")
|
||||
(match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
|
||||
(GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
|
||||
|
||||
(define_constraint "Dr"
|
||||
"@internal
|
||||
A constraint that matches vector of immediates for right shifts."
|
||||
|
|
|
@ -638,6 +638,11 @@
|
|||
HOST_WIDE_INT_1U
|
||||
<< (GET_MODE_UNIT_BITSIZE (mode) / 2 - 1))")))
|
||||
|
||||
(define_predicate "aarch64_simd_shll_imm_vec"
|
||||
(and (match_code "const_vector")
|
||||
(match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
|
||||
GET_MODE_UNIT_BITSIZE (mode) / 2)")))
|
||||
|
||||
(define_predicate "aarch64_simd_shift_imm_bitsize_qi"
|
||||
(and (match_code "const_int")
|
||||
(match_test "IN_RANGE (INTVAL (op), 0, 8)")))
|
||||
|
|
|
@ -155,4 +155,4 @@ int main ()
|
|||
/* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
|
||||
|
|
50
gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
Normal file
50
gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -save-temps" } */
|
||||
/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#pragma GCC target "+nosve"
|
||||
|
||||
#define ARR_SIZE 1024
|
||||
|
||||
/* Should produce an shll, shll2 pair.  */
|
||||
/*
|
||||
** sshll_opt1:
|
||||
** ...
|
||||
** shll v[0-9]+.4s, v[0-9]+.4h, 16
|
||||
** shll2 v[0-9]+.4s, v[0-9]+.8h, 16
|
||||
** ...
|
||||
*/
|
||||
/* Uniform-shift case: each iteration widens four consecutive int16_t
   elements of A and shifts every one of them left by 16, storing the
   results to FOO.  Because all four shift amounts are identical, the
   expected body above is a single shll/shll2 pair.  B is unused.  */
void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
|
||||
{
|
||||
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||||
{
|
||||
foo[i] = a[i] << 16;
|
||||
foo[i+1] = a[i+1] << 16;
|
||||
foo[i+2] = a[i+2] << 16;
|
||||
foo[i+3] = a[i+3] << 16;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** sshll_opt2:
|
||||
** ...
|
||||
** sxtl v[0-9]+.4s, v[0-9]+.4h
|
||||
** sxtl2 v[0-9]+.4s, v[0-9]+.8h
|
||||
** sshl v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
|
||||
** sshl v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
|
||||
** ...
|
||||
*/
|
||||
/* Non-uniform-shift case: same loop shape as sshll_opt1, but the four
   elements handled per iteration use different shift amounts
   (16, 15, 14 and 17).  The expected body above is therefore a plain
   sign-extend (sxtl/sxtl2) followed by vector-by-vector shifts (sshl)
   rather than a widening shift.  B is unused.  */
void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
|
||||
{
|
||||
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||||
{
|
||||
foo[i] = a[i] << 16;
|
||||
foo[i+1] = a[i+1] << 15;
|
||||
foo[i+2] = a[i+2] << 14;
|
||||
foo[i+3] = a[i+3] << 17;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Add table
Reference in a new issue