aarch64: Represent SQXTUN with RTL operations
This patch removes UNSPEC_SQXTUN and represents the operation with standard RTL codes instead.

SQXTUN is an odd one. The architecture describes it as "Signed saturating extract Unsigned Narrow". It is neither a straightforward ss_truncate nor a us_truncate. Rather, it is a truncating signed clamp whose limits are the unsigned extrema of the narrow mode:

    (truncate:N
      (smin:M
        (smax:M (reg:M) (const_int 0))
        (const_int <unsigned-max-for-mode-N>)))

This patch implements these semantics. I've checked that the vqmovun tests in advsimd-intrinsics.exp now get constant-folded and still pass validation, so I'm pretty confident in the semantics.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

    * config/aarch64/aarch64-simd.md (aarch64_sqmovun<mode><vczle><vczbe>): Rename to...
    (*aarch64_sqmovun<mode>_insn<vczle><vczbe>): ... This. Reimplement with RTL codes.
    (aarch64_sqmovun<mode> [SD_HSDI]): Reimplement with RTL codes.
    (aarch64_sqxtun2<mode>_le): Likewise.
    (aarch64_sqxtun2<mode>_be): Likewise.
    (aarch64_sqxtun2<mode>): Adjust for the above.
    (aarch64_sqmovun<mode>): New define_expand.
    * config/aarch64/iterators.md (UNSPEC_SQXTUN): Delete.
    (half_mask): New mode attribute.
    * config/aarch64/predicates.md (aarch64_simd_umax_half_mode): New predicate.
This commit is contained in:
parent
a053c659f6
commit
b747f54a2a
3 changed files with 56 additions and 14 deletions
|
@ -5438,28 +5438,55 @@
|
|||
|
||||
;; Scalar SQXTUN pattern over the SD_HSDI modes.
;; New form: operand 1 is clamped with smax against (const_int 0) and with
;; smin against (const_int <half_mask>), and the result is truncated to
;; <VNARROWQ> into operand 0.  Emits a single "sqxtun" instruction.
;; NOTE(review): this span is a rendered diff without +/- markers.  Per the
;; commit message, the unspec/UNSPEC_SQXTUN lines are the removed form and
;; the truncate/smin/smax lines are their replacement.
(define_insn "aarch64_sqmovun<mode>"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
||||
(unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
|
||||
UNSPEC_SQXTUN))]
|
||||
(truncate:<VNARROWQ>
|
||||
(smin:SD_HSDI
|
||||
(smax:SD_HSDI
|
||||
(match_operand:SD_HSDI 1 "register_operand" "w")
|
||||
(const_int 0))
|
||||
|
||||
(const_int <half_mask>))))]
|
||||
"TARGET_SIMD"
|
||||
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
|
||||
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
;; Vector SQXTUN pattern over the VQN modes.
;; New form: operand 1 is clamped with smax against operand 2 (must satisfy
;; aarch64_simd_or_scalar_imm_zero) and with smin against operand 3 (must
;; satisfy aarch64_simd_umax_half_mode), then truncated to <VNARROWQ> into
;; operand 0.  Operands 2 and 3 carry no constraint string and do not appear
;; in the output template; only operands 0 and 1 are printed.
;; NOTE(review): this span is a rendered diff without +/- markers.  Per the
;; commit message, the first define_insn name line and the
;; unspec/UNSPEC_SQXTUN lines are the removed form; the "*..._insn" name and
;; the truncate/smin/smax lines are their replacement.
(define_insn "aarch64_sqmovun<mode><vczle><vczbe>"
|
||||
(define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
||||
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
|
||||
UNSPEC_SQXTUN))]
|
||||
(truncate:<VNARROWQ>
|
||||
(smin:VQN
|
||||
(smax:VQN (match_operand:VQN 1 "register_operand" "w")
|
||||
(match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
|
||||
(match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
|
||||
"TARGET_SIMD"
|
||||
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
|
||||
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
;; Expander for the vector (VQN) form of SQXTUN.
;; Callers supply only operand 0 (the <VNARROWQ> destination) and operand 1
;; (the VQN source).  The preparation code fills in the two clamp bounds
;; referenced by match_dup:
;;   operands[2] = all-zero constant of <MODE>mode (smax bound)
;;   operands[3] = vector with every element set to the mode mask of the
;;                 <VNARROWQ> element mode (smin bound)
;; The resulting truncate/smin/smax RTL is then emitted from the template.
(define_expand "aarch64_sqmovun<mode>"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
||||
(truncate:<VNARROWQ>
|
||||
(smin:VQN
|
||||
(smax:VQN (match_operand:VQN 1 "register_operand" "w")
|
||||
(match_dup 2))
|
||||
(match_dup 3))))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
operands[2] = CONST0_RTX (<MODE>mode);
|
||||
operands[3]
|
||||
= aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
||||
GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "aarch64_sqxtun2<mode>_le"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
(match_operand:<VNARROWQ> 1 "register_operand" "0")
|
||||
(unspec:<VNARROWQ>
|
||||
[(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
|
||||
(truncate:<VNARROWQ>
|
||||
(smin:VQN
|
||||
(smax:VQN
|
||||
(match_operand:VQN 2 "register_operand" "w")
|
||||
(match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
|
||||
(match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
|
||||
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
||||
"sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
|
||||
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
||||
|
@ -5468,8 +5495,12 @@
|
|||
(define_insn "aarch64_sqxtun2<mode>_be"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
(unspec:<VNARROWQ>
|
||||
[(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
|
||||
(truncate:<VNARROWQ>
|
||||
(smin:VQN
|
||||
(smax:VQN
|
||||
(match_operand:VQN 2 "register_operand" "w")
|
||||
(match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
|
||||
(match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
|
||||
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
|
||||
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
||||
"sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
|
||||
|
@ -5479,16 +5510,18 @@
|
|||
;; Expander for SQXTUN2.
;; Operand 0 is the <VNARROWQ2> result, operand 1 a <VNARROWQ> register and
;; operand 2 the VQN source.  The preparation code builds the two clamp
;; constants (an all-zero <MODE>mode vector, and a vector whose elements are
;; the mode mask of the <VNARROWQ> element mode), then emits the _be or _le
;; pattern depending on BYTES_BIG_ENDIAN, and finishes with DONE.
;; NOTE(review): this span is a rendered diff without +/- markers.  Per the
;; commit message, the unspec/UNSPEC_SQXTUN operand and the calls ending in
;; "operands[2]));" are the removed form; the plain match_operand 2 and the
;; calls ending in "operands[2], zeros, half_umax));" are their replacement.
(define_expand "aarch64_sqxtun2<mode>"
|
||||
[(match_operand:<VNARROWQ2> 0 "register_operand")
|
||||
(match_operand:<VNARROWQ> 1 "register_operand")
|
||||
(unspec:<VNARROWQ>
|
||||
[(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
|
||||
(match_operand:VQN 2 "register_operand")]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx zeros = CONST0_RTX (<MODE>mode);
|
||||
rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
||||
GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
|
||||
operands[2]));
|
||||
operands[2], zeros, half_umax));
|
||||
else
|
||||
emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
|
||||
operands[2]));
|
||||
operands[2], zeros, half_umax));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
|
|
@ -638,7 +638,6 @@
|
|||
UNSPEC_FMULX ; Used in aarch64-simd.md.
|
||||
UNSPEC_USQADD ; Used in aarch64-simd.md.
|
||||
UNSPEC_SUQADD ; Used in aarch64-simd.md.
|
||||
UNSPEC_SQXTUN ; Used in aarch64-simd.md.
|
||||
UNSPEC_SSRA ; Used in aarch64-simd.md.
|
||||
UNSPEC_USRA ; Used in aarch64-simd.md.
|
||||
UNSPEC_SRSHR ; Used in aarch64-simd.md.
|
||||
|
@ -1025,6 +1024,8 @@
|
|||
|
||||
;; Decimal all-ones mask for the mode itself: HI -> 2^16 - 1, QI -> 2^8 - 1.
(define_mode_attr short_mask [(HI "65535") (QI "255")])
|
||||
|
||||
;; Decimal all-ones mask for half the width of the mode:
;; HI -> 2^8 - 1, SI -> 2^16 - 1, DI -> 2^32 - 1.
(define_mode_attr half_mask [(HI "255") (SI "65535") (DI "4294967295")])
|
||||
|
||||
;; For constraints used in scalar immediate vector moves
|
||||
(define_mode_attr hq [(HI "h") (QI "q")])
|
||||
|
||||
|
|
|
@ -595,6 +595,14 @@
|
|||
GET_MODE_UNIT_BITSIZE (GET_MODE (op)) / 2,
|
||||
GET_MODE_UNIT_BITSIZE (GET_MODE (op)) / 2)")))
|
||||
|
||||
;; Accepts a const_vector for which aarch64_const_vec_all_same_in_range_p
;; holds with both range bounds equal to
;;   (1 << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1
;; NOTE(review): presumably this means every element equals the unsigned
;; maximum of the half-width element -- confirm against the helper.
;; NOTE(review): the bounds use the predicate's MODE argument rather than
;; GET_MODE (op), so this presumably expects a use site that names a vector
;; mode -- confirm.
(define_predicate "aarch64_simd_umax_half_mode"
|
||||
(and (match_code "const_vector")
|
||||
(match_test "aarch64_const_vec_all_same_in_range_p (op,
|
||||
(HOST_WIDE_INT_1U
|
||||
<< (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
|
||||
(HOST_WIDE_INT_1U
|
||||
<< (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
|
||||
|
||||
;; Accepts a const_vector for which aarch64_const_vec_all_same_in_range_p
;; holds with bounds 1 and 8.
;; NOTE(review): presumably a uniform shift amount valid for QI elements --
;; confirm against the helper's definition.
(define_predicate "aarch64_simd_shift_imm_vec_qi"
|
||||
(and (match_code "const_vector")
|
||||
(match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))
|
||||
|
|
Loading…
Add table
Reference in a new issue