From 9ebc9fbdddfe1ec85355b068354315a4da8e1ca0 Mon Sep 17 00:00:00 2001 From: Stefan Schulze Frielinghaus Date: Thu, 12 Sep 2024 13:29:43 +0200 Subject: [PATCH] s390: Fix strict_low_part generation In s390_expand_insv(), if generating code for ICM et al. src is a MEM and gen_lowpart might force src into a register such that we end up with patterns which do not match anymore. Use adjust_address() instead in order to preserve a MEM. Furthermore, it is not straight forward to enforce a subreg. For example, in case of a paradoxical subreg, gen_lowpart() may return a register. In order to compensate this, s390_gen_lowpart_subreg() emits a reference to a pseudo which does not coincide with its definition which is wrong. Additionally, if dest is a paradoxical subreg, then do not try to emit a strict_low_part since it could mean that dest was not initialized even though this might be fixed up later by init-regs. Splitter for insn *get_tp_64, *zero_extendhisi2_31, *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload. Thus, operands[0] is a hard register and gen_lowpart (m, operands[0]) just returns the hard register for mode m which is fine to use as an argument for strict_low_part, i.e., we do not need to enforce subregs here since after reload subregs are supposed to be eliminated anyway. This fixes gcc.dg/torture/pr111821.c. gcc/ChangeLog: * config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove. * config/s390/s390.cc (s390_gen_lowpart_subreg): Remove. (s390_expand_insv): Use adjust_address() and emit a strict_low_part only in case of a natural subreg. * config/s390/s390.md: Use gen_lowpart() instead of s390_gen_lowpart_subreg(). --- gcc/config/s390/s390-protos.h | 1 - gcc/config/s390/s390.cc | 47 +++++++++++------------------------ gcc/config/s390/s390.md | 13 +++++----- 3 files changed, 20 insertions(+), 41 deletions(-) diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index b4646ccb606..e7ac59d17da 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool); extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int); extern int s390_class_max_nregs (enum reg_class, machine_mode); extern bool s390_return_addr_from_memory(void); -extern rtx s390_gen_lowpart_subreg (machine_mode, rtx); extern bool s390_fma_allowed_p (machine_mode); #if S390_USE_TARGET_ATTRIBUTE extern tree s390_valid_target_attribute_tree (tree args, diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 47e1d5adfd9..c1649ca49bd 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -516,31 +516,6 @@ s390_return_addr_from_memory () return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK; } -/* Generate a SUBREG for the MODE lowpart of EXPR. - - In contrast to gen_lowpart it will always return a SUBREG - expression. This is useful to generate STRICT_LOW_PART - expressions. */ -rtx -s390_gen_lowpart_subreg (machine_mode mode, rtx expr) -{ - rtx lowpart = gen_lowpart (mode, expr); - - /* There might be no SUBREG in case it could be applied to the hard - REG rtx or it could be folded with a paradoxical subreg. Bring - it back. */ - if (!SUBREG_P (lowpart)) - { - machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode; - gcc_assert (REG_P (lowpart)); - lowpart = gen_lowpart_SUBREG (mode, - gen_rtx_REG (reg_mode, - REGNO (lowpart))); - } - - return lowpart; -} - /* Return nonzero if it's OK to use fused multiply-add for MODE. */ bool s390_fma_allowed_p (machine_mode mode) @@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) /* Emit a strict_low_part pattern if possible. */ if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize) { - rtx low_dest = s390_gen_lowpart_subreg (smode, dest); - rtx low_src = gen_lowpart (smode, src); - - switch (smode) + rtx low_dest = gen_lowpart (smode, dest); + if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest)) { - case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true; - case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true; - case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true; - default: break; + poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode); + rtx low_src = adjust_address (src, smode, offset); + switch (smode) + { + case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); + return true; + case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); + return true; + case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); + return true; + default: break; + } } } diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 3d5759d6252..592cf62d962 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1974,12 +1974,11 @@ "TARGET_ZARCH" "#" "&& reload_completed" - [(set (match_dup 2) (match_dup 4)) + [(set (match_dup 2) (match_dup 3)) (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) - (set (strict_low_part (match_dup 3)) (match_dup 5))] + (set (strict_low_part (match_dup 2)) (match_dup 4))] "operands[2] = gen_lowpart (SImode, operands[0]); - operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]); - s390_split_access_reg (operands[1], &operands[5], &operands[4]);") + s390_split_access_reg (operands[1], &operands[4], &operands[3]);") ; Splitters for storing TLS pointer to %a0:DI. @@ -5068,7 +5067,7 @@ (parallel [(set (strict_low_part (match_dup 2)) (match_dup 1)) (clobber (reg:CC CC_REGNUM))])] - "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);") + "operands[2] = gen_lowpart (HImode, operands[0]);") (define_insn_and_split "*zero_extendqisi2_31" [(set (match_operand:SI 0 "register_operand" "=&d") @@ -5078,7 +5077,7 @@ "&& reload_completed" [(set (match_dup 0) (const_int 0)) (set (strict_low_part (match_dup 2)) (match_dup 1))] - "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);") + "operands[2] = gen_lowpart (QImode, operands[0]);") ; ; zero_extendqihi2 instruction pattern(s). @@ -5110,7 +5109,7 @@ "&& reload_completed" [(set (match_dup 0) (const_int 0)) (set (strict_low_part (match_dup 2)) (match_dup 1))] - "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);") + "operands[2] = gen_lowpart (QImode, operands[0]);") ; ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander