IBM Z: Provide rawmemchr{qi,hi,si} expander
gcc/ChangeLog:

	* config/s390/s390-protos.h (s390_rawmemchr): Add prototype.
	* config/s390/s390.c (s390_rawmemchr): New function.
	* config/s390/s390.md (rawmemchr<SINT:mode>): New expander.
	* config/s390/vector.md (@vec_vfees<mode>): Basically a copy of
	the pattern vfees<mode> from vx-builtins.md.
	* config/s390/vx-builtins.md (*vfees<mode>): Remove.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/rawmemchr-1.c: New test.
This commit is contained in:
parent
6f966f0614
commit
6e3c4bfd0c
6 changed files with 203 additions and 26 deletions
|
@ -66,6 +66,8 @@ s390_asm_declare_function_size (FILE *asm_out_file,
|
|||
const char *fnname ATTRIBUTE_UNUSED, tree decl);
|
||||
#endif
|
||||
|
||||
/* Emit the RTL for the rawmemchr<mode> expander; the definition lives in
   s390.c.  ELT_MODE is the element mode, DST receives the result, SRC is
   the memory operand to search and PAT the constant to look for.  */
extern void s390_rawmemchr (machine_mode elt_mode, rtx dst, rtx src, rtx pat);
|
||||
|
||||
#ifdef RTX_CODE
|
||||
extern int s390_extra_constraint_str (rtx, int, const char *);
|
||||
extern int s390_const_ok_for_constraint_p (HOST_WIDE_INT, int, const char *);
|
||||
|
|
|
@ -16569,6 +16569,75 @@ s390_excess_precision (enum excess_precision_type type)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Emit RTL for the rawmemchr<mode> expander: search the memory starting at
   the address of SRC (a MEM) for the first element of mode ELT_MODE that
   equals the constant integer PAT, and store the address of that element
   in DST.  The data is examined one 16-byte vector at a time with
   vec_vfees.  No length limit is checked: the emitted loop is only left
   once a match has been reported.  */
void
|
||||
s390_rawmemchr (machine_mode elt_mode, rtx dst, rtx src, rtx pat)
|
||||
{
|
||||
/* 16-byte integer vector mode whose elements have mode ELT_MODE.  */
machine_mode vec_mode = mode_for_vector (as_a <scalar_int_mode> (elt_mode),
|
||||
16 / GET_MODE_SIZE (elt_mode)).require();
|
||||
rtx lens = gen_reg_rtx (V16QImode);
|
||||
rtx pattern = gen_reg_rtx (vec_mode);
|
||||
rtx loop_start = gen_label_rtx ();
|
||||
rtx loop_end = gen_label_rtx ();
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
rtx offset = gen_reg_rtx (Pmode);
|
||||
rtx loadlen = gen_reg_rtx (SImode);
|
||||
rtx matchlen = gen_reg_rtx (SImode);
|
||||
rtx mem;
|
||||

||||
/* Truncate PAT to ELT_MODE and broadcast it into every element of
   PATTERN.  PAT must be a CONST_INT because INTVAL is applied to it.  */
pat = GEN_INT (trunc_int_for_mode (INTVAL (pat), elt_mode));
|
||||
emit_insn (gen_rtx_SET (pattern, gen_rtx_VEC_DUPLICATE (vec_mode, pat)));
|
||||

||||
/* ADDR starts out as the address held in the SRC memory operand.  */
emit_move_insn (addr, XEXP (src, 0));
|
||||

||||
/* First chunk, read from the possibly unaligned start address.
   NOTE(review): vlbb/lcbb with operand 6 presumably load, respectively
   count, only the bytes up to the next 4K block boundary -- confirm
   against the vlbb/lcbb patterns.  */
emit_insn (gen_vlbb (lens, gen_rtx_MEM (BLKmode, addr), GEN_INT (6)));
|
||||
emit_insn (gen_lcbb (loadlen, addr, GEN_INT (6)));
|
||||
lens = convert_to_mode (vec_mode, lens, 1);
|
||||
emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (0)));
|
||||
/* View the vfees result as V4SI and read element 1 into MATCHLEN
   (presumably the byte index of the first match -- confirm).  */
lens = convert_to_mode (V4SImode, lens, 1);
|
||||
emit_insn (gen_vec_extractv4sisi (matchlen, lens, GEN_INT (1)));
|
||||
lens = convert_to_mode (vec_mode, lens, 1);
|
||||
/* Unsigned compare: if MATCHLEN < LOADLEN branch to LOOP_END.  ADDR still
   holds the original start address on that path.  */
emit_cmp_and_jump_insns (matchlen, loadlen, LT, NULL_RTX, SImode, 1, loop_end);
|
||||
/* Otherwise advance ADDR by 16 and clear its low four bits.  */
force_expand_binop (Pmode, add_optab, addr, GEN_INT(16), addr, 1, OPTAB_DIRECT);
|
||||
force_expand_binop (Pmode, and_optab, addr, GEN_INT(~HOST_WIDE_INT_UC(0xf)), addr, 1, OPTAB_DIRECT);
|
||||
// now, addr is 16-byte aligned
|
||||

||||
/* Check the first aligned 16-byte vector before entering the loop.  */
mem = gen_rtx_MEM (vec_mode, addr);
|
||||
set_mem_align (mem, 128);
|
||||
emit_move_insn (lens, mem);
|
||||
emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (VSTRING_FLAG_CS)));
|
||||
/* NOTE(review): mask 4 with EQ presumably means "branch if vfees reported
   a match" -- confirm against s390_emit_ccraw_jump.  The branch is
   annotated as very unlikely.  */
add_int_reg_note (s390_emit_ccraw_jump (4, EQ, loop_end),
|
||||
REG_BR_PROB,
|
||||
profile_probability::very_unlikely ().to_reg_br_prob_note ());
|
||||

||||
/* Main loop: step ADDR by 16, load the aligned vector, compare, and branch
   back to LOOP_START (annotated very likely) while the (4, NE) condition
   holds.  */
emit_label (loop_start);
|
||||
LABEL_NUSES (loop_start) = 1;
|
||||

||||
force_expand_binop (Pmode, add_optab, addr, GEN_INT (16), addr, 1, OPTAB_DIRECT);
|
||||
mem = gen_rtx_MEM (vec_mode, addr);
|
||||
set_mem_align (mem, 128);
|
||||
emit_move_insn (lens, mem);
|
||||
emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (VSTRING_FLAG_CS)));
|
||||
add_int_reg_note (s390_emit_ccraw_jump (4, NE, loop_start),
|
||||
REG_BR_PROB,
|
||||
profile_probability::very_likely ().to_reg_br_prob_note ());
|
||||

||||
emit_label (loop_end);
|
||||
/* NOTE(review): two branches above target LOOP_END, yet the use count is
   set to 1 -- presumably it only has to be nonzero until jump labels are
   rebuilt; confirm.  */
LABEL_NUSES (loop_end) = 1;
|
||||

||||
/* Read the vfees result as a Pmode-sized integer: DImode element 0 when
   TARGET_64BIT, SImode element 1 otherwise.  */
if (TARGET_64BIT)
|
||||
{
|
||||
lens = convert_to_mode (V2DImode, lens, 1);
|
||||
emit_insn (gen_vec_extractv2didi (offset, lens, GEN_INT (0)));
|
||||
}
|
||||
else
|
||||
{
|
||||
lens = convert_to_mode (V4SImode, lens, 1);
|
||||
emit_insn (gen_vec_extractv4sisi (offset, lens, GEN_INT (1)));
|
||||
}
|
||||
/* DST = ADDR + OFFSET.  */
force_expand_binop (Pmode, add_optab, addr, offset, dst, 1, OPTAB_DIRECT);
|
||||
}
|
||||
|
||||
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
|
||||
|
||||
static unsigned HOST_WIDE_INT
|
||||
|
|
|
@ -12258,3 +12258,10 @@
|
|||
UNSPECV_PPA)]
|
||||
"TARGET_ZEC12"
|
||||
"")
|
||||
|
||||
; Expander for the named pattern rawmemchr<mode>, one instance per mode of
; the SINT iterator.
;   operand 0: register that receives the result
;   operand 1: memory operand marking the start of the search
;   operand 2: constant integer to search for
; All RTL is emitted by s390_rawmemchr; the pattern is only enabled for
; TARGET_VX.
(define_expand "rawmemchr<SINT:mode>"
|
||||
[(match_operand 0 "register_operand")
|
||||
(match_operand 1 "memory_operand")
|
||||
(match_operand:SINT 2 "const_int_operand")]
|
||||
"TARGET_VX"
|
||||
"s390_rawmemchr(<SINT:MODE>mode, operands[0], operands[1], operands[2]); DONE;")
|
||||
|
|
|
@ -1988,6 +1988,32 @@
|
|||
"vll\t%v0,%1,%2"
|
||||
[(set_attr "op_type" "VRS")])
|
||||
|
||||
; vfeebs, vfeehs, vfeefs
|
||||
; vfeezbs, vfeezhs, vfeezfs
|
||||
; Find-element-equal on two VI_HW_QHS vector registers.  The insn sets both
; the result vector (operand 0) and the condition code register.  Operand 3
; is a flag mask restricted to VSTRING_FLAG_ZS and VSTRING_FLAG_CS.
; s390_rawmemchr calls this pattern as gen_vec_vfees (vec_mode, ...);
; presumably the "@" prefix is what provides that mode-parameterised
; generator -- see the GCC internals manual.
(define_insn "@vec_vfees<mode>"
|
||||
[(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
|
||||
(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
|
||||
(match_operand:VI_HW_QHS 2 "register_operand" "v")
|
||||
(match_operand:QI 3 "const_mask_operand" "C")]
|
||||
UNSPEC_VEC_VFEE))
|
||||
(set (reg:CCRAW CC_REGNUM)
|
||||
(unspec:CCRAW [(match_dup 1)
|
||||
(match_dup 2)
|
||||
(match_dup 3)]
|
||||
UNSPEC_VEC_VFEECC))]
|
||||
"TARGET_VX"
|
||||
{
|
||||
unsigned HOST_WIDE_INT flags = UINTVAL (operands[3]);
|
||||

||||
/* Only the ZS and CS flags may be set.  */
gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
|
||||
/* CS is dropped here: both templates below use the mnemonic with the
   trailing "s", and operand 3 itself is never printed.  */
flags &= ~VSTRING_FLAG_CS;
|
||||

||||
/* ZS alone decides between the vfeez*s and vfee*s mnemonics.  */
if (flags == VSTRING_FLAG_ZS)
|
||||
return "vfeez<bhfgq>s\t%v0,%v1,%v2";
|
||||
return "vfee<bhfgq>s\t%v0,%v1,%v2";
|
||||
}
|
||||
[(set_attr "op_type" "VRR")])
|
||||
|
||||
; vfenebs, vfenehs, vfenefs
|
||||
; vfenezbs, vfenezhs, vfenezfs
|
||||
(define_insn "vec_vfenes<mode>"
|
||||
|
|
|
@ -1366,32 +1366,6 @@
|
|||
|
||||
; Vector find element equal
|
||||
|
||||
; vfeebs, vfeehs, vfeefs
|
||||
; vfeezbs, vfeezhs, vfeezfs
|
||||
(define_insn "*vfees<mode>"
|
||||
[(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
|
||||
(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
|
||||
(match_operand:VI_HW_QHS 2 "register_operand" "v")
|
||||
(match_operand:QI 3 "const_mask_operand" "C")]
|
||||
UNSPEC_VEC_VFEE))
|
||||
(set (reg:CCRAW CC_REGNUM)
|
||||
(unspec:CCRAW [(match_dup 1)
|
||||
(match_dup 2)
|
||||
(match_dup 3)]
|
||||
UNSPEC_VEC_VFEECC))]
|
||||
"TARGET_VX"
|
||||
{
|
||||
unsigned HOST_WIDE_INT flags = UINTVAL (operands[3]);
|
||||
|
||||
gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
|
||||
flags &= ~VSTRING_FLAG_CS;
|
||||
|
||||
if (flags == VSTRING_FLAG_ZS)
|
||||
return "vfeez<bhfgq>s\t%v0,%v1,%v2";
|
||||
return "vfee<bhfgq>s\t%v0,%v1,%v2,%b3";
|
||||
}
|
||||
[(set_attr "op_type" "VRR")])
|
||||
|
||||
; vfeeb, vfeeh, vfeef
|
||||
(define_insn "vfee<mode>"
|
||||
[(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
|
||||
|
|
99
gcc/testsuite/gcc.target/s390/rawmemchr-1.c
Normal file
99
gcc/testsuite/gcc.target/s390/rawmemchr-1.c
Normal file
|
@ -0,0 +1,99 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details -mzarch -march=z13" } */
|
||||
/* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" } } */
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Define rawmemchr_<T> (T *s): advance S until *S equals PATTERN and return
   that pointer.  The loop has no bound, so PATTERN must be present in the
   memory being scanned.  The functions are noinline/noclone, presumably so
   that each loop is compiled as a function of its own.  The dg-final
   directives of this test expect "generated rawmemchrQI/HI/SI" twice each
   in the ldist dump, which corresponds to the two instantiations per
   element width below.  */
#define rawmemchrT(T, pattern) \
|
||||
__attribute__((noinline,noclone)) \
|
||||
T* rawmemchr_##T (T *s) \
|
||||
{ \
|
||||
while (*s != pattern) \
|
||||
++s; \
|
||||
return s; \
|
||||
}
|
||||

||||
rawmemchrT(int8_t, (int8_t)0xde)
|
||||
rawmemchrT(uint8_t, 0xde)
|
||||
rawmemchrT(int16_t, (int16_t)0xdead)
|
||||
rawmemchrT(uint16_t, 0xdead)
|
||||
rawmemchrT(int32_t, (int32_t)0xdeadbeef)
|
||||
rawmemchrT(uint32_t, 0xdeadbeef)
|
||||
|
||||
/* Define run_<T> (): runtime check of rawmemchr_<T>.
   A buffer of 2 * 4096 elements is allocated and filled with 0x0a bytes;
   Q is the first 4096-byte aligned address strictly above BUF and P is
   8 bytes below Q.  Each case stores PATTERN at one index, asserts that
   rawmemchr_<T> returns exactly that address, and then overwrites the
   slot again.
   Notes for readers:
   - The slot is reset with (T) 0xaaaaaaaa, not with the 0x0a fill value;
     neither value equals any PATTERN used below.
   - The "Nth load" labels are approximate.  Indices are in units of T,
     so the byte offset of p[2], p[6], q[5], ... depends on sizeof (T);
     e.g. p[2] is at q - 8 + 2 * sizeof (T).  */
#define runT(T, pattern) \
|
||||
void run_##T () \
|
||||
{ \
|
||||
T *buf = malloc (4096 * 2 * sizeof(T)); \
|
||||
assert (buf != NULL); \
|
||||
memset (buf, 0xa, 4096 * 2 * sizeof(T)); \
|
||||
/* ensure q is 4096-byte aligned */ \
|
||||
T *q = (T*)((unsigned char *)buf \
|
||||
+ (4096 - ((uintptr_t)buf & 4095))); \
|
||||
T *p; \
|
||||
/* unaligned + block boundary + 1st load */ \
|
||||
p = (T *) ((uintptr_t)q - 8); \
|
||||
p[2] = pattern; \
|
||||
assert ((rawmemchr_##T (&p[0]) == &p[2])); \
|
||||
p[2] = (T) 0xaaaaaaaa; \
|
||||
/* unaligned + block boundary + 2nd load */ \
|
||||
p = (T *) ((uintptr_t)q - 8); \
|
||||
p[6] = pattern; \
|
||||
assert ((rawmemchr_##T (&p[0]) == &p[6])); \
|
||||
p[6] = (T) 0xaaaaaaaa; \
|
||||
/* unaligned + 1st load */ \
|
||||
q[5] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[2]) == &q[5])); \
|
||||
q[5] = (T) 0xaaaaaaaa; \
|
||||
/* unaligned + 2nd load */ \
|
||||
q[14] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[2]) == &q[14])); \
|
||||
q[14] = (T) 0xaaaaaaaa; \
|
||||
/* unaligned + 3rd load */ \
|
||||
q[19] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[2]) == &q[19])); \
|
||||
q[19] = (T) 0xaaaaaaaa; \
|
||||
/* unaligned + 4th load */ \
|
||||
q[25] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[2]) == &q[25])); \
|
||||
q[25] = (T) 0xaaaaaaaa; \
|
||||
/* aligned + 1st load */ \
|
||||
q[5] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[0]) == &q[5])); \
|
||||
q[5] = (T) 0xaaaaaaaa; \
|
||||
/* aligned + 2nd load */ \
|
||||
q[14] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[0]) == &q[14])); \
|
||||
q[14] = (T) 0xaaaaaaaa; \
|
||||
/* aligned + 3rd load */ \
|
||||
q[19] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[0]) == &q[19])); \
|
||||
q[19] = (T) 0xaaaaaaaa; \
|
||||
/* aligned + 4th load */ \
|
||||
q[25] = pattern; \
|
||||
assert ((rawmemchr_##T (&q[0]) == &q[25])); \
|
||||
q[25] = (T) 0xaaaaaaaa; \
|
||||
free (buf); \
|
||||
}
|
||||

||||
runT(int8_t, (int8_t)0xde)
|
||||
runT(uint8_t, 0xde)
|
||||
runT(int16_t, (int16_t)0xdead)
|
||||
runT(uint16_t, 0xdead)
|
||||
runT(int32_t, (int32_t)0xdeadbeef)
|
||||
runT(uint32_t, 0xdeadbeef)
|
||||
|
||||
/* Run the checks for all six element types in turn.  A failing check
   fires an assert inside the corresponding run_* function; otherwise the
   program returns 0.  */
int main (void)
|
||||
{
|
||||
run_uint8_t ();
|
||||
run_int8_t ();
|
||||
run_uint16_t ();
|
||||
run_int16_t ();
|
||||
run_uint32_t ();
|
||||
run_int32_t ();
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue