x86: Support 2/4/8 byte constant vector stores
1. Add a predicate for constant vectors which can be converted to integer constants suitable for constant integer stores. For an 8-byte constant vector, the converted 64-bit integer must be valid for store with 64-bit immediate, which is a 64-bit integer sign-extended from a 32-bit integer. 2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector stores, like (set (mem:V2HI (reg:DI 84)) (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])])) 3. After reload, convert constant vector stores to constant integer stores, like (set (mem:SI (reg:DI 5 di [84])) (const_int 65536 [0x10000])) For void foo (short * c) { c[0] = 0; c[1] = 1; } it generates movl $65536, (%rdi) instead of movl .LC0(%rip), %eax movl %eax, (%rdi) gcc/ PR target/106022 * config/i386/i386-protos.h (ix86_convert_const_vector_to_integer): New. * config/i386/i386.cc (ix86_convert_const_vector_to_integer): New. * config/i386/mmx.md (V_16_32_64): New. (*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit and 64-bit constant vector. * config/i386/predicates.md (x86_64_const_vector_operand): New. gcc/testsuite/ PR target/106022 * gcc.target/i386/pr106022-1.c: New test. * gcc.target/i386/pr106022-2.c: Likewise. * gcc.target/i386/pr106022-3.c: Likewise. * gcc.target/i386/pr106022-4.c: Likewise.
This commit is contained in:
parent
3ae9def085
commit
f3a5e75cb6
8 changed files with 152 additions and 0 deletions
|
@ -122,6 +122,8 @@ extern void ix86_expand_unary_operator (enum rtx_code, machine_mode,
|
|||
rtx[]);
|
||||
extern rtx ix86_build_const_vector (machine_mode, bool, rtx);
|
||||
extern rtx ix86_build_signbit_mask (machine_mode, bool, bool);
|
||||
extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx,
|
||||
machine_mode);
|
||||
extern void ix86_split_convert_uns_si_sse (rtx[]);
|
||||
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
|
||||
extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
|
||||
|
|
|
@ -15723,6 +15723,53 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
|
|||
return force_reg (vec_mode, v);
|
||||
}
|
||||
|
||||
/* Return HOST_WIDE_INT for const vector OP in MODE. */
|
||||
|
||||
HOST_WIDE_INT
|
||||
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
|
||||
{
|
||||
if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
|
||||
gcc_unreachable ();
|
||||
|
||||
int nunits = GET_MODE_NUNITS (mode);
|
||||
wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
|
||||
machine_mode innermode = GET_MODE_INNER (mode);
|
||||
unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case E_V2QImode:
|
||||
case E_V4QImode:
|
||||
case E_V2HImode:
|
||||
case E_V8QImode:
|
||||
case E_V4HImode:
|
||||
case E_V2SImode:
|
||||
for (int i = 0; i < nunits; ++i)
|
||||
{
|
||||
int v = INTVAL (XVECEXP (op, 0, i));
|
||||
wide_int wv = wi::shwi (v, innermode_bits);
|
||||
val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
|
||||
}
|
||||
break;
|
||||
case E_V2HFmode:
|
||||
case E_V4HFmode:
|
||||
case E_V2SFmode:
|
||||
for (int i = 0; i < nunits; ++i)
|
||||
{
|
||||
rtx x = XVECEXP (op, 0, i);
|
||||
int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
|
||||
REAL_MODE_FORMAT (innermode));
|
||||
wide_int wv = wi::shwi (v, innermode_bits);
|
||||
val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
return val.to_shwi ();
|
||||
}
|
||||
|
||||
/* Return TRUE or FALSE depending on whether the first SET in INSN
|
||||
has source and destination with matching CC modes, and that the
|
||||
CC mode is at least as constrained as REQ_MODE. */
|
||||
|
|
|
@ -69,6 +69,12 @@
|
|||
;; 4-byte and 2-byte QImode vector modes
|
||||
(define_mode_iterator VI1_16_32 [V4QI V2QI])
|
||||
|
||||
;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element.
;; The 8-byte modes are only enabled for TARGET_64BIT.
|
||||
(define_mode_iterator V_16_32_64
|
||||
[V2QI V4QI V2HI V2HF
|
||||
(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
|
||||
(V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
|
||||
|
||||
;; V2S* modes
|
||||
(define_mode_iterator V2FI [V2SF V2SI])
|
||||
|
||||
|
@ -331,6 +337,37 @@
|
|||
]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
;; Stores of 16-bit, 32-bit and 64-bit constant vectors to memory.
;; The insn is output as "#" and, once reload has completed, is split
;; into a store of the equivalent integer immediate in HImode, SImode
;; or DImode.
(define_insn_and_split "*mov<mode>_imm"
  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
	(match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
  ""
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Pick the integer mode that has the same size as the vector mode.  */
  machine_mode int_mode;
  switch (GET_MODE_SIZE (<MODE>mode))
    {
    case 2:
      int_mode = HImode;
      break;
    case 4:
      int_mode = SImode;
      break;
    case 8:
      int_mode = DImode;
      break;
    default:
      gcc_unreachable ();
    }

  /* Replace the vector constant by its integer image and view the
     destination memory in the matching integer mode.  */
  operands[1]
    = GEN_INT (ix86_convert_const_vector_to_integer (operands[1],
						     <MODE>mode));
  operands[0] = lowpart_subreg (int_mode, operands[0], <MODE>mode);
})
|
||||
|
||||
;; For TARGET_64BIT we always round up to 8 bytes.
|
||||
(define_insn "*push<mode>2_rex64"
|
||||
[(set (match_operand:V_32 0 "push_operand" "=X,X")
|
||||
|
|
|
@ -1194,6 +1194,17 @@
|
|||
(ior (match_operand 0 "register_operand")
|
||||
(match_code "const_vector")))
|
||||
|
||||
;; Return true when OP is a CONST_VECTOR, no wider than a word, whose
;; integer image (as computed by ix86_convert_const_vector_to_integer)
;; is a sign extended 32-bit integer.
(define_predicate "x86_64_const_vector_operand"
  (match_code "const_vector")
{
  /* When no particular mode is requested, use OP's own mode so that
     the conversion below is handed a real vector mode; VOIDmode would
     pass the size check and then reach gcc_unreachable in the
     conversion routine.  */
  if (mode == VOIDmode)
    mode = GET_MODE (op);
  else if (GET_MODE (op) != mode)
    return false;
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;
  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op, mode);
  /* Only values that survive truncation to SImode can be encoded as a
     sign-extended 32-bit immediate.  */
  return trunc_int_for_mode (val, SImode) == val;
})
|
||||
|
||||
;; Return true when OP is nonimmediate or standard SSE constant.
|
||||
(define_predicate "nonimmediate_or_sse_const_operand"
|
||||
(ior (match_operand 0 "nonimmediate_operand")
|
||||
|
|
13
gcc/testsuite/gcc.target/i386/pr106022-1.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr106022-1.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=x86-64" } */
|
||||
|
||||
/* Four adjacent byte stores of 0, 1, 2, 3.  The dg-final directive
   expects them to be emitted as one 32-bit immediate store of
   0x03020100 (50462976).  */
void
|
||||
foo (char *c)
|
||||
{
|
||||
c[0] = 0;
|
||||
c[1] = 1;
|
||||
c[2] = 2;
|
||||
c[3] = 3;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$50462976," 1 } } */
|
14
gcc/testsuite/gcc.target/i386/pr106022-2.c
Normal file
14
gcc/testsuite/gcc.target/i386/pr106022-2.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=x86-64" } */
|
||||
|
||||
/* Two adjacent int stores of -1 through a pointer assumed to be
   16-byte aligned.  On non-ia32 targets the dg-final directives expect
   a single "movq $-1" and no xmm register use; on ia32 they expect the
   store to go through an xmm register.  */
void
|
||||
foo (int *c)
|
||||
{
|
||||
c = __builtin_assume_aligned (c, 16);
|
||||
c[0] = -1;
|
||||
c[1] = -1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
|
14
gcc/testsuite/gcc.target/i386/pr106022-3.c
Normal file
14
gcc/testsuite/gcc.target/i386/pr106022-3.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=x86-64" } */
|
||||
|
||||
/* Four adjacent int stores (16 bytes of constants).  The dg-final
   directives expect this to still be done with one movdqa load and one
   movups store through an xmm register -- presumably because 16 bytes
   is wider than the 2/4/8-byte constant vector stores being tested.  */
void
|
||||
foo (int *c)
|
||||
{
|
||||
c[0] = 0;
|
||||
c[1] = 1;
|
||||
c[2] = 2;
|
||||
c[3] = 3;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
|
14
gcc/testsuite/gcc.target/i386/pr106022-4.c
Normal file
14
gcc/testsuite/gcc.target/i386/pr106022-4.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=x86-64" } */
|
||||
|
||||
/* Two adjacent float stores.  2.3 as a float has the bit pattern
   0x40133333 (1075000115) and 0.0 is all zeros, so non-ia32 targets
   are expected to emit one "movq $1075000115" while ia32 is expected
   to emit two movl immediates; no xmm register should be used.  */
void
|
||||
foo (float *c)
|
||||
{
|
||||
c[0] = 2.3;
|
||||
|
|
||||
c[1] = 0.0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x40133333" 1 { target { ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x00000000" 1 { target { ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$1075000115," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-not "xmm" } } */
|
Loading…
Add table
Reference in a new issue