i386: Narrow test instructions with immediate operands [PR111698]
Narrow test instructions with immediate operand that test memory location for zero. E.g. testl $0x00aa0000, mem can be converted to testb $0xaa, mem+2. Reject targets where reading (possibly unaligned) part of memory location after a large write to the same address causes store-to-load forwarding stall. PR target/111698 gcc/ChangeLog: * config/i386/x86-tune.def (X86_TUNE_PARTIAL_MEMORY_READ_STALL): New tune. * config/i386/i386.h (TARGET_PARTIAL_MEMORY_READ_STALL): New macro. * config/i386/i386.md: New peephole pattern to narrow test instructions with immediate operands that test memory locations for zero. gcc/testsuite/ChangeLog: * gcc.target/i386/pr111698.c: New test.
This commit is contained in:
parent
f7dbf62304
commit
678e6c328c
4 changed files with 80 additions and 0 deletions
|
@ -311,6 +311,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
|||
/* Each TARGET_* macro below expands to the ix86_tune_features entry
   selected by the X86_TUNE_* index of the same name.  */
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
|
||||
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
|
||||
#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
|
||||
/* Set for tunings where reading part of a memory location after a larger
   write to the same address causes a store-to-load forwarding stall.  */
#define TARGET_PARTIAL_MEMORY_READ_STALL \
|
||||
ix86_tune_features[X86_TUNE_PARTIAL_MEMORY_READ_STALL]
|
||||
#define TARGET_PARTIAL_FLAG_REG_STALL \
|
||||
ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
|
||||
#define TARGET_LCP_STALL \
|
||||
|
|
|
@ -11115,6 +11115,57 @@
|
|||
operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
|
||||
})
|
||||
|
||||
;; Narrow test instructions with immediate operands that test
|
||||
;; memory locations for zero. E.g. testl $0x00aa0000, mem can be
|
||||
;; converted to testb $0xaa, mem+2. Reject volatile locations and
|
||||
;; targets where reading (possibly unaligned) part of memory
|
||||
;; location after a large write to the same address causes
|
||||
;; store-to-load forwarding stall.
|
||||
(define_peephole2
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:SWI248 (match_operand:SWI248 0 "memory_operand")
                      (match_operand 1 "const_int_operand"))
          (const_int 0)))]
  "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_dup 2) (const_int 0)))]
{
  /* operands[0] is the tested memory location and operands[1] the
     immediate mask.  operands[2] is computed here as the narrowed AND
     that the replacement compare tests against zero.  */
  unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
  int first_nonzero_byte, bitsize;
  rtx new_addr, new_const;
  machine_mode new_mode;

  /* Clear bits outside mode width.  */
  ival &= GET_MODE_MASK (<MODE>mode);

  /* Give up when no bit inside the mode is tested.  The check is done
     after masking so that the bit scans and the shift below are never
     applied to a zero value.  */
  if (ival == 0)
    FAIL;

  /* Index of the lowest byte that contains a set mask bit; the narrowed
     access starts at this byte offset.  */
  first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;

  /* Discard the all-zero low bytes so the mask is relative to the new
     start address.  */
  ival >>= first_nonzero_byte * BITS_PER_UNIT;

  /* Number of bits needed to hold the remaining mask.  */
  bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);

  /* Pick the smallest integer mode that covers the remaining mask.  */
  if (bitsize <= GET_MODE_BITSIZE (QImode))
    new_mode = QImode;
  else if (bitsize <= GET_MODE_BITSIZE (HImode))
    new_mode = HImode;
  else if (bitsize <= GET_MODE_BITSIZE (SImode))
    new_mode = SImode;
  else
    new_mode = DImode;

  /* Only rewrite when the access really becomes narrower.  */
  if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
    FAIL;

  new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
  new_const = gen_int_mode (ival, new_mode);

  operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
})
|
||||
|
||||
;; %%% This used to optimize known byte-wide and operations to memory,
|
||||
;; and sometimes to QImode registers. If this is considered useful,
|
||||
;; it should be done with splitters.
|
||||
|
|
|
@ -647,6 +647,14 @@ DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT | m_LAKEMONT)
|
|||
and can happen in caller/callee saving sequences. */
|
||||
DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO)
|
||||
|
||||
/* X86_TUNE_PARTIAL_MEMORY_READ_STALL: Reading (possible unaligned) part of
|
||||
memory location after a large write to the same address causes
|
||||
store-to-load forwarding stall. */
|
||||
DEF_TUNE (X86_TUNE_PARTIAL_MEMORY_READ_STALL, "partial_memoy_read_stall",
|
||||
m_386 | m_486 | m_PENT | m_LAKEMONT | m_PPRO | m_P4_NOCONA | m_CORE2
|
||||
| m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
|
||||
| m_K6_GEODE | m_ATHLON_K8 | m_AMDFAM10)
|
||||
|
||||
/* X86_TUNE_PROMOTE_QIMODE: When it is cheap, turn 8bit arithmetic to
|
||||
corresponding 32bit arithmetic. */
|
||||
DEF_TUNE (X86_TUNE_PROMOTE_QIMODE, "promote_qimode",
|
||||
|
|
19
gcc/testsuite/gcc.target/i386/pr111698.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr111698.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* PR target/111698 */
|
||||
/* { dg-options "-O2 -masm=att" } */
|
||||
/* { dg-final { scan-assembler-not "testl" } } */
|
||||
|
||||
int m;
|
||||
|
||||
/* Mask 0x0a0000 has set bits only in the byte at offset 2 of M, so the
   test can be narrowed to a single byte.  The directives in this file
   expect no "testl" and exactly one "testb".  */
_Bool foo (void)
|
||||
{
|
||||
return m & 0x0a0000;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "testb" 1 } } */
|
||||
|
||||
/* Mask 0xa0a000 has set bits in two adjacent bytes of M (offsets 1 and 2),
   so a single-byte access is not enough.  The directive that follows this
   function expects exactly one "testw".  */
_Bool bar (void)
|
||||
{
|
||||
return m & 0xa0a000;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "testw" 1 } } */
|
Loading…
Add table
Reference in a new issue