i386: Some AVX512 ternlog expansion refinements.
This patch replaces the calls to force_reg in ix86_expand_ternlog_binop and ix86_expand_ternlog with gen_reg_rtx and emit_move_insn. This patch also cleans up whitespace, consistently uses CONST_VECTOR_P instead of GET_CODE and tweaks checks for ix86_ternlog_leaf_p (for example where vpandn may take a memory operand). 2024-07-12 Roger Sayle <roger@nextmovesoftware.com> Hongtao Liu <hongtao.liu@intel.com> gcc/ChangeLog * config/i386/i386-expand.cc (ix86_broadcast_from_constant): Use CONST_VECTOR_P instead of comparison against GET_CODE. (ix86_gen_bcst_mem): Likewise. (ix86_ternlog_leaf_p): Likewise. (ix86_ternlog_operand_p): ix86_ternlog_leaf_p is always true for vector_all_ones_operand. (ix86_expand_ternlog_bin_op): Use CONST_VECTOR_P instead of equality comparison against GET_CODE. Replace call to force_reg with gen_reg_rtx and emit_move_insn (for VEC_DUPLICATE broadcast). Check for !register_operand instead of memory_operand. Support CONST_VECTORs by calling force_const_mem. (ix86_expand_ternlog): Fix indentation whitespace. Allow ix86_ternlog_leaf_p as ix86_expand_ternlog_andnot's second operand. Use CONST_VECTOR_P instead of equality against GET_CODE. Use gen_reg_rtx and emit_move_insn for ~a, ~b and ~c cases.
This commit is contained in:
parent
56de68aba6
commit
6b5d263f2c
1 changed files with 78 additions and 48 deletions
|
@ -613,7 +613,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)
|
|||
return nullptr;
|
||||
|
||||
rtx constant = get_pool_constant (XEXP (op, 0));
|
||||
if (GET_CODE (constant) != CONST_VECTOR)
|
||||
if (!CONST_VECTOR_P (constant))
|
||||
return nullptr;
|
||||
|
||||
/* There could be some rtx like
|
||||
|
@ -623,7 +623,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)
|
|||
{
|
||||
constant = simplify_subreg (mode, constant, GET_MODE (constant),
|
||||
0);
|
||||
if (constant == nullptr || GET_CODE (constant) != CONST_VECTOR)
|
||||
if (constant == nullptr || !CONST_VECTOR_P (constant))
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -25561,7 +25561,7 @@ static rtx
|
|||
ix86_gen_bcst_mem (machine_mode mode, rtx x)
|
||||
{
|
||||
if (!TARGET_AVX512F
|
||||
|| GET_CODE (x) != CONST_VECTOR
|
||||
|| !CONST_VECTOR_P (x)
|
||||
|| (!TARGET_AVX512VL
|
||||
&& (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512))
|
||||
|| !VALID_BCST_MODE_P (GET_MODE_INNER (mode))
|
||||
|
@ -25751,7 +25751,7 @@ ix86_ternlog_leaf_p (rtx op, machine_mode mode)
|
|||
problems splitting instructions. */
|
||||
return register_operand (op, mode)
|
||||
|| MEM_P (op)
|
||||
|| GET_CODE (op) == CONST_VECTOR
|
||||
|| CONST_VECTOR_P (op)
|
||||
|| bcst_mem_operand (op, mode);
|
||||
}
|
||||
|
||||
|
@ -25801,8 +25801,7 @@ ix86_ternlog_operand_p (rtx op)
|
|||
op1 = XEXP (op, 1);
|
||||
/* Prefer pxor, or one_cmpl<vmode>2. */
|
||||
if (ix86_ternlog_leaf_p (XEXP (op, 0), mode)
|
||||
&& (ix86_ternlog_leaf_p (op1, mode)
|
||||
|| vector_all_ones_operand (op1, mode)))
|
||||
&& ix86_ternlog_leaf_p (XEXP (op, 1), mode))
|
||||
return false;
|
||||
break;
|
||||
|
||||
|
@ -25822,15 +25821,20 @@ ix86_expand_ternlog_binop (enum rtx_code code, machine_mode mode,
|
|||
if (GET_MODE (op1) != mode)
|
||||
op1 = gen_lowpart (mode, op1);
|
||||
|
||||
if (GET_CODE (op0) == CONST_VECTOR)
|
||||
if (CONST_VECTOR_P (op0))
|
||||
op0 = validize_mem (force_const_mem (mode, op0));
|
||||
if (GET_CODE (op1) == CONST_VECTOR)
|
||||
if (CONST_VECTOR_P (op1))
|
||||
op1 = validize_mem (force_const_mem (mode, op1));
|
||||
|
||||
if (memory_operand (op0, mode))
|
||||
if (!register_operand (op0, mode))
|
||||
{
|
||||
if (memory_operand (op1, mode))
|
||||
op0 = force_reg (mode, op0);
|
||||
if (!register_operand (op1, mode))
|
||||
{
|
||||
/* We can't use force_reg (op0, mode). */
|
||||
rtx reg = gen_reg_rtx (mode);
|
||||
emit_move_insn (reg, op0);
|
||||
op0 = reg;
|
||||
}
|
||||
else
|
||||
std::swap (op0, op1);
|
||||
}
|
||||
|
@ -25849,6 +25853,8 @@ ix86_expand_ternlog_andnot (machine_mode mode, rtx op0, rtx op1, rtx target)
|
|||
op0 = gen_rtx_NOT (mode, op0);
|
||||
if (GET_MODE (op1) != mode)
|
||||
op1 = gen_lowpart (mode, op1);
|
||||
if (CONST_VECTOR_P (op1))
|
||||
op1 = validize_mem (force_const_mem (mode, op1));
|
||||
emit_move_insn (target, gen_rtx_AND (mode, op0, op1));
|
||||
return target;
|
||||
}
|
||||
|
@ -25885,9 +25891,9 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
{
|
||||
case 0x00:
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
{
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
{
|
||||
emit_move_insn (target, CONST0_RTX (mode));
|
||||
return target;
|
||||
}
|
||||
|
@ -25896,26 +25902,31 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
case 0x0a: /* ~a&c */
|
||||
if ((!op1 || !side_effects_p (op1))
|
||||
&& op0 && register_operand (op0, mode)
|
||||
&& op2 && register_operand (op2, mode))
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode))
|
||||
return ix86_expand_ternlog_andnot (mode, op0, op2, target);
|
||||
break;
|
||||
|
||||
case 0x0c: /* ~a&b */
|
||||
if ((!op2 || !side_effects_p (op2))
|
||||
&& op0 && register_operand (op0, mode)
|
||||
&& op1 && register_operand (op1, mode))
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode))
|
||||
return ix86_expand_ternlog_andnot (mode, op0, op1, target);
|
||||
break;
|
||||
|
||||
case 0x0f: /* ~a */
|
||||
if ((!op1 || !side_effects_p (op1))
|
||||
&& (!op2 || !side_effects_p (op2))
|
||||
&& op0)
|
||||
&& op0)
|
||||
{
|
||||
if (GET_MODE (op0) != mode)
|
||||
op0 = gen_lowpart (mode, op0);
|
||||
if (!TARGET_64BIT && !register_operand (op0, mode))
|
||||
op0 = force_reg (mode, op0);
|
||||
{
|
||||
/* Avoid force_reg (mode, op0). */
|
||||
rtx reg = gen_reg_rtx (mode);
|
||||
emit_move_insn (reg, op0);
|
||||
op0 = reg;
|
||||
}
|
||||
emit_move_insn (target, gen_rtx_XOR (mode, op0, CONSTM1_RTX (mode)));
|
||||
return target;
|
||||
}
|
||||
|
@ -25924,13 +25935,13 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
case 0x22: /* ~b&c */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& op1 && register_operand (op1, mode)
|
||||
&& op2 && register_operand (op2, mode))
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode))
|
||||
return ix86_expand_ternlog_andnot (mode, op1, op2, target);
|
||||
break;
|
||||
|
||||
case 0x30: /* ~b&a */
|
||||
if ((!op2 || !side_effects_p (op2))
|
||||
&& op0 && register_operand (op0, mode)
|
||||
&& op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op1 && register_operand (op1, mode))
|
||||
return ix86_expand_ternlog_andnot (mode, op1, op0, target);
|
||||
break;
|
||||
|
@ -25938,33 +25949,39 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
case 0x33: /* ~b */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& (!op2 || !side_effects_p (op2))
|
||||
&& op1)
|
||||
&& op1)
|
||||
{
|
||||
if (GET_MODE (op1) != mode)
|
||||
op1 = gen_lowpart (mode, op1);
|
||||
if (!TARGET_64BIT && !register_operand (op1, mode))
|
||||
op1 = force_reg (mode, op1);
|
||||
{
|
||||
/* Avoid force_reg (mode, op1). */
|
||||
rtx reg = gen_reg_rtx (mode);
|
||||
emit_move_insn (reg, op1);
|
||||
op1 = reg;
|
||||
}
|
||||
emit_move_insn (target, gen_rtx_XOR (mode, op1, CONSTM1_RTX (mode)));
|
||||
return target;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x3c: /* a^b */
|
||||
if (op0 && op1
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
if (op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
return ix86_expand_ternlog_binop (XOR, mode, op0, op1, target);
|
||||
break;
|
||||
|
||||
case 0x44: /* ~c&b */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& op1 && register_operand (op1, mode)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& op2 && register_operand (op2, mode))
|
||||
return ix86_expand_ternlog_andnot (mode, op2, op1, target);
|
||||
break;
|
||||
|
||||
case 0x50: /* ~c&a */
|
||||
if ((!op1 || !side_effects_p (op1))
|
||||
&& op0 && register_operand (op0, mode)
|
||||
&& op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op2 && register_operand (op2, mode))
|
||||
return ix86_expand_ternlog_andnot (mode, op2, op0, target);
|
||||
break;
|
||||
|
@ -25972,45 +25989,54 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
case 0x55: /* ~c */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& op2)
|
||||
&& op2)
|
||||
{
|
||||
if (GET_MODE (op2) != mode)
|
||||
op2 = gen_lowpart (mode, op2);
|
||||
if (!TARGET_64BIT && !register_operand (op2, mode))
|
||||
op2 = force_reg (mode, op2);
|
||||
{
|
||||
/* Avoid force_reg (mode, op2). */
|
||||
rtx reg = gen_reg_rtx (mode);
|
||||
emit_move_insn (reg, op2);
|
||||
op2 = reg;
|
||||
}
|
||||
emit_move_insn (target, gen_rtx_XOR (mode, op2, CONSTM1_RTX (mode)));
|
||||
return target;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x5a: /* a^c */
|
||||
if (op0 && op2
|
||||
&& (!op1 || !side_effects_p (op1)))
|
||||
if (op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode)
|
||||
&& (!op1 || !side_effects_p (op1)))
|
||||
return ix86_expand_ternlog_binop (XOR, mode, op0, op2, target);
|
||||
break;
|
||||
|
||||
case 0x66: /* b^c */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& op1 && op2)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode))
|
||||
return ix86_expand_ternlog_binop (XOR, mode, op1, op2, target);
|
||||
break;
|
||||
|
||||
case 0x88: /* b&c */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& op1 && op2)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode))
|
||||
return ix86_expand_ternlog_binop (AND, mode, op1, op2, target);
|
||||
break;
|
||||
|
||||
case 0xa0: /* a&c */
|
||||
if ((!op1 || !side_effects_p (op1))
|
||||
&& op0 && op2)
|
||||
&& op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode))
|
||||
return ix86_expand_ternlog_binop (AND, mode, op0, op2, target);
|
||||
break;
|
||||
|
||||
case 0xaa: /* c */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& op2)
|
||||
&& op2)
|
||||
{
|
||||
if (GET_MODE (op2) != mode)
|
||||
op2 = gen_lowpart (mode, op2);
|
||||
|
@ -26020,14 +26046,15 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
break;
|
||||
|
||||
case 0xc0: /* a&b */
|
||||
if (op0 && op1
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
if (op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
return ix86_expand_ternlog_binop (AND, mode, op0, op1, target);
|
||||
break;
|
||||
|
||||
case 0xcc: /* b */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& op1
|
||||
&& op1
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
{
|
||||
if (GET_MODE (op1) != mode)
|
||||
|
@ -26039,13 +26066,14 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
|
||||
case 0xee: /* b|c */
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& op1 && op2)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode))
|
||||
return ix86_expand_ternlog_binop (IOR, mode, op1, op2, target);
|
||||
break;
|
||||
|
||||
case 0xf0: /* a */
|
||||
if (op0
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
{
|
||||
if (GET_MODE (op0) != mode)
|
||||
|
@ -26056,23 +26084,25 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
break;
|
||||
|
||||
case 0xfa: /* a|c */
|
||||
if (op0 && op2
|
||||
&& (!op1 || !side_effects_p (op1)))
|
||||
if (op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op2 && ix86_ternlog_leaf_p (op2, mode)
|
||||
&& (!op1 || !side_effects_p (op1)))
|
||||
return ix86_expand_ternlog_binop (IOR, mode, op0, op2, target);
|
||||
break;
|
||||
|
||||
case 0xfc: /* a|b */
|
||||
if (op0 && op1
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
if (op0 && ix86_ternlog_leaf_p (op0, mode)
|
||||
&& op1 && ix86_ternlog_leaf_p (op1, mode)
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
return ix86_expand_ternlog_binop (IOR, mode, op0, op1, target);
|
||||
break;
|
||||
|
||||
case 0xff:
|
||||
if ((!op0 || !side_effects_p (op0))
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
{
|
||||
emit_move_insn (target, CONSTM1_RTX (mode));
|
||||
&& (!op1 || !side_effects_p (op1))
|
||||
&& (!op2 || !side_effects_p (op2)))
|
||||
{
|
||||
emit_move_insn (target, CONSTM1_RTX (mode));
|
||||
return target;
|
||||
}
|
||||
break;
|
||||
|
@ -26106,7 +26136,7 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
|
|||
tmp2 = copy_rtx (tmp0);
|
||||
else if (rtx_equal_p (op1, op2))
|
||||
tmp2 = copy_rtx (tmp1);
|
||||
else if (GET_CODE (op2) == CONST_VECTOR)
|
||||
else if (CONST_VECTOR_P (op2))
|
||||
{
|
||||
if (GET_MODE (op2) != mode)
|
||||
op2 = gen_lowpart (mode, op2);
|
||||
|
|
Loading…
Add table
Reference in a new issue