i386: Some AVX512 ternlog expansion refinements.

This patch replaces the calls to force_reg in ix86_expand_ternlog_binop
and ix86_expand_ternlog with gen_reg_rtx and emit_move_insn.
This patch also cleans up whitespace, consistently uses CONST_VECTOR_P
instead of GET_CODE and tweaks checks for ix86_ternlog_leaf_p (for
example where vpandn may take a memory operand).

2024-07-12  Roger Sayle  <roger@nextmovesoftware.com>
	    Hongtao Liu  <hongtao.liu@intel.com>

gcc/ChangeLog
	* config/i386/i386-expand.cc (ix86_broadcast_from_constant):
	Use CONST_VECTOR_P instead of comparison against GET_CODE.
	(ix86_gen_bcst_mem): Likewise.
	(ix86_ternlog_leaf_p): Likewise.
	(ix86_ternlog_operand_p): ix86_ternlog_leaf_p is always true for
	vector_all_ones_operand.
	(ix86_expand_ternlog_bin_op): Use CONST_VECTOR_P instead of
	equality comparison against GET_CODE.  Replace call to force_reg
	with gen_reg_rtx and emit_move_insn (for VEC_DUPLICATE broadcast).
	Check for !register_operand instead of memory_operand.
	Support CONST_VECTORs by calling force_const_mem.
	(ix86_expand_ternlog): Fix indentation whitespace.
	Allow ix86_ternlog_leaf_p as ix86_expand_ternlog_andnot's second
	operand. Use CONST_VECTOR_P instead of equality against GET_CODE.
	Use gen_reg_rtx and emit_move_insn for ~a, ~b and ~c cases.
This commit is contained in:
Roger Sayle 2024-07-12 12:30:56 +01:00
parent 56de68aba6
commit 6b5d263f2c

View file

@ -613,7 +613,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)
return nullptr;
rtx constant = get_pool_constant (XEXP (op, 0));
if (GET_CODE (constant) != CONST_VECTOR)
if (!CONST_VECTOR_P (constant))
return nullptr;
/* There could be some rtx like
@ -623,7 +623,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)
{
constant = simplify_subreg (mode, constant, GET_MODE (constant),
0);
if (constant == nullptr || GET_CODE (constant) != CONST_VECTOR)
if (constant == nullptr || !CONST_VECTOR_P (constant))
return nullptr;
}
@ -25561,7 +25561,7 @@ static rtx
ix86_gen_bcst_mem (machine_mode mode, rtx x)
{
if (!TARGET_AVX512F
|| GET_CODE (x) != CONST_VECTOR
|| !CONST_VECTOR_P (x)
|| (!TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512))
|| !VALID_BCST_MODE_P (GET_MODE_INNER (mode))
@ -25751,7 +25751,7 @@ ix86_ternlog_leaf_p (rtx op, machine_mode mode)
problems splitting instructions. */
return register_operand (op, mode)
|| MEM_P (op)
|| GET_CODE (op) == CONST_VECTOR
|| CONST_VECTOR_P (op)
|| bcst_mem_operand (op, mode);
}
@ -25801,8 +25801,7 @@ ix86_ternlog_operand_p (rtx op)
op1 = XEXP (op, 1);
/* Prefer pxor, or one_cmpl<vmode>2. */
if (ix86_ternlog_leaf_p (XEXP (op, 0), mode)
&& (ix86_ternlog_leaf_p (op1, mode)
|| vector_all_ones_operand (op1, mode)))
&& ix86_ternlog_leaf_p (XEXP (op, 1), mode))
return false;
break;
@ -25822,15 +25821,20 @@ ix86_expand_ternlog_binop (enum rtx_code code, machine_mode mode,
if (GET_MODE (op1) != mode)
op1 = gen_lowpart (mode, op1);
if (GET_CODE (op0) == CONST_VECTOR)
if (CONST_VECTOR_P (op0))
op0 = validize_mem (force_const_mem (mode, op0));
if (GET_CODE (op1) == CONST_VECTOR)
if (CONST_VECTOR_P (op1))
op1 = validize_mem (force_const_mem (mode, op1));
if (memory_operand (op0, mode))
if (!register_operand (op0, mode))
{
if (memory_operand (op1, mode))
op0 = force_reg (mode, op0);
if (!register_operand (op1, mode))
{
/* We can't use force_reg (op0, mode). */
rtx reg = gen_reg_rtx (mode);
emit_move_insn (reg, op0);
op0 = reg;
}
else
std::swap (op0, op1);
}
@ -25849,6 +25853,8 @@ ix86_expand_ternlog_andnot (machine_mode mode, rtx op0, rtx op1, rtx target)
op0 = gen_rtx_NOT (mode, op0);
if (GET_MODE (op1) != mode)
op1 = gen_lowpart (mode, op1);
if (CONST_VECTOR_P (op1))
op1 = validize_mem (force_const_mem (mode, op1));
emit_move_insn (target, gen_rtx_AND (mode, op0, op1));
return target;
}
@ -25885,9 +25891,9 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
{
case 0x00:
if ((!op0 || !side_effects_p (op0))
&& (!op1 || !side_effects_p (op1))
&& (!op2 || !side_effects_p (op2)))
{
&& (!op1 || !side_effects_p (op1))
&& (!op2 || !side_effects_p (op2)))
{
emit_move_insn (target, CONST0_RTX (mode));
return target;
}
@ -25896,26 +25902,31 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
case 0x0a: /* ~a&c */
if ((!op1 || !side_effects_p (op1))
&& op0 && register_operand (op0, mode)
&& op2 && register_operand (op2, mode))
&& op2 && ix86_ternlog_leaf_p (op2, mode))
return ix86_expand_ternlog_andnot (mode, op0, op2, target);
break;
case 0x0c: /* ~a&b */
if ((!op2 || !side_effects_p (op2))
&& op0 && register_operand (op0, mode)
&& op1 && register_operand (op1, mode))
&& op1 && ix86_ternlog_leaf_p (op1, mode))
return ix86_expand_ternlog_andnot (mode, op0, op1, target);
break;
case 0x0f: /* ~a */
if ((!op1 || !side_effects_p (op1))
&& (!op2 || !side_effects_p (op2))
&& op0)
&& op0)
{
if (GET_MODE (op0) != mode)
op0 = gen_lowpart (mode, op0);
if (!TARGET_64BIT && !register_operand (op0, mode))
op0 = force_reg (mode, op0);
{
/* Avoid force_reg (mode, op0). */
rtx reg = gen_reg_rtx (mode);
emit_move_insn (reg, op0);
op0 = reg;
}
emit_move_insn (target, gen_rtx_XOR (mode, op0, CONSTM1_RTX (mode)));
return target;
}
@ -25924,13 +25935,13 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
case 0x22: /* ~b&c */
if ((!op0 || !side_effects_p (op0))
&& op1 && register_operand (op1, mode)
&& op2 && register_operand (op2, mode))
&& op2 && ix86_ternlog_leaf_p (op2, mode))
return ix86_expand_ternlog_andnot (mode, op1, op2, target);
break;
case 0x30: /* ~b&a */
if ((!op2 || !side_effects_p (op2))
&& op0 && register_operand (op0, mode)
&& op0 && ix86_ternlog_leaf_p (op0, mode)
&& op1 && register_operand (op1, mode))
return ix86_expand_ternlog_andnot (mode, op1, op0, target);
break;
@ -25938,33 +25949,39 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
case 0x33: /* ~b */
if ((!op0 || !side_effects_p (op0))
&& (!op2 || !side_effects_p (op2))
&& op1)
&& op1)
{
if (GET_MODE (op1) != mode)
op1 = gen_lowpart (mode, op1);
if (!TARGET_64BIT && !register_operand (op1, mode))
op1 = force_reg (mode, op1);
{
/* Avoid force_reg (mode, op1). */
rtx reg = gen_reg_rtx (mode);
emit_move_insn (reg, op1);
op1 = reg;
}
emit_move_insn (target, gen_rtx_XOR (mode, op1, CONSTM1_RTX (mode)));
return target;
}
break;
case 0x3c: /* a^b */
if (op0 && op1
&& (!op2 || !side_effects_p (op2)))
if (op0 && ix86_ternlog_leaf_p (op0, mode)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& (!op2 || !side_effects_p (op2)))
return ix86_expand_ternlog_binop (XOR, mode, op0, op1, target);
break;
case 0x44: /* ~c&b */
if ((!op0 || !side_effects_p (op0))
&& op1 && register_operand (op1, mode)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& op2 && register_operand (op2, mode))
return ix86_expand_ternlog_andnot (mode, op2, op1, target);
break;
case 0x50: /* ~c&a */
if ((!op1 || !side_effects_p (op1))
&& op0 && register_operand (op0, mode)
&& op0 && ix86_ternlog_leaf_p (op0, mode)
&& op2 && register_operand (op2, mode))
return ix86_expand_ternlog_andnot (mode, op2, op0, target);
break;
@ -25972,45 +25989,54 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
case 0x55: /* ~c */
if ((!op0 || !side_effects_p (op0))
&& (!op1 || !side_effects_p (op1))
&& op2)
&& op2)
{
if (GET_MODE (op2) != mode)
op2 = gen_lowpart (mode, op2);
if (!TARGET_64BIT && !register_operand (op2, mode))
op2 = force_reg (mode, op2);
{
/* Avoid force_reg (mode, op2). */
rtx reg = gen_reg_rtx (mode);
emit_move_insn (reg, op2);
op2 = reg;
}
emit_move_insn (target, gen_rtx_XOR (mode, op2, CONSTM1_RTX (mode)));
return target;
}
break;
case 0x5a: /* a^c */
if (op0 && op2
&& (!op1 || !side_effects_p (op1)))
if (op0 && ix86_ternlog_leaf_p (op0, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode)
&& (!op1 || !side_effects_p (op1)))
return ix86_expand_ternlog_binop (XOR, mode, op0, op2, target);
break;
case 0x66: /* b^c */
if ((!op0 || !side_effects_p (op0))
&& op1 && op2)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode))
return ix86_expand_ternlog_binop (XOR, mode, op1, op2, target);
break;
case 0x88: /* b&c */
if ((!op0 || !side_effects_p (op0))
&& op1 && op2)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode))
return ix86_expand_ternlog_binop (AND, mode, op1, op2, target);
break;
case 0xa0: /* a&c */
if ((!op1 || !side_effects_p (op1))
&& op0 && op2)
&& op0 && ix86_ternlog_leaf_p (op0, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode))
return ix86_expand_ternlog_binop (AND, mode, op0, op2, target);
break;
case 0xaa: /* c */
if ((!op0 || !side_effects_p (op0))
&& (!op1 || !side_effects_p (op1))
&& op2)
&& op2)
{
if (GET_MODE (op2) != mode)
op2 = gen_lowpart (mode, op2);
@ -26020,14 +26046,15 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
break;
case 0xc0: /* a&b */
if (op0 && op1
&& (!op2 || !side_effects_p (op2)))
if (op0 && ix86_ternlog_leaf_p (op0, mode)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& (!op2 || !side_effects_p (op2)))
return ix86_expand_ternlog_binop (AND, mode, op0, op1, target);
break;
case 0xcc: /* b */
if ((!op0 || !side_effects_p (op0))
&& op1
&& op1
&& (!op2 || !side_effects_p (op2)))
{
if (GET_MODE (op1) != mode)
@ -26039,13 +26066,14 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
case 0xee: /* b|c */
if ((!op0 || !side_effects_p (op0))
&& op1 && op2)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode))
return ix86_expand_ternlog_binop (IOR, mode, op1, op2, target);
break;
case 0xf0: /* a */
if (op0
&& (!op1 || !side_effects_p (op1))
&& (!op1 || !side_effects_p (op1))
&& (!op2 || !side_effects_p (op2)))
{
if (GET_MODE (op0) != mode)
@ -26056,23 +26084,25 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
break;
case 0xfa: /* a|c */
if (op0 && op2
&& (!op1 || !side_effects_p (op1)))
if (op0 && ix86_ternlog_leaf_p (op0, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode)
&& (!op1 || !side_effects_p (op1)))
return ix86_expand_ternlog_binop (IOR, mode, op0, op2, target);
break;
case 0xfc: /* a|b */
if (op0 && op1
&& (!op2 || !side_effects_p (op2)))
if (op0 && ix86_ternlog_leaf_p (op0, mode)
&& op1 && ix86_ternlog_leaf_p (op1, mode)
&& (!op2 || !side_effects_p (op2)))
return ix86_expand_ternlog_binop (IOR, mode, op0, op1, target);
break;
case 0xff:
if ((!op0 || !side_effects_p (op0))
&& (!op1 || !side_effects_p (op1))
&& (!op2 || !side_effects_p (op2)))
{
emit_move_insn (target, CONSTM1_RTX (mode));
&& (!op1 || !side_effects_p (op1))
&& (!op2 || !side_effects_p (op2)))
{
emit_move_insn (target, CONSTM1_RTX (mode));
return target;
}
break;
@ -26106,7 +26136,7 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
tmp2 = copy_rtx (tmp0);
else if (rtx_equal_p (op1, op2))
tmp2 = copy_rtx (tmp1);
else if (GET_CODE (op2) == CONST_VECTOR)
else if (CONST_VECTOR_P (op2))
{
if (GET_MODE (op2) != mode)
op2 = gen_lowpart (mode, op2);