Some additional ix86_rtx_costs clean-ups: NEG, AND, andn and pandn.

Double-word NOT requires two operations, but double-word NEG requires
three operations.  Using SSE, vector NOT requires a pxor with -1, but
AND of NOT is cheap thanks to the existence of pandn.  There's also some
legacy (aka incorrect) logic that explicitly tests for DImode [independently
of TARGET_64BIT] when determining the cost of logic operations; that test
is not required.

2022-05-23  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/i386/i386.cc (ix86_rtx_costs) <case AND>: Split from
	XOR/IOR case.  Account for two instructions for double-word
	operations.  In case of vector pandn, account for single
	instruction.  Likewise for integer andn with TARGET_BMI.
	<case NOT>: Vector NOT requires more than 1 instruction (pxor).
	<case NEG>: Double-word negation requires 3 instructions.
This commit is contained in:
Roger Sayle 2022-05-23 08:47:42 +01:00
parent 075fb873c2
commit 7707d7fddf

View file

@ -20738,62 +20738,21 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
{
*total = cost->addss;
return false;
}
*total = cost->addss;
else if (X87_FLOAT_MODE_P (mode))
{
*total = cost->fadd;
return false;
}
*total = cost->fadd;
else if (FLOAT_MODE_P (mode))
{
*total = ix86_vec_cost (mode, cost->addss);
return false;
}
/* FALLTHRU */
*total = ix86_vec_cost (mode, cost->addss);
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
*total = ix86_vec_cost (mode, cost->sse_op);
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
*total = cost->add * 2;
else
*total = cost->add;
return false;
case AND:
case IOR:
case XOR:
if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) > UNITS_PER_WORD)
{
*total = (cost->add * 2
+ (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
+ (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
else if (code == AND
&& address_no_seg_operand (x, mode))
{
*total = cost->lea;
return true;
}
/* FALLTHRU */
case NEG:
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
{
*total = cost->sse_op;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
*total = cost->fchs;
return false;
}
else if (FLOAT_MODE_P (mode))
{
*total = ix86_vec_cost (mode, cost->sse_op);
return false;
}
/* FALLTHRU */
case NOT:
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
*total = ix86_vec_cost (mode, cost->sse_op);
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
@ -20802,6 +20761,102 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
*total = cost->add;
return false;
case AND:
if (address_no_seg_operand (x, mode))
{
*total = cost->lea;
return true;
}
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
/* pandn is a single instruction. */
if (GET_CODE (XEXP (x, 0)) == NOT)
{
*total = ix86_vec_cost (mode, cost->sse_op)
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
outer_code, opno, speed)
+ rtx_cost (XEXP (x, 1), mode,
outer_code, opno, speed);
return true;
}
else if (GET_CODE (XEXP (x, 1)) == NOT)
{
*total = ix86_vec_cost (mode, cost->sse_op)
+ rtx_cost (XEXP (x, 0), mode,
outer_code, opno, speed)
+ rtx_cost (XEXP (XEXP (x, 1), 0), mode,
outer_code, opno, speed);
return true;
}
*total = ix86_vec_cost (mode, cost->sse_op);
}
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
{
if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
{
*total = cost->add * 2
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
outer_code, opno, speed)
+ rtx_cost (XEXP (x, 1), mode,
outer_code, opno, speed);
return true;
}
else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
{
*total = cost->add * 2
+ rtx_cost (XEXP (x, 0), mode,
outer_code, opno, speed)
+ rtx_cost (XEXP (XEXP (x, 1), 0), mode,
outer_code, opno, speed);
return true;
}
*total = cost->add * 2;
}
else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
{
*total = cost->add
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
outer_code, opno, speed)
+ rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
return true;
}
else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
{
*total = cost->add
+ rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ rtx_cost (XEXP (XEXP (x, 1), 0), mode,
outer_code, opno, speed);
return true;
}
else
*total = cost->add;
return false;
case NOT:
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
// vnot is pxor -1.
*total = ix86_vec_cost (mode, cost->sse_op) + 1;
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
*total = cost->add * 2;
else
*total = cost->add;
return false;
case NEG:
if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
*total = cost->sse_op;
else if (X87_FLOAT_MODE_P (mode))
*total = cost->fchs;
else if (FLOAT_MODE_P (mode))
*total = ix86_vec_cost (mode, cost->sse_op);
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
*total = ix86_vec_cost (mode, cost->sse_op);
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
*total = cost->add * 3;
else
*total = cost->add;
return false;
case COMPARE:
rtx op0, op1;
op0 = XEXP (x, 0);