i386: Improve and optimize ix86_expand_sse_movcc

Modernize ix86_expand_sse_movcc to use expand_simple_{unop,binop}
infrastructure to avoid manual twiddling with output registers.
Also fix a couple of inconsistent vector_all_ones_operand usages,
break a couple of unnecessary else-if chains, eliminate common
subexpressions and do some general code simplifications.

2022-01-15  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	* config/i386/i386-expand.c (ix86_expand_sse_movcc): Use
	expand_simple_unop and expand_simple_binop instead of manually
	constructing NOT, AND and IOR RTXes.  Use vector_all_ones_operand
	consistently.  Eliminate common subexpressions and simplify code.
	* config/i386/sse.md (<any_logic:code><MODEF:mode>3): New expander.
	(<any_logic:code><MODEF:mode>3): Make public.
This commit is contained in:
Uros Bizjak 2022-01-15 20:59:07 +01:00
parent dcf8fe1eea
commit f4a2cecd68
2 changed files with 62 additions and 78 deletions

View file

@ -3781,6 +3781,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
machine_mode mode = GET_MODE (dest);
machine_mode cmpmode = GET_MODE (cmp);
rtx x;
/* Simplify trivial VEC_COND_EXPR to avoid ICE in pr97506. */
if (rtx_equal_p (op_true, op_false))
@ -3789,8 +3790,6 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
return;
}
rtx t2, t3, x;
/* If we have an integer mask and FP value then we need
to cast mask to FP mode. */
if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
@ -3813,12 +3812,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
? force_reg (mode, op_false) : op_false);
if (op_true == CONST0_RTX (mode))
{
rtx n = gen_reg_rtx (cmpmode);
if (cmpmode == E_DImode && !TARGET_64BIT)
emit_insn (gen_knotdi (n, cmp));
{
x = gen_reg_rtx (cmpmode);
emit_insn (gen_knotdi (x, cmp));
}
else
emit_insn (gen_rtx_SET (n, gen_rtx_fmt_e (NOT, cmpmode, cmp)));
cmp = n;
x = expand_simple_unop (cmpmode, NOT, cmp, NULL, 1);
cmp = x;
/* Reverse op_true op_false. */
std::swap (op_true, op_false);
}
@ -3826,22 +3827,24 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (mode == HFmode)
emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp));
else
{
rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
emit_insn (gen_rtx_SET (dest, vec_merge));
}
emit_insn (gen_rtx_SET (dest,
gen_rtx_VEC_MERGE (mode,
op_true, op_false, cmp)));
return;
}
else if (vector_all_ones_operand (op_true, mode)
&& op_false == CONST0_RTX (mode))
if (vector_all_ones_operand (op_true, mode)
&& op_false == CONST0_RTX (mode))
{
emit_insn (gen_rtx_SET (dest, cmp));
emit_move_insn (dest, cmp);
return;
}
else if (op_false == CONST0_RTX (mode))
{
op_true = force_reg (mode, op_true);
ix86_emit_vec_binop (AND, mode, dest, cmp, op_true);
x = expand_simple_binop (mode, AND, cmp, op_true,
dest, 1, OPTAB_DIRECT);
if (x != dest)
emit_move_insn (dest, x);
return;
}
else if (op_true == CONST0_RTX (mode))
@ -3851,13 +3854,16 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
ix86_emit_vec_binop (AND, mode, dest, x, op_false);
return;
}
else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
else if (vector_all_ones_operand (op_true, mode))
{
op_false = force_reg (mode, op_false);
ix86_emit_vec_binop (IOR, mode, dest, cmp, op_false);
x = expand_simple_binop (mode, IOR, cmp, op_false,
dest, 1, OPTAB_DIRECT);
if (x != dest)
emit_move_insn (dest, x);
return;
}
else if (TARGET_XOP)
if (TARGET_XOP)
{
op_true = force_reg (mode, op_true);
@ -3865,16 +3871,17 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|| !nonimmediate_operand (op_false, mode))
op_false = force_reg (mode, op_false);
emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
op_true,
op_false)));
emit_insn (gen_rtx_SET (dest,
gen_rtx_IF_THEN_ELSE (mode, cmp,
op_true, op_false)));
return;
}
rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
rtx d = dest;
machine_mode blend_mode = mode;
if (!vector_operand (op_true, mode))
if (GET_MODE_SIZE (mode) < 16
|| !vector_operand (op_true, mode))
op_true = force_reg (mode, op_true);
op_false = force_reg (mode, op_false);
@ -3883,10 +3890,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
case E_V2SFmode:
if (TARGET_SSE4_1)
{
gen = gen_mmx_blendvps;
op_true = force_reg (mode, op_true);
}
gen = gen_mmx_blendvps;
break;
case E_V4SFmode:
if (TARGET_SSE4_1)
@ -3898,54 +3902,32 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
break;
case E_SFmode:
if (TARGET_SSE4_1)
{
gen = gen_sse4_1_blendvss;
op_true = force_reg (mode, op_true);
}
gen = gen_sse4_1_blendvss;
break;
case E_DFmode:
if (TARGET_SSE4_1)
{
gen = gen_sse4_1_blendvsd;
op_true = force_reg (mode, op_true);
}
gen = gen_sse4_1_blendvsd;
break;
case E_V8QImode:
case E_V4HImode:
case E_V2SImode:
if (TARGET_SSE4_1)
{
op_true = force_reg (mode, op_true);
gen = gen_mmx_pblendvb_v8qi;
if (mode != V8QImode)
d = gen_reg_rtx (V8QImode);
op_false = gen_lowpart (V8QImode, op_false);
op_true = gen_lowpart (V8QImode, op_true);
cmp = gen_lowpart (V8QImode, cmp);
blend_mode = V8QImode;
}
break;
case E_V4QImode:
case E_V2HImode:
if (TARGET_SSE4_1)
{
op_true = force_reg (mode, op_true);
gen = gen_mmx_pblendvb_v4qi;
if (mode != V4QImode)
d = gen_reg_rtx (V4QImode);
op_false = gen_lowpart (V4QImode, op_false);
op_true = gen_lowpart (V4QImode, op_true);
cmp = gen_lowpart (V4QImode, cmp);
blend_mode = V4QImode;
}
break;
case E_V2QImode:
if (TARGET_SSE4_1)
{
op_true = force_reg (mode, op_true);
gen = gen_mmx_pblendvb_v2qi;
}
gen = gen_mmx_pblendvb_v2qi;
break;
case E_V16QImode:
case E_V8HImode:
@ -3955,11 +3937,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (TARGET_SSE4_1)
{
gen = gen_sse4_1_pblendvb;
if (mode != V16QImode)
d = gen_reg_rtx (V16QImode);
op_false = gen_lowpart (V16QImode, op_false);
op_true = gen_lowpart (V16QImode, op_true);
cmp = gen_lowpart (V16QImode, cmp);
blend_mode = V16QImode;
}
break;
case E_V8SFmode:
@ -3978,11 +3956,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (TARGET_AVX2)
{
gen = gen_avx2_pblendvb;
if (mode != V32QImode)
d = gen_reg_rtx (V32QImode);
op_false = gen_lowpart (V32QImode, op_false);
op_true = gen_lowpart (V32QImode, op_true);
cmp = gen_lowpart (V32QImode, cmp);
blend_mode = V32QImode;
}
break;
@ -4014,26 +3988,36 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (gen != NULL)
{
emit_insn (gen (d, op_false, op_true, cmp));
if (d != dest)
emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
if (blend_mode == mode)
x = dest;
else
{
x = gen_reg_rtx (blend_mode);
op_false = gen_lowpart (blend_mode, op_false);
op_true = gen_lowpart (blend_mode, op_true);
cmp = gen_lowpart (blend_mode, cmp);
}
emit_insn (gen (x, op_false, op_true, cmp));
if (x != dest)
emit_move_insn (dest, gen_lowpart (mode, x));
}
else
{
op_true = force_reg (mode, op_true);
rtx t2, t3;
t2 = gen_reg_rtx (mode);
if (optimize)
t3 = gen_reg_rtx (mode);
else
t3 = dest;
ix86_emit_vec_binop (AND, mode, t2, op_true, cmp);
t2 = expand_simple_binop (mode, AND, op_true, cmp,
NULL, 1, OPTAB_DIRECT);
t3 = gen_reg_rtx (mode);
x = gen_rtx_NOT (mode, cmp);
ix86_emit_vec_binop (AND, mode, t3, x, op_false);
ix86_emit_vec_binop (IOR, mode, dest, t3, t2);
x = expand_simple_binop (mode, IOR, t3, t2,
dest, 1, OPTAB_DIRECT);
if (x != dest)
emit_move_insn (dest, x);
}
}

View file

@ -4956,7 +4956,7 @@
]
(const_string "TI")))])
(define_insn "*<code><mode>3"
(define_insn "<code><mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
(any_logic:MODEF
(match_operand:MODEF 1 "register_operand" "%0,x,v,v")