i386: Improve and optimize ix86_expand_sse_movcc
Modernize ix86_expand_sse_movcc to use expand_simple_{unop,binop}
infrastructure to avoid manual twiddling with output registers.
Also fix a couple of inconsistent vector_all_ones_operand usages,
break a couple of unnecessary else-if chains, eliminate common
subexpressions and do some general code simplifications.

2022-01-15  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	* config/i386/i386-expand.c (ix86_expand_sse_movcc): Use
	expand_simple_unop and expand_simple_binop instead of manually
	constructing NOT, AND and IOR RTXes.  Use vector_all_ones_operand
	consistently.  Eliminate common subexpressions and simplify code.
	* config/i386/sse.md (<any_logic:code><MODEF:mode>3): New expander.
	(<any_logic:code><MODEF:mode>3): Make public.
This commit is contained in:
parent
dcf8fe1eea
commit
f4a2cecd68
2 changed files with 62 additions and 78 deletions
|
@ -3781,6 +3781,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
{
|
||||
machine_mode mode = GET_MODE (dest);
|
||||
machine_mode cmpmode = GET_MODE (cmp);
|
||||
rtx x;
|
||||
|
||||
/* Simplify trivial VEC_COND_EXPR to avoid ICE in pr97506. */
|
||||
if (rtx_equal_p (op_true, op_false))
|
||||
|
@ -3789,8 +3790,6 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
return;
|
||||
}
|
||||
|
||||
rtx t2, t3, x;
|
||||
|
||||
/* If we have an integer mask and FP value then we need
|
||||
to cast mask to FP mode. */
|
||||
if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
|
||||
|
@ -3813,12 +3812,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
? force_reg (mode, op_false) : op_false);
|
||||
if (op_true == CONST0_RTX (mode))
|
||||
{
|
||||
rtx n = gen_reg_rtx (cmpmode);
|
||||
if (cmpmode == E_DImode && !TARGET_64BIT)
|
||||
emit_insn (gen_knotdi (n, cmp));
|
||||
{
|
||||
x = gen_reg_rtx (cmpmode);
|
||||
emit_insn (gen_knotdi (x, cmp));
|
||||
}
|
||||
else
|
||||
emit_insn (gen_rtx_SET (n, gen_rtx_fmt_e (NOT, cmpmode, cmp)));
|
||||
cmp = n;
|
||||
x = expand_simple_unop (cmpmode, NOT, cmp, NULL, 1);
|
||||
cmp = x;
|
||||
/* Reverse op_true op_false. */
|
||||
std::swap (op_true, op_false);
|
||||
}
|
||||
|
@ -3826,22 +3827,24 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
if (mode == HFmode)
|
||||
emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp));
|
||||
else
|
||||
{
|
||||
rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
|
||||
emit_insn (gen_rtx_SET (dest, vec_merge));
|
||||
}
|
||||
emit_insn (gen_rtx_SET (dest,
|
||||
gen_rtx_VEC_MERGE (mode,
|
||||
op_true, op_false, cmp)));
|
||||
return;
|
||||
}
|
||||
else if (vector_all_ones_operand (op_true, mode)
|
||||
&& op_false == CONST0_RTX (mode))
|
||||
|
||||
if (vector_all_ones_operand (op_true, mode)
|
||||
&& op_false == CONST0_RTX (mode))
|
||||
{
|
||||
emit_insn (gen_rtx_SET (dest, cmp));
|
||||
emit_move_insn (dest, cmp);
|
||||
return;
|
||||
}
|
||||
else if (op_false == CONST0_RTX (mode))
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
ix86_emit_vec_binop (AND, mode, dest, cmp, op_true);
|
||||
x = expand_simple_binop (mode, AND, cmp, op_true,
|
||||
dest, 1, OPTAB_DIRECT);
|
||||
if (x != dest)
|
||||
emit_move_insn (dest, x);
|
||||
return;
|
||||
}
|
||||
else if (op_true == CONST0_RTX (mode))
|
||||
|
@ -3851,13 +3854,16 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
ix86_emit_vec_binop (AND, mode, dest, x, op_false);
|
||||
return;
|
||||
}
|
||||
else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
|
||||
else if (vector_all_ones_operand (op_true, mode))
|
||||
{
|
||||
op_false = force_reg (mode, op_false);
|
||||
ix86_emit_vec_binop (IOR, mode, dest, cmp, op_false);
|
||||
x = expand_simple_binop (mode, IOR, cmp, op_false,
|
||||
dest, 1, OPTAB_DIRECT);
|
||||
if (x != dest)
|
||||
emit_move_insn (dest, x);
|
||||
return;
|
||||
}
|
||||
else if (TARGET_XOP)
|
||||
|
||||
if (TARGET_XOP)
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
|
||||
|
@ -3865,16 +3871,17 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
|| !nonimmediate_operand (op_false, mode))
|
||||
op_false = force_reg (mode, op_false);
|
||||
|
||||
emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
|
||||
op_true,
|
||||
op_false)));
|
||||
emit_insn (gen_rtx_SET (dest,
|
||||
gen_rtx_IF_THEN_ELSE (mode, cmp,
|
||||
op_true, op_false)));
|
||||
return;
|
||||
}
|
||||
|
||||
rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
|
||||
rtx d = dest;
|
||||
machine_mode blend_mode = mode;
|
||||
|
||||
if (!vector_operand (op_true, mode))
|
||||
if (GET_MODE_SIZE (mode) < 16
|
||||
|| !vector_operand (op_true, mode))
|
||||
op_true = force_reg (mode, op_true);
|
||||
|
||||
op_false = force_reg (mode, op_false);
|
||||
|
@ -3883,10 +3890,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
{
|
||||
case E_V2SFmode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
gen = gen_mmx_blendvps;
|
||||
op_true = force_reg (mode, op_true);
|
||||
}
|
||||
gen = gen_mmx_blendvps;
|
||||
break;
|
||||
case E_V4SFmode:
|
||||
if (TARGET_SSE4_1)
|
||||
|
@ -3898,54 +3902,32 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
break;
|
||||
case E_SFmode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
gen = gen_sse4_1_blendvss;
|
||||
op_true = force_reg (mode, op_true);
|
||||
}
|
||||
gen = gen_sse4_1_blendvss;
|
||||
break;
|
||||
case E_DFmode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
gen = gen_sse4_1_blendvsd;
|
||||
op_true = force_reg (mode, op_true);
|
||||
}
|
||||
gen = gen_sse4_1_blendvsd;
|
||||
break;
|
||||
case E_V8QImode:
|
||||
case E_V4HImode:
|
||||
case E_V2SImode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
|
||||
gen = gen_mmx_pblendvb_v8qi;
|
||||
if (mode != V8QImode)
|
||||
d = gen_reg_rtx (V8QImode);
|
||||
op_false = gen_lowpart (V8QImode, op_false);
|
||||
op_true = gen_lowpart (V8QImode, op_true);
|
||||
cmp = gen_lowpart (V8QImode, cmp);
|
||||
blend_mode = V8QImode;
|
||||
}
|
||||
break;
|
||||
case E_V4QImode:
|
||||
case E_V2HImode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
|
||||
gen = gen_mmx_pblendvb_v4qi;
|
||||
if (mode != V4QImode)
|
||||
d = gen_reg_rtx (V4QImode);
|
||||
op_false = gen_lowpart (V4QImode, op_false);
|
||||
op_true = gen_lowpart (V4QImode, op_true);
|
||||
cmp = gen_lowpart (V4QImode, cmp);
|
||||
blend_mode = V4QImode;
|
||||
}
|
||||
break;
|
||||
case E_V2QImode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
|
||||
gen = gen_mmx_pblendvb_v2qi;
|
||||
}
|
||||
gen = gen_mmx_pblendvb_v2qi;
|
||||
break;
|
||||
case E_V16QImode:
|
||||
case E_V8HImode:
|
||||
|
@ -3955,11 +3937,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
if (TARGET_SSE4_1)
|
||||
{
|
||||
gen = gen_sse4_1_pblendvb;
|
||||
if (mode != V16QImode)
|
||||
d = gen_reg_rtx (V16QImode);
|
||||
op_false = gen_lowpart (V16QImode, op_false);
|
||||
op_true = gen_lowpart (V16QImode, op_true);
|
||||
cmp = gen_lowpart (V16QImode, cmp);
|
||||
blend_mode = V16QImode;
|
||||
}
|
||||
break;
|
||||
case E_V8SFmode:
|
||||
|
@ -3978,11 +3956,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
if (TARGET_AVX2)
|
||||
{
|
||||
gen = gen_avx2_pblendvb;
|
||||
if (mode != V32QImode)
|
||||
d = gen_reg_rtx (V32QImode);
|
||||
op_false = gen_lowpart (V32QImode, op_false);
|
||||
op_true = gen_lowpart (V32QImode, op_true);
|
||||
cmp = gen_lowpart (V32QImode, cmp);
|
||||
blend_mode = V32QImode;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -4014,26 +3988,36 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
|
||||
if (gen != NULL)
|
||||
{
|
||||
emit_insn (gen (d, op_false, op_true, cmp));
|
||||
if (d != dest)
|
||||
emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
|
||||
if (blend_mode == mode)
|
||||
x = dest;
|
||||
else
|
||||
{
|
||||
x = gen_reg_rtx (blend_mode);
|
||||
op_false = gen_lowpart (blend_mode, op_false);
|
||||
op_true = gen_lowpart (blend_mode, op_true);
|
||||
cmp = gen_lowpart (blend_mode, cmp);
|
||||
}
|
||||
|
||||
emit_insn (gen (x, op_false, op_true, cmp));
|
||||
|
||||
if (x != dest)
|
||||
emit_move_insn (dest, gen_lowpart (mode, x));
|
||||
}
|
||||
else
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
rtx t2, t3;
|
||||
|
||||
t2 = gen_reg_rtx (mode);
|
||||
if (optimize)
|
||||
t3 = gen_reg_rtx (mode);
|
||||
else
|
||||
t3 = dest;
|
||||
|
||||
ix86_emit_vec_binop (AND, mode, t2, op_true, cmp);
|
||||
t2 = expand_simple_binop (mode, AND, op_true, cmp,
|
||||
NULL, 1, OPTAB_DIRECT);
|
||||
|
||||
t3 = gen_reg_rtx (mode);
|
||||
x = gen_rtx_NOT (mode, cmp);
|
||||
ix86_emit_vec_binop (AND, mode, t3, x, op_false);
|
||||
|
||||
ix86_emit_vec_binop (IOR, mode, dest, t3, t2);
|
||||
x = expand_simple_binop (mode, IOR, t3, t2,
|
||||
dest, 1, OPTAB_DIRECT);
|
||||
if (x != dest)
|
||||
emit_move_insn (dest, x);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4956,7 +4956,7 @@
|
|||
]
|
||||
(const_string "TI")))])
|
||||
|
||||
(define_insn "*<code><mode>3"
|
||||
(define_insn "<code><mode>3"
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
|
||||
(any_logic:MODEF
|
||||
(match_operand:MODEF 1 "register_operand" "%0,x,v,v")
|
||||
|
|
Loading…
Add table
Reference in a new issue