Preserve operand equality when expanding conditional moves and vector
conditions (PR61747 test case).

* gcc/internal-fn.cc (expand_vec_cond_optab_fn): when op1/op2 of the
  VEC_COND are the same trees as the comparison operands op0a/op0b, reuse
  the RTL operands of the comparison built by vector_compare_rtx instead
  of expanding them a second time.
* gcc/optabs.cc (emit_conditional_move): when optimizing, force an
  expensive constant comparison operand into a register and substitute
  that same register for op2/op3 when they are equal to it.  The
  substitution is restricted to cmpmode == mode, because modeless
  constants (e.g. CONST_INT) compare equal across modes and the forced
  register is created in cmpmode.
* gcc/testsuite/g++.target/i386/pr61747.C: new test expecting four "min"
  and four "max" matches in the generated assembly.

NOTE(review): the "index" blob hashes below are carried over from the
original posting and do not reflect the added cmpmode == mode guard; the
two foo overloads in the test were reconstructed from the 42-line hunk
count and the expected match counts -- confirm against the upstream test.

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 2bf4fc492fe..1b34d4dc8ed 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3020,8 +3020,21 @@ expand_vec_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   icode = convert_optab_handler (optab, mode, cmp_op_mode);
   rtx comparison
     = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
-  rtx rtx_op1 = expand_normal (op1);
-  rtx rtx_op2 = expand_normal (op2);
+  /* vector_compare_rtx legitimizes operands, preserve equality when
+     expanding op1/op2.  */
+  rtx rtx_op1, rtx_op2;
+  if (operand_equal_p (op1, op0a))
+    rtx_op1 = XEXP (comparison, 0);
+  else if (operand_equal_p (op1, op0b))
+    rtx_op1 = XEXP (comparison, 1);
+  else
+    rtx_op1 = expand_normal (op1);
+  if (operand_equal_p (op2, op0a))
+    rtx_op2 = XEXP (comparison, 0);
+  else if (operand_equal_p (op2, op0b))
+    rtx_op2 = XEXP (comparison, 1);
+  else
+    rtx_op2 = expand_normal (op2);
 
   rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   create_output_operand (&ops[0], target, mode);
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 4e9f58f8060..32ff379ffc3 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5119,13 +5119,46 @@ emit_conditional_move (rtx target, struct rtx_comparison comp,
 	  last = get_last_insn ();
 	  do_pending_stack_adjust ();
 	  machine_mode cmpmode = comp.mode;
+	  rtx orig_op0 = XEXP (comparison, 0);
+	  rtx orig_op1 = XEXP (comparison, 1);
+	  rtx op2p = op2;
+	  rtx op3p = op3;
+	  /* If we are optimizing, force expensive constants into a register
+	     but preserve an eventual equality with op2/op3.  Only do so when
+	     the comparison and the move use the same mode: modeless constants
+	     can be rtx_equal_p across modes, and the register forced in
+	     CMPMODE is substituted for an operand of the MODE move.  */
+	  if (CONSTANT_P (orig_op0) && optimize
+	      && cmpmode == mode
+	      && (rtx_cost (orig_op0, mode, COMPARE, 0,
+			    optimize_insn_for_speed_p ())
+		  > COSTS_N_INSNS (1))
+	      && can_create_pseudo_p ())
+	    {
+	      if (rtx_equal_p (orig_op0, op2))
+		op2p = XEXP (comparison, 0) = force_reg (cmpmode, orig_op0);
+	      else if (rtx_equal_p (orig_op0, op3))
+		op3p = XEXP (comparison, 0) = force_reg (cmpmode, orig_op0);
+	    }
+	  if (CONSTANT_P (orig_op1) && optimize
+	      && cmpmode == mode
+	      && (rtx_cost (orig_op1, mode, COMPARE, 0,
+			    optimize_insn_for_speed_p ())
+		  > COSTS_N_INSNS (1))
+	      && can_create_pseudo_p ())
+	    {
+	      if (rtx_equal_p (orig_op1, op2))
+		op2p = XEXP (comparison, 1) = force_reg (cmpmode, orig_op1);
+	      else if (rtx_equal_p (orig_op1, op3))
+		op3p = XEXP (comparison, 1) = force_reg (cmpmode, orig_op1);
+	    }
 	  prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
 			    GET_CODE (comparison), NULL_RTX, unsignedp,
 			    OPTAB_WIDEN, &comparison, &cmpmode);
 	  if (comparison)
 	    {
 	      rtx res = emit_conditional_move_1 (target, comparison,
-						 op2, op3, mode);
+						 op2p, op3p, mode);
 	      if (res != NULL_RTX)
 		return res;
 	    }
diff --git a/gcc/testsuite/g++.target/i386/pr61747.C b/gcc/testsuite/g++.target/i386/pr61747.C
new file mode 100644
index 00000000000..024ef400052
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr61747.C
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++11 } */
+/* { dg-options "-O2 -msse4.1 -mfpmath=sse" } */
+
+typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;
+
+template<typename V1>
+V1 vmax(V1 a, V1 b) {
+  return (a>b) ? a : b;
+}
+
+template<typename V1>
+V1 vmin(V1 a, V1 b) {
+  return (a<b) ? a : b;
+}
+
+float foo(float a, float b, float c) {
+  return vmin(vmax(a,b),c);
+}
+
+float32x4_t foo(float32x4_t a, float32x4_t b, float32x4_t c) {
+  return vmin(vmax(a,b),c);
+}
+
+template<typename Float>
+Float bart(Float a) {
+  constexpr Float zero{0.f};
+  constexpr Float it = zero+4.f;
+  constexpr Float zt = zero-3.f;
+  return vmin(vmax(a,zt),it);
+}
+
+float bar(float a) {
+  return bart(a);
+}
+
+float32x4_t bar(float32x4_t a) {
+  return bart(a);
+}
+
+/* { dg-final { scan-assembler-times "min" 4 } } */
+/* { dg-final { scan-assembler-times "max" 4 } } */