diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index ac674418b96..697eb475f48 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -13948,8 +13948,6 @@ rdseed_step: arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ op1 = expand_normal (arg0); - if (!integer_zerop (arg0)) - op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); op2 = expand_normal (arg1); if (!register_operand (op2, mode0)) @@ -13967,7 +13965,7 @@ rdseed_step: } op0 = gen_reg_rtx (mode0); - if (integer_zerop (arg0)) + if (op1 == const0_rtx) { /* If arg0 is 0, optimize right away into add or sub instruction that sets CCCmode flags. */ @@ -13977,7 +13975,14 @@ rdseed_step: else { /* Generate CF from input operand. */ - emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); + if (!CONST_INT_P (op1)) + { + op1 = convert_to_mode (QImode, op1, 1); + op1 = copy_to_mode_reg (QImode, op1); + emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); + } + else + emit_insn (gen_x86_stc ()); /* Generate instruction that consumes CF. */ op1 = gen_rtx_REG (CCCmode, FLAGS_REG); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index d4ff56ee8dd..c4591d63063 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -15954,6 +15954,17 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) && REGNO (XEXP (op1, 0)) == FLAGS_REG && XEXP (op1, 1) == const0_rtx) return CCCmode; + /* Similarly for *x86_cmc pattern. + Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) + and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)). + It is sufficient to test that the operand modes are CCCmode. 
*/ + else if (code == LTU + && GET_CODE (op0) == NEG + && GET_CODE (XEXP (op0, 0)) == LTU + && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode + && GET_CODE (op1) == GEU + && GET_MODE (XEXP (op1, 0)) == CCCmode) + return CCCmode; else return CCmode; case GTU: /* CF=0 & ZF=0 */ @@ -21305,6 +21316,31 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, *total = 0; return true; } + /* Match x + (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) + (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */ + if (mode == CCCmode + && GET_CODE (op0) == NEG + && GET_CODE (XEXP (op0, 0)) == LTU + && REG_P (XEXP (XEXP (op0, 0), 0)) + && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode + && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG + && XEXP (XEXP (op0, 0), 1) == const0_rtx + && GET_CODE (op1) == GEU + && REG_P (XEXP (op1, 0)) + && GET_MODE (XEXP (op1, 0)) == CCCmode + && REGNO (XEXP (op1, 0)) == FLAGS_REG + && XEXP (op1, 1) == const0_rtx) + { + /* This is *x86_cmc. */ + if (!speed) + *total = COSTS_N_BYTES (1); + else if (TARGET_SLOW_STC) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (1); + return true; + } if (SCALAR_INT_MODE_P (GET_MODE (op0)) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c7439f89bdf..5ac9c78d3ba 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -448,6 +448,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD] #define TARGET_DEST_FALSE_DEP_FOR_GLC \ ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC] +#define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC] /* Feature tests against the various architecture variations. 
*/ enum ix86_arch_indices { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e6ebc461e52..0929115ed4d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -114,6 +114,7 @@ UNSPEC_INSN_FALSE_DEP UNSPEC_SBB UNSPEC_CC_NE + UNSPEC_STC ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -1999,6 +2000,53 @@ [(set_attr "type" "ssecomi") (set_attr "prefix" "evex") (set_attr "mode" "HF")]) + +;; Set carry flag. +(define_insn "x86_stc" + [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))] + "" + "stc" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al. +(define_peephole2 + [(match_scratch:QI 0 "r") + (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))] + "TARGET_SLOW_STC && !optimize_insn_for_size_p ()" + [(set (match_dup 0) (const_int 1)) + (parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC (plus:QI (match_dup 0) (const_int -1)) + (match_dup 0))) + (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])]) + +;; Complement carry flag. +(define_insn "*x86_cmc" + [(set (reg:CCC FLAGS_REG) + (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) + (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))] + "" + "cmc" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "use_carry" "1") + (set_attr "modrm" "0")]) + +;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al. +(define_peephole2 + [(match_scratch:QI 0 "r") + (set (reg:CCC FLAGS_REG) + (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) + (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))] + "TARGET_SLOW_STC && !optimize_insn_for_size_p ()" + [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0))) + (parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC (plus:QI (match_dup 0) (const_int -1)) + (match_dup 0))) + (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])]) ;; Push/pop instructions. 
@@ -8107,6 +8155,34 @@ "#" "&& 1" [(const_int 0)]) + +;; Set the carry flag from the carry flag. +(define_insn_and_split "*setccc" + [(set (reg:CCC FLAGS_REG) + (reg:CCC FLAGS_REG))] + "ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)]) + +;; Set the carry flag from the carry flag. +(define_insn_and_split "*setcc_qi_negqi_ccc_1_" + [(set (reg:CCC FLAGS_REG) + (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))] + "ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)]) + +;; Set the carry flag from the carry flag. +(define_insn_and_split "*setcc_qi_negqi_ccc_2_" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)) + (const_int 0)] UNSPEC_CC_NE))] + "ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)]) ;; Overflow setting add instructions diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index e1c72cddf1f..c3229d269b2 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -698,3 +698,7 @@ DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", m_NONE) /* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion before a transfer of control flow out of the function. */ DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL) + +/* X86_TUNE_SLOW_STC: This disables use of stc, clc and cmc carry flag + modifications on architectures where these operations are slow. 
*/ +DEF_TUNE (X86_TUNE_SLOW_STC, "slow_stc", m_PENT4) diff --git a/gcc/testsuite/gcc.target/i386/cmc-1.c b/gcc/testsuite/gcc.target/i386/cmc-1.c new file mode 100644 index 00000000000..58e922ad12c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmc-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned int o1; +unsigned int o2; + +unsigned int foo_xor (unsigned int a, unsigned int b, + unsigned int c, unsigned int d) +{ + unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1); + return __builtin_ia32_addcarryx_u32 (c1 ^ 1, c, d, &o2); +} + +unsigned int foo_sub (unsigned int a, unsigned int b, + unsigned int c, unsigned int d) +{ + unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1); + return __builtin_ia32_addcarryx_u32 (1 - c1, c, d, &o2); +} + +unsigned int foo_eqz (unsigned int a, unsigned int b, + unsigned int c, unsigned int d) +{ + unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1); + return __builtin_ia32_addcarryx_u32 (c1 == 0, c, d, &o2); +} + +/* { dg-final { scan-assembler "cmc" } } */ diff --git a/gcc/testsuite/gcc.target/i386/stc-1.c b/gcc/testsuite/gcc.target/i386/stc-1.c new file mode 100644 index 00000000000..857c939dbea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/stc-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef unsigned int u32; + +unsigned int foo (unsigned int a, unsigned int b, unsigned int *c) +{ + return __builtin_ia32_addcarryx_u32 (1, a, b, c); +} + +unsigned int bar (unsigned int b, unsigned int *c) +{ + return __builtin_ia32_addcarryx_u32 (1, 2, b, c); +} + +unsigned int baz (unsigned int a, unsigned int *c) +{ + return __builtin_ia32_addcarryx_u32 (1, a, 3, c); +} + +/* { dg-final { scan-assembler "stc" } } */