re PR target/67317 ([x86] Silly code generation for _addcarry_u32/_addcarry_u64)

PR target/67317
	* config/i386/i386.md (*add<mode>3_cc): Remove insn pattern.
	(addqi3_cc): Ditto.
	(UNSPEC_ADD_CARRY): Remove.
	(addqi3_cconly_overflow): New expander.
	(*add<dwi>3_doubleword): Split to add<mode>3_cconly_overflow.
	Adjust for changed add<mode>3_carry.
	(*neg<dwi>2_doubleword): Adjust for changed add<mode>3_carry.
	(*sub<dwi>3_doubleword): Adjust for changed sub<mode>3_carry.
	(<plusminus_insn><mode>3_carry): Remove expander.
	(*<plusminus_insn><mode>3_carry): Split insn pattern to
	add<mode>3_carry and sub<mode>3_carry.
	(plusminus_carry_mnemonic): Remove code attribute.
	(add<mode>3_carry): Canonicalize insn pattern.
	(*addsi3_carry_zext): Ditto.
	(sub<mode>3_carry): Ditto.
	(*subsi3_carry_zext): Ditto.
	(adcx<mode>3): Remove insn pattern.
	(addcarry<mode>): New insn pattern.
	(subborrow<mode>): Ditto.
	* config/i386/i386.c (ix86_expand_strlensi_unroll_1): Use
	gen_addqi3_cconly_overflow instead of gen_addqi3_cc.
	(ix86_expand_builtin) <case IX86_BUILTIN_SBB32,
	case IX86_BUILTIN_SBB64, case IX86_BUILTIN_ADDCARRYX32,
	case IX86_BUILTIN_ADDCARRYX64>: Use CODE_FOR_subborrowsi,
	CODE_FOR_subborrowdi, CODE_FOR_addcarrysi and CODE_FOR_addcarrydi.
	Rewrite expander to not clobber carry flag chains.

testsuite/ChangeLog:

	PR target/67317
	* gcc.target/i386/pr67317-1.c: New test.
	* gcc.target/i386/pr67317-2.c: Ditto.
	* gcc.target/i386/pr67317-3.c: Ditto.
	* gcc.target/i386/pr67317-4.c: Ditto.
	* gcc.target/i386/adx-addcarryx32-1.c: Also scan for adcl.
	* gcc.target/i386/adx-addcarryx32-2.c: Also scan for adcq.

From-SVN: r227271
This commit is contained in:
Uros Bizjak 2015-08-27 20:29:37 +02:00 committed by Uros Bizjak
parent 2b6fb4aa47
commit a443ee6e5b
10 changed files with 236 additions and 111 deletions

View file

@ -1,3 +1,33 @@
2015-08-27 Uros Bizjak <ubizjak@gmail.com>
PR target/67317
* config/i386/i386.md (*add<mode>3_cc): Remove insn pattern.
(addqi3_cc): Ditto.
(UNSPEC_ADD_CARRY): Remove.
(addqi3_cconly_overflow): New expander.
(*add<dwi>3_doubleword): Split to add<mode>3_cconly_overflow.
Adjust for changed add<mode>3_carry.
(*neg<dwi>2_doubleword): Adjust for changed add<mode>3_carry.
(*sub<dwi>3_doubleword): Adjust for changed sub<mode>3_carry.
(<plusminus_insn><mode>3_carry): Remove expander.
(*<plusminus_insn><mode>3_carry): Split insn pattern to
add<mode>3_carry and sub<mode>3_carry.
(plusminus_carry_mnemonic): Remove code attribute.
(add<mode>3_carry): Canonicalize insn pattern.
(*addsi3_carry_zext): Ditto.
(sub<mode>3_carry): Ditto.
(*subsi3_carry_zext): Ditto.
(adcx<mode>3): Remove insn pattern.
(addcarry<mode>): New insn pattern.
(subborrow<mode>): Ditto.
* config/i386/i386.c (ix86_expand_strlensi_unroll_1): Use
gen_addqi3_cconly_overflow instead of gen_addqi3_cc.
(ix86_expand_builtin) <case IX86_BUILTIN_SBB32,
case IX86_BUILTIN_SBB64, case IX86_BUILTIN_ADDCARRYX32,
case IX86_BUILTIN_ADDCARRYX64>: Use CODE_FOR_subborrowsi,
CODE_FOR_subborrowdi, CODE_FOR_addcarrysi and CODE_FOR_addcarrydi.
Rewrite expander to not clobber carry flag chains.
2015-08-27 Pat Haugen <pthaugen@us.ibm.com>
* config/rs6000/vector.md (vec_shr_<mode>): Fix to do a shift

View file

@ -25531,7 +25531,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
tmp = gen_rtx_REG (CCmode, FLAGS_REG);
cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
@ -39510,60 +39510,57 @@ rdseed_step:
return target;
case IX86_BUILTIN_SBB32:
icode = CODE_FOR_subsi3_carry;
icode = CODE_FOR_subborrowsi;
mode0 = SImode;
goto addcarryx;
goto handlecarry;
case IX86_BUILTIN_SBB64:
icode = CODE_FOR_subdi3_carry;
icode = CODE_FOR_subborrowdi;
mode0 = DImode;
goto addcarryx;
goto handlecarry;
case IX86_BUILTIN_ADDCARRYX32:
icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
icode = CODE_FOR_addcarrysi;
mode0 = SImode;
goto addcarryx;
goto handlecarry;
case IX86_BUILTIN_ADDCARRYX64:
icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
icode = CODE_FOR_addcarrydi;
mode0 = DImode;
addcarryx:
handlecarry:
arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
op0 = gen_reg_rtx (QImode);
/* Generate CF from input operand. */
op1 = expand_normal (arg0);
op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
/* Gen ADCX instruction to compute X+Y+CF. */
op2 = expand_normal (arg1);
op3 = expand_normal (arg2);
if (!REG_P (op2))
if (!register_operand (op2, mode0))
op2 = copy_to_mode_reg (mode0, op2);
if (!REG_P (op3))
op3 = expand_normal (arg2);
if (!register_operand (op3, mode0))
op3 = copy_to_mode_reg (mode0, op3);
op0 = gen_reg_rtx (mode0);
op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
/* Store the result. */
op4 = expand_normal (arg3);
if (!address_operand (op4, VOIDmode))
{
op4 = convert_memory_address (Pmode, op4);
op4 = copy_addr_to_reg (op4);
}
emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
/* Generate CF from input operand. */
emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
/* Generate instruction that consumes CF. */
op0 = gen_reg_rtx (mode0);
op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
pat = gen_rtx_LTU (mode0, op1, const0_rtx);
emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
/* Return current CF value. */
if (target == 0)
@ -39571,6 +39568,10 @@ addcarryx:
PUT_MODE (pat, QImode);
emit_insn (gen_rtx_SET (target, pat));
/* Store the result. */
emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
return target;
case IX86_BUILTIN_READ_FLAGS:

View file

@ -102,7 +102,6 @@
UNSPEC_SAHF
UNSPEC_PARITY
UNSPEC_FSTCW
UNSPEC_ADD_CARRY
UNSPEC_FLDCW
UNSPEC_REP
UNSPEC_LD_MPIC ; load_macho_picbase
@ -848,8 +847,6 @@
(define_code_attr plusminus_mnemonic
[(plus "add") (ss_plus "adds") (us_plus "addus")
(minus "sub") (ss_minus "subs") (us_minus "subus")])
(define_code_attr plusminus_carry_mnemonic
[(plus "adc") (minus "sbb")])
(define_code_attr multdiv_mnemonic
[(mult "mul") (div "div")])
@ -5317,46 +5314,21 @@
"ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
"#"
"reload_completed"
[(parallel [(set (reg:CC FLAGS_REG)
(unspec:CC [(match_dup 1) (match_dup 2)]
UNSPEC_ADD_CARRY))
[(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:DWIH (match_dup 1) (match_dup 2))
(match_dup 1)))
(set (match_dup 0)
(plus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(plus:DWIH
(match_dup 4)
(plus:DWIH
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
(match_dup 5))))
(match_dup 4))
(match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
(define_insn "*add<mode>3_cc"
[(set (reg:CC FLAGS_REG)
(unspec:CC
[(match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
(match_operand:SWI48 2 "<general_operand>" "r<i>,rm")]
UNSPEC_ADD_CARRY))
(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
(plus:SWI48 (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
"add{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "addqi3_cc"
[(set (reg:CC FLAGS_REG)
(unspec:CC
[(match_operand:QI 1 "nonimmediate_operand" "%0,0")
(match_operand:QI 2 "general_operand" "qn,qm")]
UNSPEC_ADD_CARRY))
(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(plus:QI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, QImode, operands)"
"add{b}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
(define_insn "*add<mode>_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
(plus:SWI48
@ -6264,10 +6236,10 @@
(minus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(minus:DWIH
(match_dup 4)
(plus:DWIH
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
(match_dup 5))))
(minus:DWIH
(match_dup 4)
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
(match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
@ -6431,29 +6403,17 @@
;; Add with carry and subtract with borrow
(define_expand "<plusminus_insn><mode>3_carry"
[(parallel
[(set (match_operand:SWI 0 "nonimmediate_operand")
(plusminus:SWI
(match_operand:SWI 1 "nonimmediate_operand")
(plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand")
(const_int 0)])
(match_operand:SWI 2 "<general_operand>"))))
(clobber (reg:CC FLAGS_REG))])]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)")
(define_insn "*<plusminus_insn><mode>3_carry"
(define_insn "add<mode>3_carry"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
(plusminus:SWI
(match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
(plus:SWI
(plus:SWI
(match_operator 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))))
(match_operator:SWI 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
(match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
"<plusminus_carry_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
"adc{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
@ -6462,10 +6422,11 @@
(define_insn "*addsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
(plus:SI (match_operator 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:SI 2 "x86_64_general_operand" "rme")))))
(plus:SI
(plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:SI 1 "register_operand" "%0"))
(match_operand:SI 2 "x86_64_general_operand" "rme"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
"adc{l}\t{%2, %k0|%k0, %2}"
@ -6474,45 +6435,96 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
;; There is no point in generating the ADCX instruction.  ADC is shorter and faster.
(define_insn "addcarry<mode>"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI48
(plus:SWI48
(match_operator:SWI48 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(match_operand:SWI48 1 "nonimmediate_operand" "%0"))
(match_operand:SWI48 2 "nonimmediate_operand" "rm"))
(match_dup 1)))
(set (match_operand:SWI48 0 "register_operand" "=r")
(plus:SWI48 (plus:SWI48 (match_op_dup 4
[(match_dup 3) (const_int 0)])
(match_dup 1))
(match_dup 2)))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
"adc{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
(define_insn "sub<mode>3_carry"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
(minus:SWI
(minus:SWI
(match_operand:SWI 1 "nonimmediate_operand" "0,0")
(match_operator:SWI 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)]))
(match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
"sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
(define_insn "*subsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(minus:SI (match_operand:SI 1 "register_operand" "0")
(plus:SI (match_operator 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:SI 2 "x86_64_general_operand" "rme")))))
(minus:SI
(minus:SI
(match_operand:SI 1 "register_operand" "0")
(match_operator:SI 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)]))
(match_operand:SI 2 "x86_64_general_operand" "rme"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
"sbb{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
;; ADCX instruction
(define_insn "adcx<mode>3"
(define_insn "subborrow<mode>"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(match_operand:SWI48 1 "nonimmediate_operand" "0")
(plus:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "%0")
(plus:SWI48
(match_operator 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(match_operand:SWI48 2 "nonimmediate_operand" "rm")))
(const_int 0)))
(match_operator:SWI48 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(match_operand:SWI48 2 "nonimmediate_operand" "rm"))))
(set (match_operand:SWI48 0 "register_operand" "=r")
(plus:SWI48 (match_dup 1)
(plus:SWI48 (match_op_dup 4
[(match_dup 3) (const_int 0)])
(match_dup 2))))]
"TARGET_ADX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
"adcx\t{%2, %0|%0, %2}"
(minus:SWI48 (minus:SWI48 (match_dup 1)
(match_op_dup 4
[(match_dup 3) (const_int 0)]))
(match_dup 2)))]
"ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
"sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
;; Overflow setting add instructions
(define_expand "addqi3_cconly_overflow"
[(parallel
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:QI
(match_operand:QI 0 "nonimmediate_operand")
(match_operand:QI 1 "general_operand"))
(match_dup 0)))
(clobber (match_scratch:QI 2))])]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))")
(define_insn "*add<mode>3_cconly_overflow"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
@ -8842,9 +8854,9 @@
(set (match_dup 0) (neg:DWIH (match_dup 1)))])
(parallel
[(set (match_dup 2)
(plus:DWIH (match_dup 3)
(plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
(const_int 0))))
(plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
(match_dup 3))
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 2)

View file

@ -1,3 +1,13 @@
2015-08-27 Uros Bizjak <ubizjak@gmail.com>
PR target/67317
* gcc.target/i386/pr67317-1.c: New test.
* gcc.target/i386/pr67317-2.c: Ditto.
* gcc.target/i386/pr67317-3.c: Ditto.
* gcc.target/i386/pr67317-4.c: Ditto.
* gcc.target/i386/adx-addcarryx32-1.c: Also scan for adcl.
* gcc.target/i386/adx-addcarryx32-2.c: Also scan for adcq.
2015-08-27 Pat Haugen <pthaugen@us.ibm.com>
* gcc.target/powerpc/vec-shr.c: New.

View file

@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-madx -O2" } */
/* { dg-final { scan-assembler-times "adcx" 2 } } */
/* { dg-final { scan-assembler-times "adc\[xl\]" 2 } } */
/* { dg-final { scan-assembler-times "sbbl" 1 } } */
#include <x86intrin.h>

View file

@ -1,6 +1,6 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-madx -O2" } */
/* { dg-final { scan-assembler-times "adcx" 2 } } */
/* { dg-final { scan-assembler-times "adc\[xq\]" 2 } } */
/* { dg-final { scan-assembler-times "sbbq" 1 } } */
#include <x86intrin.h>

View file

@ -0,0 +1,18 @@
/* PR target/67317 */
/* { dg-do compile } */
/* { dg-options "-O2" } */
typedef unsigned int u32;
/* Chain two 32-bit add-with-carry builtins.  The inner call adds a and c
   with a carry-in of 0 and stores the sum in result0; the carry flag value
   it returns is passed directly as the carry-in of the outer call, which
   adds b and d and stores the sum in result1.  The carry-out of the outer
   call is discarded.  Returns result0 ^ result1 so that both sums stay
   live.  The dg-final directives after this function require that the
   generated assembly contains neither "addb" nor "setc"/"setnc".  */
u32 testcarry_u32 (u32 a, u32 b, u32 c, u32 d)
{
u32 result0, result1;
__builtin_ia32_addcarryx_u32
(__builtin_ia32_addcarryx_u32 (0, a, c, &result0), b, d, &result1);
return result0 ^ result1;
}
/* { dg-final { scan-assembler-not "addb" } } */
/* { dg-final { scan-assembler-not "setn?c" } } */

View file

@ -0,0 +1,18 @@
/* PR target/67317 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2" } */
typedef unsigned long long u64;
/* Chain two 64-bit add-with-carry builtins.  The inner call adds a and c
   with a carry-in of 0 and stores the sum in result0; the carry flag value
   it returns is passed directly as the carry-in of the outer call, which
   adds b and d and stores the sum in result1.  The carry-out of the outer
   call is discarded.  Returns result0 ^ result1 so that both sums stay
   live.  The dg-final directives after this function require that the
   generated assembly contains neither "addb" nor "setc"/"setnc".  */
u64 testcarry_u64 (u64 a, u64 b, u64 c, u64 d)
{
u64 result0, result1;
__builtin_ia32_addcarryx_u64
(__builtin_ia32_addcarryx_u64 (0, a, c, &result0), b, d, &result1);
return result0 ^ result1;
}
/* { dg-final { scan-assembler-not "addb" } } */
/* { dg-final { scan-assembler-not "setn?c" } } */

View file

@ -0,0 +1,18 @@
/* PR target/67317 */
/* { dg-do compile } */
/* { dg-options "-O2" } */
typedef unsigned int u32;
/* Chain two 32-bit subtract-with-borrow builtins.  The inner call is given
   a borrow-in of 0 with operands a and c and stores its result in result0;
   the flag value it returns is passed directly as the borrow-in of the
   outer call, which operates on b and d and stores its result in result1.
   The value returned by the outer call is discarded.  Returns
   result0 ^ result1 so that both results stay live.  The dg-final
   directives after this function require that the generated assembly
   contains neither "addb" nor "setc"/"setnc".  */
u32 testcarry_u32 (u32 a, u32 b, u32 c, u32 d)
{
u32 result0, result1;
__builtin_ia32_sbb_u32
(__builtin_ia32_sbb_u32 (0, a, c, &result0), b, d, &result1);
return result0 ^ result1;
}
/* { dg-final { scan-assembler-not "addb" } } */
/* { dg-final { scan-assembler-not "setn?c" } } */

View file

@ -0,0 +1,18 @@
/* PR target/67317 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2" } */
typedef unsigned long long u64;
/* Chain two 64-bit subtract-with-borrow builtins.  The inner call is given
   a borrow-in of 0 with operands a and c and stores its result in result0;
   the flag value it returns is passed directly as the borrow-in of the
   outer call, which operates on b and d and stores its result in result1.
   The value returned by the outer call is discarded.  Returns
   result0 ^ result1 so that both results stay live.  The dg-final
   directives after this function require that the generated assembly
   contains neither "addb" nor "setc"/"setnc".  */
u64 testcarry_u64 (u64 a, u64 b, u64 c, u64 d)
{
u64 result0, result1;
__builtin_ia32_sbb_u64
(__builtin_ia32_sbb_u64 (0, a, c, &result0), b, d, &result1);
return result0 ^ result1;
}
/* { dg-final { scan-assembler-not "addb" } } */
/* { dg-final { scan-assembler-not "setn?c" } } */