From f28eb39cee72340b8f202c535368887111047bf3 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 6 Jun 2007 08:53:29 +0200 Subject: [PATCH] sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm as flags setting insn. * config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm as flags setting insn. (sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn. * config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove. (UNSPEC_ROUND): New. ("sse4_1_round<mode>2"): New insn pattern. ("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for SSE4.1 targets. ("floor<mode>2"): Rename from floordf2 and floorsf2. Macroize expander using SSEMODEF mode macro. Expand using "sse4_1_round<mode>2" pattern for SSE4.1 targets. ("ceil<mode>2"): Rename from ceildf2 and ceilsf2. Macroize expander using SSEMODEF mode macro. Expand using "sse4_1_round<mode>2" pattern for SSE4.1 targets. ("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2. Macroize expander using SSEMODEF mode macro. Expand using "sse4_1_round<mode>2" pattern for SSE4.1 targets. * config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use UNSPEC_ROUND instead of UNSPEC_ROUNDP. ("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of UNSPEC_ROUNDS. From-SVN: r125356 --- gcc/ChangeLog | 33 ++++++- gcc/config/i386/i386.md | 205 ++++++++++++++++------------------------ gcc/config/i386/sse.md | 28 +++--- 3 files changed, 124 insertions(+), 142 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 99faa972544..937ac42ec56 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +2007-06-06 Uros Bizjak + + * config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm + as flags setting insn. + (sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn. + +2007-06-06 Uros Bizjak + + * config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove. + (UNSPEC_ROUND): New. + ("sse4_1_round<mode>2"): New insn pattern. + ("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for + SSE4.1 targets. + ("floor<mode>2"): Rename from floordf2 and floorsf2. 
Macroize + expander using SSEMODEF mode macro. Expand using + "sse4_1_round<mode>2" pattern for SSE4.1 targets. + ("ceil<mode>2"): Rename from ceildf2 and ceilsf2. Macroize + expander using SSEMODEF mode macro. Expand using + "sse4_1_round<mode>2" pattern for SSE4.1 targets. + ("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2. Macroize + expander using SSEMODEF mode macro. Expand using + "sse4_1_round<mode>2" pattern for SSE4.1 targets. + * config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use + UNSPEC_ROUND instead of UNSPEC_ROUNDP. + ("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of + UNSPEC_ROUNDS. + 2007-06-06 Jan Sjodin Sebastian Pop @@ -53,7 +80,8 @@ * cfgexpand (label_rtx_for_bb): Likewise. (expand_gimple_basic_block): Likewise. * cfghooks.c (dump_bb): Likewise. - (lv_adjust_loop_header_phi): Avoid using C++ keywords as variable names. + (lv_adjust_loop_header_phi): Avoid using C++ keywords as + variable names. (lv_add_condition_to_bb): Likewise. * cfglayout (relink_block_chain): Cast according to the coding conventions. @@ -64,7 +92,8 @@ (dump_recorded_exit): Likewise. * cfgloop.h (enum loop_estimation): Move out of struct scope... (struct loop): ... from here. - * cfgloopmanip.c (rpe_enum_p): Cast according to the coding conventions. + * cfgloopmanip.c (rpe_enum_p): Cast according to the coding + conventions. * cfgrtl.c (rtl_create_basic_block): Likewise. (rtl_split_block): Likewise. (rtl_dump_bb): Likewise. 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3e9a15fbf7c..43e58ae8dc7 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -171,8 +171,7 @@ (UNSPEC_MPSADBW 138) (UNSPEC_PHMINPOSUW 139) (UNSPEC_PTEST 140) - (UNSPEC_ROUNDP 141) - (UNSPEC_ROUNDS 142) + (UNSPEC_ROUND 141) ; For SSE4.2 support (UNSPEC_CRC32 143) @@ -16999,6 +16998,17 @@ }) +(define_insn "sse4_1_round2" + [(set (match_operand:SSEMODEF 0 "register_operand" "=x") + (unspec:SSEMODEF [(match_operand:SSEMODEF 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_SSE4_1" + "rounds\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "mode" "")]) + (define_insn "rintxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] @@ -17018,12 +17028,18 @@ && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math - && !optimize_size)" + && (TARGET_SSE4_1 || !optimize_size))" { if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math - && !optimize_size) - ix86_expand_rint (operand0, operand1); + && (TARGET_SSE4_1 || !optimize_size)) + { + if (TARGET_SSE4_1) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x04))); + else + ix86_expand_rint (operand0, operand1); + } else { rtx op0 = gen_reg_rtx (XFmode); @@ -17044,7 +17060,7 @@ && !flag_trapping_math && !flag_rounding_math && !optimize_size" { - if ((mode != DFmode) || TARGET_64BIT) + if (TARGET_64BIT || (mode != DFmode)) ix86_expand_round (operand0, operand1); else ix86_expand_rounddf_32 (operand0, operand1); @@ -17250,20 +17266,25 @@ DONE; }) -(define_expand "floordf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && 
flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "floor2" + [(use (match_operand:SSEMODEF 0 "register_operand" "")) + (use (match_operand:SSEMODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_SSE4_1 || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_SSE4_1 || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_SSE4_1) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x01))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_floorceil (operand0, operand1, true); else ix86_expand_floorceildf_32 (operand0, operand1, true); @@ -17273,36 +17294,10 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_floor (op0, op1)); - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "floorsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_floorceil (operand0, operand1, true); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1)); - - emit_insn 
(gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) @@ -17536,20 +17531,25 @@ DONE; }) -(define_expand "ceildf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "ceil2" + [(use (match_operand:SSEMODEF 0 "register_operand" "")) + (use (match_operand:SSEMODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_SSE4_1 || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_SSE4_1 || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_SSE4_1) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x02))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_floorceil (operand0, operand1, false); else ix86_expand_floorceildf_32 (operand0, operand1, false); @@ -17559,36 +17559,10 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_ceil (op0, op1)); - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "ceilsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && 
TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_floorceil (operand0, operand1, false); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1)); - - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) @@ -17820,20 +17794,25 @@ DONE; }) -(define_expand "btruncdf2" - [(use (match_operand:DF 0 "register_operand" "")) - (use (match_operand:DF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" +(define_expand "btrunc2" + [(use (match_operand:SSEMODEF 0 "register_operand" "")) + (use (match_operand:SSEMODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations && !optimize_size) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_SSE4_1 || !optimize_size))" { - if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH - && !flag_trapping_math) + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_SSE4_1 || !optimize_size)) { - if (TARGET_64BIT) + if (TARGET_SSE4_1) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x03))); + else if (TARGET_64BIT || (mode != DFmode)) ix86_expand_trunc (operand0, operand1); else ix86_expand_truncdf_32 (operand0, operand1); @@ -17843,36 +17822,10 @@ rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_frndintxf2_trunc (op0, 
op1)); - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); - } - DONE; -}) - -(define_expand "btruncsf2" - [(use (match_operand:SF 0 "register_operand" "")) - (use (match_operand:SF 1 "register_operand" ""))] - "((TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations) - || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math)) - && !optimize_size" -{ - if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH - && !flag_trapping_math) - ix86_expand_trunc (operand0, operand1); - else - { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_trunc (op0, op1)); - - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); } DONE; }) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index bdb653d7a37..042146ec613 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6338,7 +6338,7 @@ [(set (match_operand:V2DF 0 "register_operand" "=x") (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to_15_operand" "n")] - UNSPEC_ROUNDP))] + UNSPEC_ROUND))] "TARGET_SSE4_1" "roundpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") @@ -6349,7 +6349,7 @@ [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to_15_operand" "n")] - UNSPEC_ROUNDP))] + UNSPEC_ROUND))] "TARGET_SSE4_1" "roundps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") @@ -6361,7 +6361,7 @@ (vec_merge:V2DF (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x") (match_operand:SI 3 "const_0_to_15_operand" "n")] - UNSPEC_ROUNDS) + UNSPEC_ROUND) (match_operand:V2DF 1 "register_operand" "0") (const_int 1)))] "TARGET_SSE4_1" @@ -6375,7 +6375,7 @@ (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") 
(match_operand:SI 3 "const_0_to_15_operand" "n")] - UNSPEC_ROUNDS) + UNSPEC_ROUND) (match_operand:V4SF 1 "register_operand" "0") (const_int 1)))] "TARGET_SSE4_1" @@ -6504,14 +6504,14 @@ (match_operand:SI 3 "register_operand" "d,d,d,d") (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")] UNSPEC_PCMPESTR)) - (clobber (match_scratch:SI 5 "=c,c,X,X")) - (clobber (match_scratch:V16QI 6 "=X,X,Y0,Y0"))] + (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X")) + (clobber (match_scratch:SI 6 "= X, X,c,c"))] "TARGET_SSE4_2" "@ - pcmpestri\t{%4, %2, %0|%0, %2, %4} - pcmpestri\t{%4, %2, %0|%0, %2, %4} pcmpestrm\t{%4, %2, %0|%0, %2, %4} - pcmpestrm\t{%4, %2, %0|%0, %2, %4}" + pcmpestrm\t{%4, %2, %0|%0, %2, %4} + pcmpestri\t{%4, %2, %0|%0, %2, %4} + pcmpestri\t{%4, %2, %0|%0, %2, %4}" [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") @@ -6613,14 +6613,14 @@ (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m") (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")] UNSPEC_PCMPISTR)) - (clobber (match_scratch:SI 3 "=c,c,X,X")) - (clobber (match_scratch:V16QI 4 "=X,X,Y0,Y0"))] + (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X")) + (clobber (match_scratch:SI 4 "= X, X,c,c"))] "TARGET_SSE4_2" "@ - pcmpistri\t{%2, %1, %0|%0, %1, %2} - pcmpistri\t{%2, %1, %0|%0, %1, %2} pcmpistrm\t{%2, %1, %0|%0, %1, %2} - pcmpistrm\t{%2, %1, %0|%0, %1, %2}" + pcmpistrm\t{%2, %1, %0|%0, %1, %2} + pcmpistri\t{%2, %1, %0|%0, %1, %2} + pcmpistri\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1")