i386: Convert from -mfused-madd to -ffp-contract.
* config/fused-madd.opt: New file. * config.gcc [i386-*, x86_64-*] (extra_options): Use it. * config/i386/i386.c (ix86_rtx_costs): Handle FMA. (TARGET_DEFAULT_TARGET_FLAGS): Remove MASK_FUSED_MADD. * config/i386/i386.opt (mfused-madd): Remove. * config/i386/sse.md (split_fma): Remove. (split_fms, split_fnma, split_fnms): Remove. From-SVN: r166642
This commit is contained in:
parent
75d319c280
commit
d48e78d639
8 changed files with 72 additions and 103 deletions
|
@ -1,3 +1,13 @@
|
|||
2010-11-11 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/fused-madd.opt: New file.
|
||||
* config.gcc [i386-*, x86_64-*] (extra_options): Use it.
|
||||
* config/i386/i386.c (ix86_rtx_costs): Handle FMA.
|
||||
(TARGET_DEFAULT_TARGET_FLAGS): Remove MASK_FUSED_MADD.
|
||||
* config/i386/i386.opt (mfused-madd): Remove.
|
||||
* config/i386/sse.md (split_fma): Remove.
|
||||
(split_fms, split_fnma, split_fnms): Remove.
|
||||
|
||||
2010-11-12 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/46088
|
||||
|
|
|
@ -311,6 +311,7 @@ i[34567]86-*-*)
|
|||
cpu_type=i386
|
||||
c_target_objs="i386-c.o"
|
||||
cxx_target_objs="i386-c.o"
|
||||
extra_options="${extra_options} fused-madd.opt"
|
||||
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
|
||||
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
|
||||
|
@ -322,6 +323,7 @@ x86_64-*-*)
|
|||
cpu_type=i386
|
||||
c_target_objs="i386-c.o"
|
||||
cxx_target_objs="i386-c.o"
|
||||
extra_options="${extra_options} fused-madd.opt"
|
||||
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
|
||||
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
|
||||
|
|
25
gcc/config/fused-madd.opt
Normal file
25
gcc/config/fused-madd.opt
Normal file
|
@ -0,0 +1,25 @@
|
|||
; -mfused-madd option (some targets only).
|
||||
;
|
||||
; Copyright (C) 2010
|
||||
; Free Software Foundation, Inc.
|
||||
;
|
||||
; This file is part of GCC.
|
||||
;
|
||||
; GCC is free software; you can redistribute it and/or modify it under
|
||||
; the terms of the GNU General Public License as published by the Free
|
||||
; Software Foundation; either version 3, or (at your option) any later
|
||||
; version.
|
||||
;
|
||||
; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
; WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
; for more details.
|
||||
;
|
||||
; You should have received a copy of the GNU General Public License
|
||||
; along with GCC; see the file COPYING3. If not see
|
||||
; <http://www.gnu.org/licenses/>.
|
||||
|
||||
mfused-madd
|
||||
Target Undocumented Alias(ffp-contract=, fast, off) Warn(%<-mfused-madd%> is deprecated; use %<-ffp-contract=%> instead)
|
||||
|
||||
; This comment is to ensure we retain the blank line above.
|
|
@ -28587,6 +28587,31 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
|
|||
}
|
||||
return false;
|
||||
|
||||
case FMA:
  {
    /* Cost of a fused multiply-add: the base cost of a multiply plus the
       cost of each of the three operands.  Sets *TOTAL and returns true,
       since the operands have been costed here.  */
    rtx sub;

    gcc_assert (FLOAT_MODE_P (mode));
    gcc_assert (TARGET_FMA || TARGET_FMA4);

    /* ??? SSE scalar/vector cost should be used here.  */
    /* ??? Bald assumption that fma has the same cost as fmul.  */
    *total = cost->fmul;
    *total += rtx_cost (XEXP (x, 1), FMA, speed);

    /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  Look through the
       NEG and cost only the operand being negated.  */
    sub = XEXP (x, 0);
    if (GET_CODE (sub) == NEG)
      sub = XEXP (sub, 0);
    *total += rtx_cost (sub, FMA, speed);

    sub = XEXP (x, 2);
    if (GET_CODE (sub) == NEG)
      sub = XEXP (sub, 0);
    *total += rtx_cost (sub, FMA, speed);
    return true;
  }
|
||||
|
||||
case MULT:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
{
|
||||
|
@ -34483,8 +34508,7 @@ ix86_autovectorize_vector_sizes (void)
|
|||
#define TARGET_DEFAULT_TARGET_FLAGS \
|
||||
(TARGET_DEFAULT \
|
||||
| TARGET_SUBTARGET_DEFAULT \
|
||||
| TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
|
||||
| MASK_FUSED_MADD)
|
||||
| TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
|
||||
|
||||
#undef TARGET_HANDLE_OPTION
|
||||
#define TARGET_HANDLE_OPTION ix86_handle_option
|
||||
|
|
|
@ -261,12 +261,6 @@ Target Report Mask(VZEROUPPER) Save
|
|||
Generate vzeroupper instruction before a transfer of control flow out of
|
||||
the function.
|
||||
|
||||
mfused-madd
|
||||
Target Report Mask(FUSED_MADD) Save
|
||||
Enable automatic generation of fused floating point multiply-add instructions
|
||||
if the ISA supports such instructions. The -mfused-madd option is on by
|
||||
default.
|
||||
|
||||
mdispatch-scheduler
|
||||
Target RejectNegative Var(flag_dispatch_scheduler)
|
||||
Do dispatch scheduling if processor is bdver1 and Haifa scheduling
|
||||
|
|
|
@ -1856,6 +1856,10 @@
|
|||
;; (set (reg1) (mem (addr1)))
|
||||
;; (set (reg2) (mult (reg1) (mem (addr2))))
|
||||
;; (set (reg3) (plus (reg2) (mem (addr3))))
|
||||
;;
|
||||
;; ??? This is historic, pre-dating the gimple fma transformation.
|
||||
;; We could now properly represent that only one memory operand is
|
||||
;; allowed and not be penalized during optimization.
|
||||
|
||||
;; Intrinsic FMA operations.
|
||||
|
||||
|
@ -2178,100 +2182,6 @@
|
|||
[(set_attr "type" "ssemuladd")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
|
||||
;;
|
||||
;; ??? If fused-madd were a generic flag, combine could do this without
|
||||
;; needing splitters here in the backend. Irritatingly, combine won't
|
||||
;; recognize many of these with mere splits, since only 3 or more insns
|
||||
;; are allowed to split during combine. Thankfully, there's always a
|
||||
;; split_all_insns pass that runs before reload.
|
||||
;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define_insn_and_split "*split_fma"
|
||||
[(set (match_operand:FMAMODE 0 "register_operand")
|
||||
(plus:FMAMODE
|
||||
(mult:FMAMODE
|
||||
(match_operand:FMAMODE 1 "nonimmediate_operand")
|
||||
(match_operand:FMAMODE 2 "nonimmediate_operand"))
|
||||
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_MATH && TARGET_FUSED_MADD
|
||||
&& (TARGET_FMA || TARGET_FMA4)
|
||||
&& !(reload_in_progress || reload_completed)"
|
||||
{ gcc_unreachable (); }
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(fma:FMAMODE
|
||||
(match_dup 1)
|
||||
(match_dup 2)
|
||||
(match_dup 3)))]
|
||||
"")
|
||||
|
||||
;; Floating multiply and subtract.
|
||||
(define_insn_and_split "*split_fms"
|
||||
[(set (match_operand:FMAMODE 0 "register_operand")
|
||||
(minus:FMAMODE
|
||||
(mult:FMAMODE
|
||||
(match_operand:FMAMODE 1 "nonimmediate_operand")
|
||||
(match_operand:FMAMODE 2 "nonimmediate_operand"))
|
||||
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_MATH && TARGET_FUSED_MADD
|
||||
&& (TARGET_FMA || TARGET_FMA4)
|
||||
&& !(reload_in_progress || reload_completed)"
|
||||
{ gcc_unreachable (); }
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(fma:FMAMODE
|
||||
(match_dup 1)
|
||||
(match_dup 2)
|
||||
(neg:FMAMODE (match_dup 3))))]
|
||||
"")
|
||||
|
||||
;; Floating point negative multiply and add.
|
||||
;; Recognize (-a * b + c) via the canonical form: c - (a * b).
|
||||
(define_insn_and_split "*split_fnma"
|
||||
[(set (match_operand:FMAMODE 0 "register_operand")
|
||||
(minus:FMAMODE
|
||||
(match_operand:FMAMODE 3 "nonimmediate_operand")
|
||||
(mult:FMAMODE
|
||||
(match_operand:FMAMODE 1 "nonimmediate_operand")
|
||||
(match_operand:FMAMODE 2 "nonimmediate_operand"))))]
|
||||
"TARGET_SSE_MATH && TARGET_FUSED_MADD
|
||||
&& (TARGET_FMA || TARGET_FMA4)
|
||||
&& !(reload_in_progress || reload_completed)"
|
||||
{ gcc_unreachable (); }
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(fma:FMAMODE
|
||||
(neg:FMAMODE (match_dup 1))
|
||||
(match_dup 2)
|
||||
(match_dup 3)))]
|
||||
"")
|
||||
|
||||
;; Floating point negative multiply and subtract.
|
||||
;; Recognize (-a * b - c) in the form: (-a * b) - c.
|
||||
(define_insn_and_split "*split_fnms"
|
||||
[(set (match_operand:FMAMODE 0 "register_operand")
|
||||
(minus:FMAMODE
|
||||
(mult:FMAMODE
|
||||
(neg:FMAMODE
|
||||
(match_operand:FMAMODE 1 "nonimmediate_operand"))
|
||||
(match_operand:FMAMODE 2 "nonimmediate_operand"))
|
||||
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_MATH && TARGET_FUSED_MADD
|
||||
&& (TARGET_FMA || TARGET_FMA4)
|
||||
&& !(reload_in_progress || reload_completed)"
|
||||
{ gcc_unreachable (); }
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(fma:FMAMODE
|
||||
(neg:FMAMODE (match_dup 1))
|
||||
(match_dup 2)
|
||||
(neg:FMAMODE (match_dup 3))))]
|
||||
"")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel single-precision floating point conversion operations
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2010-11-11 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* gcc.target/i386/sse-24.c: Use -ffp-contract.
|
||||
|
||||
2010-11-11 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/inline-5.c: Fix testcase.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* PR target/44338 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -mno-fused-madd" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -ffp-contract=off" } */
|
||||
|
||||
#include "sse-23.c"
|
||||
|
|
Loading…
Add table
Reference in a new issue