Add fma support
From-SVN: r165515
This commit is contained in:
parent
e74bf53a08
commit
1b1562a559
24 changed files with 1086 additions and 154 deletions
|
@ -1,3 +1,74 @@
|
|||
2010-10-14 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* doc/md.texi (Standard Names): Add fma@var{m}4 documentation.
|
||||
|
||||
* doc/rtl.texi (RTX_TERNARY): Document FMA is ternary. Add
|
||||
SIGN_EXTRACT and ZERO_EXTRACT which were missing.
|
||||
(Standard names): Document fma.
|
||||
|
||||
* doc/cpp.texi (Common Predefined Macros): Document __FP_FAST_FMA,
|
||||
__FP_FAST_FMAF, __FP_FAST_FMAL.
|
||||
|
||||
* builtins.c (expand_builtin_mathfn_ternary): New function for
|
||||
expanding ternary math functions, like fma.
|
||||
(expand_builtin): Call it for the fma builtins.
|
||||
|
||||
* simplify-rtx.c (simplify_ternary_operation): Don't simplify FMA
|
||||
ops at present.
|
||||
|
||||
* tree-vect-stmts.c (vectorizable_call): Allow 3 argument
|
||||
vectorizable functions to support vectorizing fma.
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_builtin_vectorized_function):
|
||||
Handle fma builtins.
|
||||
|
||||
* config/rs6000/vsx.md (UNSPEC_VSX_MADD): Delete.
|
||||
(UNSPEC_VSX_MSUB): Ditto.
|
||||
(UNSPEC_VSX_NMADD): Ditto.
|
||||
(UNSPEC_VSX_NMSUB): Ditto.
|
||||
(vsx_fmadd<mode>4*): Rewrite to use FMA rtl in some cases instead
|
||||
of UNSPEC. Renumber combiner patterns.
|
||||
(vsx_fmsub<mode>4*): Ditto.
|
||||
(vsx_fnmadd<mode>4*): Ditto.
|
||||
(vsx_fnmsub<mode>4*): Ditto.
|
||||
|
||||
* config/rs6000/altivec.md (UNSPEC_VNMSUBFP): Delete.
|
||||
(altivec_vmaddfp): Rewrite to use FMA rtl if no fused
|
||||
multiply/add. Rename combiner pattern, and add TARGET_FUSED_MADD
|
||||
test.
|
||||
(altivec_vmaddfp_1): Ditto.
|
||||
(altivec_vmaddfp_2): Ditto.
|
||||
(altivec_mulv4sf3): Ditto.
|
||||
(altivec_vnmsubfp): Ditto.
|
||||
(altivec_vnmsubfp_1): Ditto.
|
||||
(altivec_vnmsubfp_2): Ditto.
|
||||
(altivec_vnmsubfp_3): Delete.
|
||||
|
||||
* config/rs6000/rs6000.md (UNSPEC_FMA): Delete.
|
||||
(fmasf4): Rewrite to always use FMA rtl. Add combiners to
|
||||
generate the four fused multiply/add ops. Combine power, powerpc
|
||||
ops.
|
||||
(fmasf4_fpr): Ditto.
|
||||
(fmssf4_fpr): Ditto.
|
||||
(fnmasf4_fpr): Ditto.
|
||||
(fnmssf4_fpr): Ditto.
|
||||
(fmadf4): Ditto.
|
||||
(fmadf4_fpr): Ditto.
|
||||
(fmsdf4_fpr): Ditto.
|
||||
(fnmadf4_fpr): Ditto.
|
||||
(fnmsdf4_fpr): Ditto.
|
||||
|
||||
* optabs.h (OTI_fma): Add fma optab.
|
||||
(fma_optab): Ditto.
|
||||
|
||||
* genopinit.c (optabs): Set fma optab.
|
||||
|
||||
* rtl.def (FMA): Add FMA rtl.
|
||||
|
||||
* tree.h (mode_has_fma): New function to return if MODE supports a
|
||||
fast multiply and add instruction.
|
||||
* builtins.c (mode_has_fma): Ditto.
|
||||
|
||||
2010-10-15 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* lto-streamer-out.c (write_symbol): Use pointer set of seen
|
||||
|
|
|
@ -106,6 +106,7 @@ static void expand_errno_check (tree, rtx);
|
|||
static rtx expand_builtin_mathfn (tree, rtx, rtx);
|
||||
static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
|
||||
static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
|
||||
static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
|
||||
static rtx expand_builtin_interclass_mathfn (tree, rtx);
|
||||
static rtx expand_builtin_sincos (tree);
|
||||
static rtx expand_builtin_cexpi (tree, rtx);
|
||||
|
@ -2185,6 +2186,79 @@ expand_builtin_mathfn_2 (tree exp, rtx target, rtx subtarget)
|
|||
return target;
|
||||
}
|
||||
|
||||
/* Expand a call to a builtin ternary math function (currently only fma).
|
||||
Return NULL_RTX if a normal call should be emitted rather than expanding the
|
||||
function in-line. EXP is the expression that is a call to the builtin
|
||||
function; it must take exactly three REAL_TYPE arguments. The incoming
|
||||
TARGET is not used: the result is computed into a newly generated register.
|
||||
SUBTARGET may be used as the target for computing the first of EXP's
|
||||
operands. */
|
||||
|
||||
static rtx
|
||||
expand_builtin_mathfn_ternary (tree exp, rtx target, rtx subtarget)
|
||||
{
|
||||
optab builtin_optab;
|
||||
rtx op0, op1, op2, insns;
|
||||
tree fndecl = get_callee_fndecl (exp);
|
||||
tree arg0, arg1, arg2;
|
||||
enum machine_mode mode;
|
||||
|
||||
if (!validate_arglist (exp, REAL_TYPE, REAL_TYPE, REAL_TYPE, VOID_TYPE))
|
||||
return NULL_RTX;
|
||||
|
||||
arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
arg2 = CALL_EXPR_ARG (exp, 2);
|
||||
|
||||
switch (DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
CASE_FLT_FN (BUILT_IN_FMA):
|
||||
builtin_optab = fma_optab; break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* The operation is done in the mode of the call expression's type. */
|
||||
mode = TYPE_MODE (TREE_TYPE (exp));
|
||||
|
||||
/* Before working hard, check whether the instruction is available. */
|
||||
if (optab_handler (builtin_optab, mode) == CODE_FOR_nothing)
|
||||
return NULL_RTX;
|
||||
|
||||
target = gen_reg_rtx (mode); /* Make a new register to place the result in. */
|
||||
|
||||
/* Always stabilize the argument list. */
|
||||
CALL_EXPR_ARG (exp, 0) = arg0 = builtin_save_expr (arg0);
|
||||
CALL_EXPR_ARG (exp, 1) = arg1 = builtin_save_expr (arg1);
|
||||
CALL_EXPR_ARG (exp, 2) = arg2 = builtin_save_expr (arg2);
|
||||
|
||||
op0 = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
/* Compute into TARGET.
|
||||
Set TARGET to wherever the result comes back. */
|
||||
target = expand_ternary_op (mode, builtin_optab, op0, op1, op2,
|
||||
target, 0);
|
||||
|
||||
/* If we were unable to expand via the builtin, stop the sequence
|
||||
(without outputting the insns) and call to the library function
|
||||
with the stabilized argument list. */
|
||||
if (target == 0)
|
||||
{
|
||||
end_sequence ();
|
||||
return expand_call (exp, target, target == const0_rtx);
|
||||
}
|
||||
|
||||
/* Output the entire sequence. */
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn (insns);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/* Expand a call to the builtin sin and cos math functions.
|
||||
Return NULL_RTX if a normal call should be emitted rather than expanding the
|
||||
function in-line. EXP is the expression that is a call to the builtin
|
||||
|
@ -5828,6 +5902,12 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
|
|||
return target;
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_FMA):
|
||||
target = expand_builtin_mathfn_ternary (exp, target, subtarget);
|
||||
if (target)
|
||||
return target;
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_ILOGB):
|
||||
if (! flag_unsafe_math_optimizations)
|
||||
break;
|
||||
|
@ -13830,3 +13910,10 @@ is_inexpensive_builtin (tree decl)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Return true if an fma instruction pattern is available for MODE, i.e. fma_optab has a handler other than CODE_FOR_nothing for it. */
|
||||
|
||||
bool
|
||||
mode_has_fma (enum machine_mode mode)
|
||||
{
|
||||
return optab_handler (fma_optab, mode) != CODE_FOR_nothing;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,15 @@
|
|||
2010-10-14 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* c-cppbuiltin.c (builtin_define_float_constants): Emit
|
||||
__FP_FAST_FMA, __FP_FAST_FMAF, and __FP_FAST_FMAL if the machine
|
||||
has the appropriate fma builtins.
|
||||
(c_cpp_builtins): Adjust call to builtin_define_float_constants.
|
||||
|
||||
2010-10-14 Iain Sandoe <iains@gcc.gnu.org>
|
||||
|
||||
merge from FSF apple 'trunk' branch.
|
||||
merge from FSF apple 'trunk' branch.
|
||||
2006 Fariborz Jahanian <fjahanian@apple.com>
|
||||
|
||||
|
||||
Radars 4436866, 4505126, 4506903, 4517826
|
||||
* c-common.c (c_common_resword): Define @property and its attributes.
|
||||
* c-common.h: Define property attribute enum entries.
|
||||
|
@ -14,17 +21,17 @@
|
|||
(objc_add_property_variable): Likewise.
|
||||
(objc_build_getter_call): Likewise.
|
||||
(objc_build_setter_call) Likewise.
|
||||
|
||||
|
||||
2010-10-13 Iain Sandoe <iains@gcc.gnu.org>
|
||||
|
||||
merge from FSF apple 'trunk' branch.
|
||||
merge from FSF apple 'trunk' branch.
|
||||
2006-04-26 Fariborz Jahanian <fjahanian@apple.com>
|
||||
|
||||
Radar 3803157 (method attributes)
|
||||
* c-common.c (handle_deprecated_attribute): Recognize
|
||||
objc methods as valid declarations.
|
||||
* c-common.h: Declare objc_method_decl ().
|
||||
* stub-objc.c (objc_method_decl): New stub.
|
||||
* stub-objc.c (objc_method_decl): New stub.
|
||||
|
||||
2010-10-08 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
|
|
|
@ -60,6 +60,7 @@ static void builtin_define_type_max (const char *, tree);
|
|||
static void builtin_define_type_minmax (const char *, const char *, tree);
|
||||
static void builtin_define_type_sizeof (const char *, tree);
|
||||
static void builtin_define_float_constants (const char *,
|
||||
const char *,
|
||||
const char *,
|
||||
const char *,
|
||||
tree);
|
||||
|
@ -78,6 +79,7 @@ static void
|
|||
builtin_define_float_constants (const char *name_prefix,
|
||||
const char *fp_suffix,
|
||||
const char *fp_cast,
|
||||
const char *fma_suffix,
|
||||
tree type)
|
||||
{
|
||||
/* Used to convert radix-based values to base 10 values in several cases.
|
||||
|
@ -260,6 +262,13 @@ builtin_define_float_constants (const char *name_prefix,
|
|||
NaN has quiet NaNs. */
|
||||
sprintf (name, "__%s_HAS_QUIET_NAN__", name_prefix);
|
||||
builtin_define_with_int_value (name, MODE_HAS_NANS (TYPE_MODE (type)));
|
||||
|
||||
/* Note whether we have fast FMA. */
|
||||
if (mode_has_fma (TYPE_MODE (type)))
|
||||
{
|
||||
sprintf (name, "__FP_FAST_FMA%s", fma_suffix);
|
||||
builtin_define_with_int_value (name, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Define __DECx__ constants for TYPE using NAME_PREFIX and SUFFIX. */
|
||||
|
@ -607,13 +616,15 @@ c_cpp_builtins (cpp_reader *pfile)
|
|||
builtin_define_with_int_value ("__DEC_EVAL_METHOD__",
|
||||
TARGET_DEC_EVAL_METHOD);
|
||||
|
||||
builtin_define_float_constants ("FLT", "F", "%s", float_type_node);
|
||||
builtin_define_float_constants ("FLT", "F", "%s", "F", float_type_node);
|
||||
/* Cast the double precision constants. This is needed when single
|
||||
precision constants are specified or when pragma FLOAT_CONST_DECIMAL64
|
||||
is used. The correct result is computed by the compiler when using
|
||||
macros that include a cast. */
|
||||
builtin_define_float_constants ("DBL", "L", "((double)%s)", double_type_node);
|
||||
builtin_define_float_constants ("LDBL", "L", "%s", long_double_type_node);
|
||||
builtin_define_float_constants ("DBL", "L", "((double)%s)", "",
|
||||
double_type_node);
|
||||
builtin_define_float_constants ("LDBL", "L", "%s", "L",
|
||||
long_double_type_node);
|
||||
|
||||
/* For decfloat.h. */
|
||||
builtin_define_decimal_float_constants ("DEC32", "DF", dfloat32_type_node);
|
||||
|
|
|
@ -143,7 +143,6 @@
|
|||
(UNSPEC_VUPKLS_V4SF 325)
|
||||
(UNSPEC_VUPKHU_V4SF 326)
|
||||
(UNSPEC_VUPKLU_V4SF 327)
|
||||
(UNSPEC_VNMSUBFP 328)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
|
@ -513,12 +512,39 @@
|
|||
"vsel %0,%3,%2,%1"
|
||||
[(set_attr "type" "vecperm")])
|
||||
|
||||
;; Fused multiply add
|
||||
(define_insn "altivec_vmaddfp"
|
||||
;; Fused multiply add. By default expand the FMA into (plus (mult)) to help
|
||||
;; loop unrolling. Don't do negate multiply ops, because of complications with
|
||||
;; honoring signed zero and fused-madd.
|
||||
|
||||
(define_expand "altivec_vmaddfp"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "")
|
||||
(plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "")
|
||||
(match_operand:V4SF 2 "register_operand" ""))
|
||||
(match_operand:V4SF 3 "register_operand" "")))]
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
|
||||
{
|
||||
if (!TARGET_FUSED_MADD)
|
||||
{
|
||||
emit_insn (gen_altivec_vmaddfp_2 (operands[0], operands[1], operands[2],
|
||||
operands[3]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "*altivec_vmaddfp_1"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "register_operand" "v"))
|
||||
(match_operand:V4SF 3 "register_operand" "v")))]
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
|
||||
"vmaddfp %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
(define_insn "altivec_vmaddfp_2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "register_operand" "v")))]
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
|
||||
"vmaddfp %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
@ -529,7 +555,7 @@
|
|||
[(use (match_operand:V4SF 0 "register_operand" ""))
|
||||
(use (match_operand:V4SF 1 "register_operand" ""))
|
||||
(use (match_operand:V4SF 2 "register_operand" ""))]
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
|
||||
"
|
||||
{
|
||||
rtx neg0;
|
||||
|
@ -627,34 +653,18 @@
|
|||
}")
|
||||
|
||||
;; Fused multiply subtract
|
||||
(define_expand "altivec_vnmsubfp"
|
||||
[(match_operand:V4SF 0 "register_operand" "")
|
||||
(match_operand:V4SF 1 "register_operand" "")
|
||||
(match_operand:V4SF 2 "register_operand" "")
|
||||
(match_operand:V4SF 3 "register_operand" "")]
|
||||
(define_insn "altivec_vnmsubfp"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(neg:V4SF
|
||||
(fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "register_operand" "v")
|
||||
(neg:V4SF
|
||||
(match_operand:V4SF 3 "register_operand" "v")))))]
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
|
||||
{
|
||||
if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (SFmode))
|
||||
{
|
||||
emit_insn (gen_altivec_vnmsubfp_1 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
|
||||
{
|
||||
emit_insn (gen_altivec_vnmsubfp_2 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_insn (gen_altivec_vnmsubfp_3 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
"vnmsubfp %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
(define_insn "altivec_vnmsubfp_1"
|
||||
(define_insn "*altivec_vnmsubfp_1"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(neg:V4SF
|
||||
(minus:V4SF
|
||||
|
@ -667,7 +677,7 @@
|
|||
"vnmsubfp %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
(define_insn "altivec_vnmsubfp_2"
|
||||
(define_insn "*altivec_vnmsubfp_2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(minus:V4SF
|
||||
(match_operand:V4SF 3 "register_operand" "v")
|
||||
|
@ -679,16 +689,6 @@
|
|||
"vnmsubfp %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
(define_insn "altivec_vnmsubfp_3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "register_operand" "v")]
|
||||
UNSPEC_VNMSUBFP))]
|
||||
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
|
||||
"vnmsubfp %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
(define_insn "altivec_vmsumu<VI_char>m"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
|
||||
|
|
|
@ -3938,6 +3938,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
|
|||
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
|
||||
return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
|
||||
break;
|
||||
case BUILT_IN_FMA:
|
||||
if (VECTOR_UNIT_VSX_P (V2DFmode)
|
||||
&& out_mode == DFmode && out_n == 2
|
||||
&& in_mode == DFmode && in_n == 2)
|
||||
return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
|
||||
break;
|
||||
case BUILT_IN_FMAF:
|
||||
if (VECTOR_UNIT_VSX_P (V4SFmode)
|
||||
&& out_mode == SFmode && out_n == 4
|
||||
&& in_mode == SFmode && in_n == 4)
|
||||
return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
|
||||
else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
|
||||
&& out_mode == SFmode && out_n == 4
|
||||
&& in_mode == SFmode && in_n == 4)
|
||||
return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
|
||||
break;
|
||||
case BUILT_IN_TRUNC:
|
||||
if (VECTOR_UNIT_VSX_P (V2DFmode)
|
||||
&& out_mode == DFmode && out_n == 2
|
||||
|
|
|
@ -5844,6 +5844,78 @@
|
|||
"fres %0,%1"
|
||||
[(set_attr "type" "fp")])
|
||||
|
||||
; builtin fmaf support
|
||||
; If the user explicitly uses the fma builtin, don't convert this to
|
||||
; (plus (mult op1 op2) op3)
|
||||
(define_expand "fmasf4"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "")
|
||||
(fma:SF (match_operand:SF 1 "gpc_reg_operand" "")
|
||||
(match_operand:SF 2 "gpc_reg_operand" "")
|
||||
(match_operand:SF 3 "gpc_reg_operand" "")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
|
||||
"")
|
||||
|
||||
(define_insn "fmasf4_fpr"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:SF 2 "gpc_reg_operand" "f")
|
||||
(match_operand:SF 3 "gpc_reg_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
|
||||
"*
|
||||
{
|
||||
return ((TARGET_POWERPC)
|
||||
? \"fmadds %0,%1,%2,%3\"
|
||||
: \"{fma|fmadd} %0,%1,%2,%3\");
|
||||
}"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
(define_insn "*fmssf4_fpr"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:SF 2 "gpc_reg_operand" "f")
|
||||
(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
|
||||
"*
|
||||
{
|
||||
return ((TARGET_POWERPC)
|
||||
? \"fmsubs %0,%1,%2,%3\"
|
||||
: \"{fms|fmsub} %0,%1,%2,%3\");
|
||||
}"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
(define_insn "*fnmasf4_fpr"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:SF 2 "gpc_reg_operand" "f")
|
||||
(match_operand:SF 3 "gpc_reg_operand" "f"))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
|
||||
"*
|
||||
{
|
||||
return ((TARGET_POWERPC)
|
||||
? \"fnmadds %0,%1,%2,%3\"
|
||||
: \"{fnma|fnmadd} %0,%1,%2,%3\");
|
||||
}"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
(define_insn "*fnmssf4_fpr"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:SF 2 "gpc_reg_operand" "f")
|
||||
(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
|
||||
"*
|
||||
{
|
||||
return ((TARGET_POWERPC)
|
||||
? \"fnmsubs %0,%1,%2,%3\"
|
||||
: \"{fnms|fnmsub} %0,%1,%2,%3\");
|
||||
}"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
; Fused multiply/add ops created by the combiner
|
||||
(define_insn "*fmaddsf4_powerpc"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
|
||||
|
@ -5854,7 +5926,7 @@
|
|||
"fmadds %0,%1,%2,%3"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
|
||||
(define_insn "*fmaddsf4_power"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
|
||||
|
@ -6312,6 +6384,62 @@
|
|||
"frsqrte %0,%1"
|
||||
[(set_attr "type" "fp")])
|
||||
|
||||
; builtin fma support
|
||||
; If the user explicitly uses the fma builtin, don't convert this to
|
||||
; (plus (mult op1 op2) op3)
|
||||
(define_expand "fmadf4"
|
||||
[(set (match_operand:DF 0 "gpc_reg_operand" "")
|
||||
(fma:DF (match_operand:DF 1 "gpc_reg_operand" "")
|
||||
(match_operand:DF 2 "gpc_reg_operand" "")
|
||||
(match_operand:DF 3 "gpc_reg_operand" "")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
|
||||
"")
|
||||
|
||||
(define_insn "fmadf4_fpr"
|
||||
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
|
||||
(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:DF 2 "gpc_reg_operand" "f")
|
||||
(match_operand:DF 3 "gpc_reg_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
|
||||
&& VECTOR_UNIT_NONE_P (DFmode)"
|
||||
"{fma|fmadd} %0,%1,%2,%3"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
(define_insn "*fmsdf4_fpr"
|
||||
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
|
||||
(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:DF 2 "gpc_reg_operand" "f")
|
||||
(neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
|
||||
&& VECTOR_UNIT_NONE_P (DFmode)"
|
||||
"{fms|fmsub} %0,%1,%2,%3"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
(define_insn "*fnmadf4_fpr"
|
||||
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
|
||||
(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:DF 2 "gpc_reg_operand" "f")
|
||||
(match_operand:DF 3 "gpc_reg_operand" "f"))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
|
||||
&& VECTOR_UNIT_NONE_P (DFmode)"
|
||||
"{fnma|fnmadd} %0,%1,%2,%3"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
(define_insn "*fnmsdf4_fpr"
|
||||
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
|
||||
(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
|
||||
(match_operand:DF 2 "gpc_reg_operand" "f")
|
||||
(neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
|
||||
&& VECTOR_UNIT_NONE_P (DFmode)"
|
||||
"{fnms|fnmsub} %0,%1,%2,%3"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_maddsub_s")])
|
||||
|
||||
; Fused multiply/add ops created by the combiner
|
||||
(define_insn "*fmadddf4_fpr"
|
||||
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
|
||||
(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
|
||||
|
|
|
@ -194,11 +194,7 @@
|
|||
(UNSPEC_VSX_CVUXDSP 507)
|
||||
(UNSPEC_VSX_CVSPSXDS 508)
|
||||
(UNSPEC_VSX_CVSPUXDS 509)
|
||||
(UNSPEC_VSX_MADD 510)
|
||||
(UNSPEC_VSX_MSUB 511)
|
||||
(UNSPEC_VSX_NMADD 512)
|
||||
(UNSPEC_VSX_NMSUB 513)
|
||||
;; 514 deleted
|
||||
;; 510-514 deleted
|
||||
(UNSPEC_VSX_TDIV 515)
|
||||
(UNSPEC_VSX_TSQRT 516)
|
||||
(UNSPEC_VSX_XXPERMDI 517)
|
||||
|
@ -499,19 +495,22 @@
|
|||
;; does not check -mfused-madd to allow users to use these ops when they know
|
||||
;; they want the fused multiply/add.
|
||||
|
||||
;; Fused multiply add. By default expand the FMA into (plus (mult)) to help
|
||||
;; loop unrolling. Don't do negate multiply ops, because of complications with
|
||||
;; honoring signed zero and fused-madd.
|
||||
|
||||
(define_expand "vsx_fmadd<mode>4"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "")
|
||||
(plus:VSX_B
|
||||
(mult:VSX_B
|
||||
(match_operand:VSX_B 1 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" ""))
|
||||
(mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" ""))
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "")))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
{
|
||||
if (!TARGET_FUSED_MADD)
|
||||
{
|
||||
emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1], operands[2],
|
||||
operands[3]));
|
||||
emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
@ -534,10 +533,9 @@
|
|||
|
||||
(define_insn "vsx_fmadd<mode>4_2"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
|
||||
UNSPEC_VSX_MADD))]
|
||||
(fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
"@
|
||||
x<VSv>madda<VSs> %x0,%x1,%x2
|
||||
|
@ -550,16 +548,15 @@
|
|||
(define_expand "vsx_fmsub<mode>4"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "")
|
||||
(minus:VSX_B
|
||||
(mult:VSX_B
|
||||
(match_operand:VSX_B 1 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" ""))
|
||||
(mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" ""))
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "")))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
{
|
||||
if (!TARGET_FUSED_MADD)
|
||||
{
|
||||
emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1], operands[2],
|
||||
operands[3]));
|
||||
emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
@ -582,10 +579,10 @@
|
|||
|
||||
(define_insn "vsx_fmsub<mode>4_2"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
|
||||
UNSPEC_VSX_MSUB))]
|
||||
(fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(neg:VSX_B
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
"@
|
||||
x<VSv>msuba<VSs> %x0,%x1,%x2
|
||||
|
@ -595,32 +592,21 @@
|
|||
[(set_attr "type" "<VStype_mul>")
|
||||
(set_attr "fp_type" "<VSfptype_mul>")])
|
||||
|
||||
(define_expand "vsx_fnmadd<mode>4"
|
||||
[(match_operand:VSX_B 0 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 1 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "")]
|
||||
(define_insn "vsx_fnmadd<mode>4"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
(neg:VSX_B
|
||||
(fma:VSX_B
|
||||
(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
{
|
||||
if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
|
||||
{
|
||||
emit_insn (gen_vsx_fnmadd<mode>4_1 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
|
||||
{
|
||||
emit_insn (gen_vsx_fnmadd<mode>4_2 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_insn (gen_vsx_fnmadd<mode>4_3 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
"@
|
||||
x<VSv>nmadda<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmaddm<VSs> %x0,%x1,%x3
|
||||
x<VSv>nmadda<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmaddm<VSs> %x0,%x1,%x3"
|
||||
[(set_attr "type" "<VStype_mul>")
|
||||
(set_attr "fp_type" "<VSfptype_mul>")])
|
||||
|
||||
(define_insn "vsx_fnmadd<mode>4_1"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
|
@ -658,48 +644,22 @@
|
|||
[(set_attr "type" "<VStype_mul>")
|
||||
(set_attr "fp_type" "<VSfptype_mul>")])
|
||||
|
||||
(define_insn "vsx_fnmadd<mode>4_3"
|
||||
(define_insn "vsx_fnmsub<mode>4"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
|
||||
UNSPEC_VSX_NMADD))]
|
||||
(neg:VSX_B
|
||||
(fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(neg:VSX_B
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
"@
|
||||
x<VSv>nmadda<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmaddm<VSs> %x0,%x1,%x3
|
||||
x<VSv>nmadda<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmaddm<VSs> %x0,%x1,%x3"
|
||||
x<VSv>nmsuba<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmsubm<VSs> %x0,%x1,%x3
|
||||
x<VSv>nmsuba<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmsubm<VSs> %x0,%x1,%x3"
|
||||
[(set_attr "type" "<VStype_mul>")
|
||||
(set_attr "fp_type" "<VSfptype_mul>")])
|
||||
|
||||
(define_expand "vsx_fnmsub<mode>4"
|
||||
[(match_operand:VSX_B 0 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 1 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "")]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
{
|
||||
if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
|
||||
{
|
||||
emit_insn (gen_vsx_fnmsub<mode>4_1 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
|
||||
{
|
||||
emit_insn (gen_vsx_fnmsub<mode>4_2 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_insn (gen_vsx_fnmsub<mode>4_3 (operands[0], operands[1],
|
||||
operands[2], operands[3]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "vsx_fnmsub<mode>4_1"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
(neg:VSX_B
|
||||
|
@ -735,21 +695,6 @@
|
|||
[(set_attr "type" "<VStype_mul>")
|
||||
(set_attr "fp_type" "<VSfptype_mul>")])
|
||||
|
||||
(define_insn "vsx_fnmsub<mode>4_3"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
|
||||
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
|
||||
(match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
|
||||
UNSPEC_VSX_NMSUB))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
"@
|
||||
x<VSv>nmsuba<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmsubm<VSs> %x0,%x1,%x3
|
||||
x<VSv>nmsuba<VSs> %x0,%x1,%x2
|
||||
x<VSv>nmsubm<VSs> %x0,%x1,%x3"
|
||||
[(set_attr "type" "<VStype_mul>")
|
||||
(set_attr "fp_type" "<VSfptype_mul>")])
|
||||
|
||||
;; Vector conditional expressions (no scalar version for these instructions)
|
||||
(define_insn "vsx_eq<mode>"
|
||||
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
|
||||
|
|
|
@ -2345,6 +2345,15 @@ and swap operations on operands 1, 2, 4, 8 or 16 bytes in length, respectively.
|
|||
This macro is defined when the compiler is emitting Dwarf2 CFI directives
|
||||
to the assembler. When this is defined, it is possible to emit those same
|
||||
directives in inline assembly.
|
||||
|
||||
@item __FP_FAST_FMA
|
||||
@itemx __FP_FAST_FMAF
|
||||
@itemx __FP_FAST_FMAL
|
||||
These macros are defined with value 1 if the backend supports the
|
||||
@code{fma}, @code{fmaf}, and @code{fmal} builtin functions, so that
|
||||
the include file @file{math.h} can define the macros
|
||||
@code{FP_FAST_FMA}, @code{FP_FAST_FMAF}, and @code{FP_FAST_FMAL}
|
||||
for compatibility with the 1999 C standard.
|
||||
@end table
|
||||
|
||||
@node System-specific Predefined Macros
|
||||
|
|
|
@ -3948,6 +3948,16 @@ means of constraints requiring operands 1 and 0 to be the same location.
|
|||
@itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
|
||||
Similar, for other arithmetic operations.
|
||||
|
||||
@cindex @code{fma@var{m}4} instruction pattern
|
||||
@item @samp{fma@var{m}4}
|
||||
Multiply operand 2 and operand 1, then add operand 3, storing the
|
||||
result in operand 0. All operands must have mode @var{m}. This
|
||||
pattern is used to implement the @code{fma}, @code{fmaf}, and
|
||||
@code{fmal} builtin functions from the ISO C99 standard. The
|
||||
@code{fma} operation may produce different results than doing the
|
||||
multiply followed by the add if the machine does not perform a
|
||||
rounding step between the operations.
|
||||
|
||||
@cindex @code{min@var{m}3} instruction pattern
|
||||
@cindex @code{max@var{m}3} instruction pattern
|
||||
@item @samp{smin@var{m}3}, @samp{smax@var{m}3}
|
||||
|
|
|
@ -182,7 +182,8 @@ and are lvalues (so they can be used for insertion as well).
|
|||
|
||||
@item RTX_TERNARY
|
||||
An RTX code for other three input operations. Currently only
|
||||
@code{IF_THEN_ELSE} and @code{VEC_MERGE}.
|
||||
@code{IF_THEN_ELSE}, @code{VEC_MERGE}, @code{SIGN_EXTRACT},
|
||||
@code{ZERO_EXTRACT}, and @code{FMA}.
|
||||
|
||||
@item RTX_INSN
|
||||
An RTX code for an entire instruction: @code{INSN}, @code{JUMP_INSN}, and
|
||||
|
@ -2234,6 +2235,12 @@ not be the same.
|
|||
For unsigned widening multiplication, use the same idiom, but with
|
||||
@code{zero_extend} instead of @code{sign_extend}.
|
||||
|
||||
@findex fma
|
||||
@item (fma:@var{m} @var{x} @var{y} @var{z})
|
||||
Represents the @code{fma}, @code{fmaf}, and @code{fmal} builtin
|
||||
functions that do a combined multiply of @var{x} and @var{y} and then
|
||||
add @var{z} without doing an intermediate rounding step.
|
||||
|
||||
@findex div
|
||||
@findex ss_div
|
||||
@cindex division
|
||||
|
|
|
@ -159,6 +159,7 @@ static const char * const optabs[] =
|
|||
"set_optab_handler (sqrt_optab, $A, CODE_FOR_$(sqrt$a2$))",
|
||||
"set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
|
||||
"set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
|
||||
"set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
|
||||
"set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
|
||||
"set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
|
||||
"set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
|
||||
|
|
|
@ -190,6 +190,8 @@ enum optab_index
|
|||
OTI_pow,
|
||||
/* Arc tangent of y/x */
|
||||
OTI_atan2,
|
||||
/* Floating multiply/add */
|
||||
OTI_fma,
|
||||
|
||||
/* Move instruction. */
|
||||
OTI_mov,
|
||||
|
@ -432,6 +434,7 @@ enum optab_index
|
|||
#define umax_optab (&optab_table[OTI_umax])
|
||||
#define pow_optab (&optab_table[OTI_pow])
|
||||
#define atan2_optab (&optab_table[OTI_atan2])
|
||||
#define fma_optab (&optab_table[OTI_fma])
|
||||
|
||||
#define mov_optab (&optab_table[OTI_mov])
|
||||
#define movstrict_optab (&optab_table[OTI_movstrict])
|
||||
|
|
|
@ -706,6 +706,9 @@ DEF_RTL_EXPR(SS_TRUNCATE, "ss_truncate", "e", RTX_UNARY)
|
|||
/* Unsigned saturating truncate. */
|
||||
DEF_RTL_EXPR(US_TRUNCATE, "us_truncate", "e", RTX_UNARY)
|
||||
|
||||
/* Floating point multiply/add combined instruction. */
|
||||
DEF_RTL_EXPR(FMA, "fma", "eee", RTX_TERNARY)
|
||||
|
||||
/* Information about the variable and its location. */
|
||||
/* Changed 'te' to 'tei'; the 'i' field is for recording
|
||||
initialization status of variables. */
|
||||
|
|
|
@ -4712,6 +4712,12 @@ simplify_ternary_operation (enum rtx_code code, enum machine_mode mode,
|
|||
|
||||
switch (code)
|
||||
{
|
||||
/* At present, don't simplify fused multiply and add ops, because we need
|
||||
to make sure there are no intermediate rounding steps used, and that
|
||||
we get the right sign if negative 0 would be returned. */
|
||||
case FMA:
|
||||
return NULL_RTX;
|
||||
|
||||
case SIGN_EXTRACT:
|
||||
case ZERO_EXTRACT:
|
||||
if (CONST_INT_P (op0)
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
2010-10-14 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/ppc-fma-1.c: New tests for powerpc FMA
|
||||
builtin combiner patterns.
|
||||
* gcc.target/powerpc/ppc-fma-2.c: Ditto.
|
||||
* gcc.target/powerpc/ppc-fma-3.c: Ditto.
|
||||
* gcc.target/powerpc/ppc-fma-4.c: Ditto.
|
||||
* gcc.target/powerpc/ppc-fma-5.c: Ditto.
|
||||
* gcc.target/powerpc/ppc-fma-6.c: Ditto.
|
||||
|
||||
2010-10-15 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* g++.dg/lto/20101015-1_0.C: New testcase.
|
||||
|
|
183
gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
Normal file
183
gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
Normal file
|
@ -0,0 +1,183 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target powerpc_vsx_ok } */
|
||||
/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math" } */
|
||||
/* { dg-final { scan-assembler-times "xvmadd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "xsmadd" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "fmadds" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
|
||||
|
||||
/* All functions should generate an appropriate (a * b) + c instruction
|
||||
since -mfused-madd is on by default. */
|
||||
|
||||
double
|
||||
builtin_fma (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, d); /* xsmadd{a,m}dp */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fms (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, -d); /* xsmsub{a,m}dp */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnma (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, d); /* xsnmadd{a,m}dp */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnms (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, -d); /* xsnmsub{a,m}dp */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmaf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, d); /* fmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmsf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, -d); /* fmsubs */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmaf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, d); /* fnmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmsf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, -d); /* fnmsubs */
|
||||
}
|
||||
|
||||
double
|
||||
normal_fma (double b, double c, double d)
|
||||
{
|
||||
return (b * c) + d; /* xsmadd{a,m}dp */
|
||||
}
|
||||
|
||||
float
|
||||
normal_fmaf (float b, float c, float d)
|
||||
{
|
||||
return (b * c) + d; /* fmadds */
|
||||
}
|
||||
|
||||
#ifndef SIZE
|
||||
#define SIZE 1024
|
||||
#endif
|
||||
|
||||
double vda[SIZE] __attribute__((__aligned__(32)));
|
||||
double vdb[SIZE] __attribute__((__aligned__(32)));
|
||||
double vdc[SIZE] __attribute__((__aligned__(32)));
|
||||
double vdd[SIZE] __attribute__((__aligned__(32)));
|
||||
|
||||
float vfa[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfb[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfc[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfd[SIZE] __attribute__((__aligned__(32)));
|
||||
|
||||
void
|
||||
vector_fma (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvmadd{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fms (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvmsub{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnma (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = - __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvnmadd{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnms (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = - __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvnmsub{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvmadd{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fmsf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvmsub{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvnmadd{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnmsf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvnmsub{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vnormal_fma (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = (vdb[i] * vdc[i]) + vdd[i]; /* xvmadd{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vnormal_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = (vfb[i] * vfc[i]) + vfd[i]; /* xvmadd{a,m}sp */
|
||||
}
|
183
gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
Normal file
183
gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
Normal file
|
@ -0,0 +1,183 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target powerpc_vsx_ok } */
|
||||
/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math -mno-fused-madd" } */
|
||||
/* { dg-final { scan-assembler-times "xvmadd" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsmadd" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmadds" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
|
||||
|
||||
/* Only the functions calling the builtin should generate an appropriate (a *
|
||||
b) + c instruction. */
|
||||
|
||||
double
|
||||
builtin_fma (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, d); /* xsmadd{a,m}dp */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fms (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, -d); /* xsmsub{a,m}dp */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnma (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, d); /* xsnmadd{a,m}dp */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnms (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, -d); /* xsnmsub{a,m}dp */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmaf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, d); /* fmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmsf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, -d); /* fmsubs */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmaf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, d); /* fnmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmsf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, -d); /* fnmsubs */
|
||||
}
|
||||
|
||||
double
|
||||
normal_fma (double b, double c, double d)
|
||||
{
|
||||
return (b * c) + d; /* fmul/fadd */
|
||||
}
|
||||
|
||||
float
|
||||
normal_fmaf (float b, float c, float d)
|
||||
{
|
||||
return (b * c) + d; /* fmuls/fadds */
|
||||
}
|
||||
|
||||
#ifndef SIZE
|
||||
#define SIZE 1024
|
||||
#endif
|
||||
|
||||
double vda[SIZE] __attribute__((__aligned__(32)));
|
||||
double vdb[SIZE] __attribute__((__aligned__(32)));
|
||||
double vdc[SIZE] __attribute__((__aligned__(32)));
|
||||
double vdd[SIZE] __attribute__((__aligned__(32)));
|
||||
|
||||
float vfa[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfb[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfc[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfd[SIZE] __attribute__((__aligned__(32)));
|
||||
|
||||
void
|
||||
vector_fma (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvmadd{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fms (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvmsub{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnma (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = - __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvnmadd{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnms (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = - __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvnmsub{a,m}dp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvmadd{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fmsf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvmsub{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvnmadd{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vector_fnmsf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvnmsub{a,m}sp */
|
||||
}
|
||||
|
||||
void
|
||||
vnormal_fma (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vda[i] = (vdb[i] * vdc[i]) + vdd[i]; /* xvmuldp/xvadddp */
|
||||
}
|
||||
|
||||
void
|
||||
vnormal_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = (vfb[i] * vfc[i]) + vfd[i]; /* xvmulsp/xvaddsp */
|
||||
}
|
103
gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c
Normal file
103
gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c
Normal file
|
@ -0,0 +1,103 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target powerpc_altivec_ok } */
|
||||
/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math" } */
|
||||
/* { dg-final { scan-assembler-times "vmaddfp" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "fmadd " 2 } } */
|
||||
/* { dg-final { scan-assembler-times "fmadds" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "fmsub " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmadd " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmsub " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
|
||||
|
||||
/* All functions should generate an appropriate (a * b) + c instruction
|
||||
since -mfused-madd is on by default. */
|
||||
|
||||
double
|
||||
builtin_fma (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, d); /* fmadd */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fms (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, -d); /* fmsub */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnma (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, d); /* fnmadd */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnms (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, -d); /* fnmsub */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmaf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, d); /* fmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmsf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, -d); /* fmsubs */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmaf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, d); /* fnmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmsf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, -d); /* fnmsubs */
|
||||
}
|
||||
|
||||
double
|
||||
normal_fma (double b, double c, double d)
|
||||
{
|
||||
return (b * c) + d; /* fmadd */
|
||||
}
|
||||
|
||||
float
|
||||
normal_fmaf (float b, float c, float d)
|
||||
{
|
||||
return (b * c) + d; /* fmadds */
|
||||
}
|
||||
|
||||
#ifndef SIZE
|
||||
#define SIZE 1024
|
||||
#endif
|
||||
|
||||
float vfa[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfb[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfc[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfd[SIZE] __attribute__((__aligned__(32)));
|
||||
|
||||
void
|
||||
vector_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* vmaddfp */
|
||||
}
|
||||
|
||||
void
|
||||
vnormal_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = (vfb[i] * vfc[i]) + vfd[i]; /* vmaddfp */
|
||||
}
|
94
gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c
Normal file
94
gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target powerpc_altivec_ok } */
|
||||
/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math -mno-fused-madd" } */
|
||||
/* { dg-final { scan-assembler-times "vmaddfp" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmadd " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmadds" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmsub " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmadd " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmsub " 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
|
||||
|
||||
/* Only the functions calling the builtin should generate an appropriate
|
||||
(a * b) + c instruction. */
|
||||
|
||||
double
|
||||
builtin_fma (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, d); /* fmadd */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fms (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, -d); /* fmsub */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnma (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, d); /* fnmadd */
|
||||
}
|
||||
|
||||
double
|
||||
builtin_fnms (double b, double c, double d)
|
||||
{
|
||||
return - __builtin_fma (b, c, -d); /* fnmsub */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmaf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, d); /* fmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmsf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, -d); /* fmsubs */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmaf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, d); /* fnmadds */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fnmsf (float b, float c, float d)
|
||||
{
|
||||
return - __builtin_fmaf (b, c, -d); /* fnmsubs */
|
||||
}
|
||||
|
||||
double
|
||||
normal_fma (double b, double c, double d)
|
||||
{
|
||||
return (b * c) + d; /* fmul/fadd */
|
||||
}
|
||||
|
||||
float
|
||||
normal_fmaf (float b, float c, float d)
|
||||
{
|
||||
return (b * c) + d; /* fmuls/fadds */
|
||||
}
|
||||
|
||||
#ifndef SIZE
|
||||
#define SIZE 1024
|
||||
#endif
|
||||
|
||||
float vfa[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfb[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfc[SIZE] __attribute__((__aligned__(32)));
|
||||
float vfd[SIZE] __attribute__((__aligned__(32)));
|
||||
|
||||
void
|
||||
vector_fmaf (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* vmaddfp */
|
||||
}
|
26
gcc/testsuite/gcc.target/powerpc/ppc-fma-5.c
Normal file
26
gcc/testsuite/gcc.target/powerpc/ppc-fma-5.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* { dg-do run { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-options "-O2 -mcpu=power5 -std=c99" } */
|
||||
|
||||
#ifndef __FP_FAST_FMA
|
||||
#error "__FP_FAST_FMA should be defined"
|
||||
#endif
|
||||
|
||||
#ifndef __FP_FAST_FMAF
|
||||
#error "__FP_FAST_FMAF should be defined"
|
||||
#endif
|
||||
|
||||
double d_a = 2.0, d_b = 3.0, d_c = 4.0;
|
||||
float f_a = 2.0f, f_b = 3.0f, f_c = 4.0f;
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
if (__builtin_fma (d_a, d_b, d_c) != (2.0 * 3.0) + 4.0)
|
||||
__builtin_abort ();
|
||||
|
||||
if (__builtin_fmaf (f_a, f_b, f_c) != (2.0f * 3.0f) + 4.0f)
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
28
gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
Normal file
28
gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target ilp32 } */
|
||||
/* { dg-options "-O2 -mcpu=power5 -std=c99 -msoft-float" } */
|
||||
/* { dg-final { scan-assembler-not "fmadd" } } */
|
||||
/* { dg-final { scan-assembler-not "xsfmadd" } } */
|
||||
|
||||
/* Test whether -msoft-float turns off the macros math.h uses for
|
||||
FP_FAST_FMA{,F,L}. */
|
||||
#ifdef __FP_FAST_FMA
|
||||
#error "__FP_FAST_FMA should not be defined"
|
||||
#endif
|
||||
|
||||
#ifdef __FP_FAST_FMAF
|
||||
#error "__FP_FAST_FMAF should not be defined"
|
||||
#endif
|
||||
|
||||
double
|
||||
builtin_fma (double b, double c, double d)
|
||||
{
|
||||
return __builtin_fma (b, c, d); /* bl fma */
|
||||
}
|
||||
|
||||
float
|
||||
builtin_fmaf (float b, float c, float d)
|
||||
{
|
||||
return __builtin_fmaf (b, c, -d); /* bl fmaf */
|
||||
}
|
|
@ -1357,10 +1357,10 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
|
|||
vectype_in = NULL_TREE;
|
||||
nargs = gimple_call_num_args (stmt);
|
||||
|
||||
/* Bail out if the function has more than two arguments, we
|
||||
do not have interesting builtin functions to vectorize with
|
||||
more than two arguments. No arguments is also not good. */
|
||||
if (nargs == 0 || nargs > 2)
|
||||
/* Bail out if the function has more than three arguments, we do not have
|
||||
interesting builtin functions to vectorize with more than two arguments
|
||||
except for fma. No arguments is also not good. */
|
||||
if (nargs == 0 || nargs > 3)
|
||||
return false;
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
|
|
|
@ -5068,6 +5068,7 @@ extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
|
|||
extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
|
||||
extern bool is_simple_builtin (tree);
|
||||
extern bool is_inexpensive_builtin (tree);
|
||||
extern bool mode_has_fma (enum machine_mode mode);
|
||||
|
||||
/* In convert.c */
|
||||
extern tree strip_float_extensions (tree);
|
||||
|
|
Loading…
Add table
Reference in a new issue