diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e20777fe71b..b2b7ea185aa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,60 @@ +2012-11-05 Sriraman Tallam + + * doc/tm.texi.in (TARGET_OPTION_FUNCTION_VERSIONS): New hook + description. + * (TARGET_COMPARE_VERSION_PRIORITY): New hook description. + * (TARGET_GET_FUNCTION_VERSIONS_DISPATCHER): New hook description. + * (TARGET_GENERATE_VERSION_DISPATCHER_BODY): New hook description. + * doc/tm.texi: Regenerate. + * target.def (compare_version_priority): New target hook. + * (generate_version_dispatcher_body): New target hook. + * (get_function_versions_dispatcher): New target hook. + * (function_versions): New target hook. + * cgraph.c (cgraph_fnver_htab): New htab. + (cgraph_fn_ver_htab_hash): New function. + (cgraph_fn_ver_htab_eq): New function. + (version_info_node): New pointer. + (insert_new_cgraph_node_version): New function. + (get_cgraph_node_version): New function. + (delete_function_version): New function. + (record_function_versions): New function. + * cgraph.h (cgraph_node): New bitfield dispatcher_function. + (cgraph_function_version_info): New struct. + (get_cgraph_node_version): New function. + (insert_new_cgraph_node_version): New function. + (record_function_versions): New function. + (delete_function_version): New function. + (init_lowered_empty_function): Expose function. + * tree.h (DECL_FUNCTION_VERSIONED): New macro. + (tree_function_decl): New bit-field versioned_function. + * cgraphunit.c (cgraph_analyze_function): Generate body of multiversion + function dispatcher. + (cgraph_analyze_functions): Analyze dispatcher function. + (init_lowered_empty_function): Make non-static. New parameter in_ssa. + (assemble_thunk): Add parameter to call to init_lowered_empty_function. + * config/i386/i386.c (add_condition_to_bb): New function. + (get_builtin_code_for_version): New function. + (ix86_compare_version_priority): New function. + (feature_compare): New function. 
+	(dispatch_function_versions): New function.
+	(ix86_function_versions): New function.
+	(attr_strcmp): New function.
+	(ix86_mangle_function_version_assembler_name): New function.
+	(ix86_mangle_decl_assembler_name): New function.
+	(make_name): New function.
+	(make_dispatcher_decl): New function.
+	(is_function_default_version): New function.
+	(ix86_get_function_versions_dispatcher): New function.
+	(make_attribute): New function.
+	(make_resolver_func): New function.
+	(ix86_generate_version_dispatcher_body): New function.
+	(fold_builtin_cpu): Return integer for cpu builtins.
+	(TARGET_MANGLE_DECL_ASSEMBLER_NAME): New macro.
+	(TARGET_COMPARE_VERSION_PRIORITY): New macro.
+	(TARGET_GENERATE_VERSION_DISPATCHER_BODY): New macro.
+	(TARGET_GET_FUNCTION_VERSIONS_DISPATCHER): New macro.
+	(TARGET_OPTION_FUNCTION_VERSIONS): New macro.
+
 2012-11-05  Joern Rennecke
 
 	* recog.c (extract_insn): Enabled alternative defaults to 1.
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 766609b5366..c9ff5c6de8d 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -132,6 +132,144 @@ static GTY(()) struct cgraph_edge *free_edges;
 /* Did procss_same_body_aliases run? */
 bool same_body_aliases_done;
 
+/* Map a cgraph_node to cgraph_function_version_info using this htab.
+   The cgraph_function_version_info has a THIS_NODE field that is the
+   corresponding cgraph_node.  The table starts out NULL.  */
+
+static htab_t GTY((param_is (struct cgraph_function_version_info *)))
+  cgraph_fnver_htab = NULL;
+
+/* Hash function for cgraph_fnver_htab.  PTR points to a
+   cgraph_function_version_info; the hash value is the uid of its
+   THIS_NODE.  */
+static hashval_t
+cgraph_fnver_htab_hash (const void *ptr)
+{
+  int uid = ((const struct cgraph_function_version_info *)ptr)->this_node->uid;
+  return (hashval_t)(uid);
+}
+
+/* eq function for cgraph_fnver_htab.  P1 and P2 point to
+   cgraph_function_version_info entries; they compare equal when the
+   uids of their THIS_NODE fields match.  
*/
+static int
+cgraph_fnver_htab_eq (const void *p1, const void *p2)
+{
+  const struct cgraph_function_version_info *n1
+    = (const struct cgraph_function_version_info *)p1;
+  const struct cgraph_function_version_info *n2
+    = (const struct cgraph_function_version_info *)p2;
+
+  return n1->this_node->uid == n2->this_node->uid;
+}
+
+/* Mark as GC root all allocated nodes.  This GTY-marked static holds
+   the node most recently allocated by insert_new_cgraph_node_version.  */
+static GTY(()) struct cgraph_function_version_info *
+  version_info_node = NULL;
+
+/* Get the cgraph_function_version_info node corresponding to NODE.
+   Returns NULL when cgraph_fnver_htab has not been created yet;
+   otherwise returns the result of htab_find for a key whose THIS_NODE
+   is NODE.  */
+struct cgraph_function_version_info *
+get_cgraph_node_version (struct cgraph_node *node)
+{
+  struct cgraph_function_version_info *ret;
+  struct cgraph_function_version_info key;
+  key.this_node = node;
+
+  if (cgraph_fnver_htab == NULL)
+    return NULL;
+
+  ret = (struct cgraph_function_version_info *)
+    htab_find (cgraph_fnver_htab, &key);
+
+  return ret;
+}
+
+/* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
+   corresponding to cgraph_node NODE.  The new node is allocated cleared,
+   so only THIS_NODE is set.  The hash table is created on first use.
+   Returns the newly inserted node.  */
+struct cgraph_function_version_info *
+insert_new_cgraph_node_version (struct cgraph_node *node)
+{
+  void **slot;
+
+  version_info_node = NULL;
+  version_info_node = ggc_alloc_cleared_cgraph_function_version_info ();
+  version_info_node->this_node = node;
+
+  if (cgraph_fnver_htab == NULL)
+    cgraph_fnver_htab = htab_create_ggc (2, cgraph_fnver_htab_hash,
+				         cgraph_fnver_htab_eq, NULL);
+
+  slot = htab_find_slot (cgraph_fnver_htab, version_info_node, INSERT);
+  gcc_assert (slot != NULL);
+  *slot = version_info_node;
+  return version_info_node;
+}
+
+/* Remove the cgraph_function_version_info and cgraph_node for DECL.  This
+   DECL is a duplicate declaration.  
*/
+void
+delete_function_version (tree decl)
+{
+  /* Only look the node up.  Creating one here would make the NULL check
+     below meaningless and would leave a fresh node behind whenever DECL
+     has no version info.  */
+  struct cgraph_node *decl_node = cgraph_get_node (decl);
+  struct cgraph_function_version_info *decl_v = NULL;
+
+  if (decl_node == NULL)
+    return;
+
+  decl_v = get_cgraph_node_version (decl_node);
+
+  if (decl_v == NULL)
+    return;
+
+  /* Unlink DECL_V from the doubly linked chain of versions.  */
+  if (decl_v->prev != NULL)
+    decl_v->prev->next = decl_v->next;
+
+  if (decl_v->next != NULL)
+    decl_v->next->prev = decl_v->prev;
+
+  if (cgraph_fnver_htab != NULL)
+    htab_remove_elt (cgraph_fnver_htab, decl_v);
+
+  cgraph_remove_node (decl_node);
+}
+
+/* Record that DECL1 and DECL2 are semantically identical function
+   versions.  Version info nodes are created for whichever of the two
+   decls lacks one, and the two chains are then joined.  Nothing is done
+   when both decls already have version info.  */
+void
+record_function_versions (tree decl1, tree decl2)
+{
+  struct cgraph_node *decl1_node = cgraph_get_create_node (decl1);
+  struct cgraph_node *decl2_node = cgraph_get_create_node (decl2);
+  struct cgraph_function_version_info *decl1_v = NULL;
+  struct cgraph_function_version_info *decl2_v = NULL;
+  struct cgraph_function_version_info *before;
+  struct cgraph_function_version_info *after;
+
+  gcc_assert (decl1_node != NULL && decl2_node != NULL);
+  decl1_v = get_cgraph_node_version (decl1_node);
+  decl2_v = get_cgraph_node_version (decl2_node);
+
+  if (decl1_v != NULL && decl2_v != NULL)
+    return;
+
+  if (decl1_v == NULL)
+    decl1_v = insert_new_cgraph_node_version (decl1_node);
+
+  if (decl2_v == NULL)
+    decl2_v = insert_new_cgraph_node_version (decl2_node);
+
+  /* Chain decl2_v and decl1_v.  All semantically identical versions
+     will be chained together.  The tail of DECL1's chain is linked to
+     the head of DECL2's chain.  */
+
+  before = decl1_v;
+  after = decl2_v;
+
+  while (before->next != NULL)
+    before = before->next;
+
+  while (after->prev != NULL)
+    after = after->prev;
+
+  before->next = after;
+  after->prev = before;
+}
+
 /* Macros to access the next item in the list of free cgraph nodes and
    edges.  
*/
 #define NEXT_FREE_NODE(NODE) cgraph ((NODE)->symbol.next)
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index f276512df31..25c1f33eb57 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -280,6 +280,8 @@ struct GTY(()) cgraph_node {
   /* ?? We should be able to remove this. We have enough bits in
      cgraph to calculate it. */
   unsigned tm_clone : 1;
+  /* True if this decl is a dispatcher for function versions.  */
+  unsigned dispatcher_function : 1;
 };
 
 DEF_VEC_P(symtab_node);
@@ -292,6 +294,47 @@ DEF_VEC_P(cgraph_node_ptr);
 DEF_VEC_ALLOC_P(cgraph_node_ptr,heap);
 DEF_VEC_ALLOC_P(cgraph_node_ptr,gc);
 
+/* Function Multiversioning info.  One of these is kept per cgraph_node
+   that takes part in function versioning.  */
+struct GTY(()) cgraph_function_version_info {
+  /* The cgraph_node for which the function version info is stored.  */
+  struct cgraph_node *this_node;
+  /* Previous node in the chain.  PREV and NEXT together chain all the
+     semantically identical function versions.  The first function in
+     this chain is the version_info node of the default function.  */
+  struct cgraph_function_version_info *prev;
+  /* Next node in the chain.
+     If this version node corresponds to a dispatcher for function
+     versions, this points to the version info node of the default
+     function, the first node in the chain.  */
+  struct cgraph_function_version_info *next;
+  /* If this node corresponds to a function version, this points
+     to the dispatcher function decl, which is the function that must
+     be called to execute the right function version at run-time.
+
+     If this cgraph node is a dispatcher (if dispatcher_function is
+     true, in the cgraph_node struct) for function versions, this
+     points to resolver function, which holds the function body of the
+     dispatcher.  The dispatcher decl is an alias to the resolver
+     function decl.  */
+  tree dispatcher_resolver;
+};
+
+/* Get the cgraph_function_version_info node corresponding to NODE, or
+   NULL if none is found.  */
+struct cgraph_function_version_info *
+  get_cgraph_node_version (struct cgraph_node *node);
+
+/* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
+   corresponding to cgraph_node NODE.  
*/ +struct cgraph_function_version_info * + insert_new_cgraph_node_version (struct cgraph_node *node); + +/* Record that DECL1 and DECL2 are semantically identical function + versions. */ +void record_function_versions (tree decl1, tree decl2); + +/* Remove the cgraph_function_version_info and cgraph_node for DECL. This + DECL is a duplicate declaration. */ +void delete_function_version (tree decl); + /* A cgraph node set is a collection of cgraph nodes. A cgraph node can appear in multiple sets. */ struct cgraph_node_set_def @@ -638,6 +681,9 @@ void init_cgraph (void); bool cgraph_process_new_functions (void); void cgraph_process_same_body_aliases (void); void fixup_same_cpp_alias_visibility (symtab_node node, symtab_node target, tree alias); +/* Initialize datastructures so DECL is a function in lowered gimple form. + IN_SSA is true if the gimple is in SSA. */ +basic_block init_lowered_empty_function (tree decl, bool in_ssa); /* In cgraphclones.c */ diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 230125c4a69..bf1326bbb88 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -630,6 +630,21 @@ cgraph_analyze_function (struct cgraph_node *node) cgraph_create_edge (node, cgraph_get_node (node->thunk.alias), NULL, 0, CGRAPH_FREQ_BASE); } + else if (node->dispatcher_function) + { + /* Generate the dispatcher body of multi-versioned functions. 
*/ + struct cgraph_function_version_info *dispatcher_version_info + = get_cgraph_node_version (node); + if (dispatcher_version_info != NULL + && (dispatcher_version_info->dispatcher_resolver + == NULL_TREE)) + { + tree resolver = NULL_TREE; + gcc_assert (targetm.generate_version_dispatcher_body); + resolver = targetm.generate_version_dispatcher_body (node); + gcc_assert (resolver != NULL_TREE); + } + } else { push_cfun (DECL_STRUCT_FUNCTION (decl)); @@ -938,7 +953,8 @@ cgraph_analyze_functions (void) See gcc.c-torture/compile/20011119-1.c */ if (!DECL_STRUCT_FUNCTION (decl) && (!cnode->alias || !cnode->thunk.alias) - && !cnode->thunk.thunk_p) + && !cnode->thunk.thunk_p + && !cnode->dispatcher_function) { cgraph_reset_node (cnode); cnode->local.redefined_extern_inline = true; @@ -1219,13 +1235,13 @@ mark_functions_to_output (void) } /* DECL is FUNCTION_DECL. Initialize datastructures so DECL is a function - in lowered gimple form. + in lowered gimple form. IN_SSA is true if the gimple is in SSA. Set current_function_decl and cfun to newly constructed empty function body. return basic block in the function body. 
*/ -static basic_block -init_lowered_empty_function (tree decl) +basic_block +init_lowered_empty_function (tree decl, bool in_ssa) { basic_block bb; @@ -1233,9 +1249,14 @@ init_lowered_empty_function (tree decl) allocate_struct_function (decl, false); gimple_register_cfg_hooks (); init_empty_tree_cfg (); - init_tree_ssa (cfun); - init_ssa_operands (cfun); - cfun->gimple_df->in_ssa_p = true; + + if (in_ssa) + { + init_tree_ssa (cfun); + init_ssa_operands (cfun); + cfun->gimple_df->in_ssa_p = true; + } + DECL_INITIAL (decl) = make_node (BLOCK); DECL_SAVED_TREE (decl) = error_mark_node; @@ -1442,7 +1463,7 @@ assemble_thunk (struct cgraph_node *node) else resdecl = DECL_RESULT (thunk_fndecl); - bb = then_bb = else_bb = return_bb = init_lowered_empty_function (thunk_fndecl); + bb = then_bb = else_bb = return_bb = init_lowered_empty_function (thunk_fndecl, true); bsi = gsi_start_bb (bb); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 62f380fcd6d..d92fb1677ab 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -62,6 +62,8 @@ along with GCC; see the file COPYING3. If not see #include "opts.h" #include "diagnostic.h" #include "dumpfile.h" +#include "tree-pass.h" +#include "tree-flow.h" enum upper_128bits_state { @@ -28463,6 +28465,967 @@ ix86_init_mmx_sse_builtins (void) } } +/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL + to return a pointer to VERSION_DECL if the outcome of the expression + formed by PREDICATE_CHAIN is true. This function will be called during + version dispatch to decide which function version to execute. It returns + the basic block at the end, to which more conditions can be added. 
*/
+
+static basic_block
+add_condition_to_bb (tree function_decl, tree version_decl,
+                     tree predicate_chain, basic_block new_bb)
+{
+  gimple return_stmt;
+  tree convert_expr, result_var;
+  gimple convert_stmt;
+  gimple call_cond_stmt;
+  gimple if_else_stmt;
+
+  basic_block bb1, bb2, bb3;
+  edge e12, e23;
+
+  tree cond_var, and_expr_var = NULL_TREE;
+  gimple_seq gseq;
+
+  tree predicate_decl, predicate_arg;
+
+  push_cfun (DECL_STRUCT_FUNCTION (function_decl));
+
+  gcc_assert (new_bb != NULL);
+  gseq = bb_seq (new_bb);
+
+
+  /* Build "result_var = (void *) &version_decl; return result_var;".
+     These two statements are appended either unconditionally or after
+     the predicate test below.  */
+  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
+                         build_fold_addr_expr (version_decl));
+  result_var = create_tmp_var (ptr_type_node, NULL);
+  convert_stmt = gimple_build_assign (result_var, convert_expr);
+  return_stmt = gimple_build_return (result_var);
+
+  /* No predicates: return VERSION_DECL unconditionally from NEW_BB.  */
+  if (predicate_chain == NULL_TREE)
+    {
+      gimple_seq_add_stmt (&gseq, convert_stmt);
+      gimple_seq_add_stmt (&gseq, return_stmt);
+      set_bb_seq (new_bb, gseq);
+      gimple_set_bb (convert_stmt, new_bb);
+      gimple_set_bb (return_stmt, new_bb);
+      pop_cfun ();
+      return new_bb;
+    }
+
+  /* Emit one call per predicate; TREE_PURPOSE is the predicate function
+     decl and TREE_VALUE is its single argument.  */
+  while (predicate_chain != NULL)
+    {
+      cond_var = create_tmp_var (integer_type_node, NULL);
+      predicate_decl = TREE_PURPOSE (predicate_chain);
+      predicate_arg = TREE_VALUE (predicate_chain);
+      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
+      gimple_call_set_lhs (call_cond_stmt, cond_var);
+
+      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
+      gimple_set_bb (call_cond_stmt, new_bb);
+      gimple_seq_add_stmt (&gseq, call_cond_stmt);
+
+      predicate_chain = TREE_CHAIN (predicate_chain);
+
+      if (and_expr_var == NULL)
+        and_expr_var = cond_var;
+      else
+        {
+          gimple assign_stmt;
+          /* Use MIN_EXPR to check if any integer is zero:
+             and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
+          assign_stmt = gimple_build_assign (and_expr_var,
+                          build2 (MIN_EXPR, integer_type_node,
+                                  cond_var, and_expr_var));
+
+          gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
+          gimple_set_bb (assign_stmt, new_bb);
+          gimple_seq_add_stmt (&gseq, assign_stmt);
+        }
+    }
+
+  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
+                                    integer_zero_node,
+                                    NULL_TREE, NULL_TREE);
+  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
+  gimple_set_bb (if_else_stmt, new_bb);
+  gimple_seq_add_stmt (&gseq, if_else_stmt);
+
+  gimple_seq_add_stmt (&gseq, convert_stmt);
+  gimple_seq_add_stmt (&gseq, return_stmt);
+  set_bb_seq (new_bb, gseq);
+
+  /* Split NEW_BB after the condition (BB1 -> BB2 on true) and after the
+     return (BB2 | BB3).  BB3 becomes the false target of the condition
+     and BB2 is wired to the exit block.  */
+  bb1 = new_bb;
+  e12 = split_block (bb1, if_else_stmt);
+  bb2 = e12->dest;
+  e12->flags &= ~EDGE_FALLTHRU;
+  e12->flags |= EDGE_TRUE_VALUE;
+
+  e23 = split_block (bb2, return_stmt);
+
+  gimple_set_bb (convert_stmt, bb2);
+  gimple_set_bb (return_stmt, bb2);
+
+  bb3 = e23->dest;
+  make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+
+  remove_edge (e23);
+  make_edge (bb2, EXIT_BLOCK_PTR, 0);
+
+  pop_cfun ();
+
+  return bb3;
+}
+
+/* This parses the attribute arguments to target in DECL and determines
+   the right builtin to use to match the platform specification.
+   It returns the priority value for this version decl.  If PREDICATE_LIST
+   is not NULL, it stores the list of cpu features that need to be checked
+   before dispatching this function.  When PREDICATE_LIST is not NULL and
+   no dispatcher predicate exists for some part of the attribute, an error
+   is reported and 0 is returned.  */
+
+static unsigned int
+get_builtin_code_for_version (tree decl, tree *predicate_list)
+{
+  tree attrs;
+  struct cl_target_option cur_target;
+  tree target_node;
+  struct cl_target_option *new_target;
+  const char *arg_str = NULL;
+  const char *attrs_str = NULL;
+  char *tok_str = NULL;
+  char *token;
+
+  /* Priority of i386 features, greater value is higher priority.   This is
+     used to decide the order in which function dispatch must happen.  For
+     instance, a version specialized for SSE4.2 should be checked for dispatch
+     before a version for SSE3, as SSE4.2 implies SSE3.  */
+  enum feature_priority
+  {
+    P_ZERO = 0,
+    P_MMX,
+    P_SSE,
+    P_SSE2,
+    P_SSE3,
+    P_SSSE3,
+    P_PROC_SSSE3,
+    P_SSE4_a,
+    P_PROC_SSE4_a,
+    P_SSE4_1,
+    P_SSE4_2,
+    P_PROC_SSE4_2,
+    P_POPCNT,
+    P_AVX,
+    P_AVX2,
+    P_FMA,
+    P_PROC_FMA
+  };
+
+  enum feature_priority priority = P_ZERO;
+
+  /* These are the target attribute strings for which a dispatcher is
+     available, from fold_builtin_cpu.  */
+
+  static struct _feature_list
+    {
+      const char *const name;
+      const enum feature_priority priority;
+    }
+  const feature_list[] =
+    {
+      {"mmx", P_MMX},
+      {"sse", P_SSE},
+      {"sse2", P_SSE2},
+      {"sse3", P_SSE3},
+      {"ssse3", P_SSSE3},
+      {"sse4.1", P_SSE4_1},
+      {"sse4.2", P_SSE4_2},
+      {"popcnt", P_POPCNT},
+      {"avx", P_AVX},
+      {"avx2", P_AVX2}
+    };
+
+
+  static unsigned int NUM_FEATURES
+    = sizeof (feature_list) / sizeof (struct _feature_list);
+
+  unsigned int i;
+
+  tree predicate_chain = NULL_TREE;
+  tree predicate_decl, predicate_arg;
+
+  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+  gcc_assert (attrs != NULL);
+
+  attrs = TREE_VALUE (TREE_VALUE (attrs));
+
+  gcc_assert (TREE_CODE (attrs) == STRING_CST);
+  attrs_str = TREE_STRING_POINTER (attrs);
+
+
+  /* Handle arch= if specified.  For priority, set it to be 1 more than
+     the best instruction set the processor can handle.  For instance, if
+     there is a version for atom and a version for ssse3 (the highest ISA
+     priority for atom), the atom version must be checked for dispatch
+     before the ssse3 version.  */
+  if (strstr (attrs_str, "arch=") != NULL)
+    {
+      cl_target_option_save (&cur_target, &global_options);
+      target_node = ix86_valid_target_attribute_tree (attrs);
+
+      gcc_assert (target_node);
+      new_target = TREE_TARGET_OPTION (target_node);
+      gcc_assert (new_target);
+
+      if (new_target->arch_specified && new_target->arch > 0)
+        {
+          switch (new_target->arch)
+            {
+            case PROCESSOR_CORE2_32:
+            case PROCESSOR_CORE2_64:
+              arg_str = "core2";
+              priority = P_PROC_SSSE3;
+              break;
+            case PROCESSOR_COREI7_32:
+            case PROCESSOR_COREI7_64:
+              arg_str = "corei7";
+              priority = P_PROC_SSE4_2;
+              break;
+            case PROCESSOR_ATOM:
+              arg_str = "atom";
+              priority = P_PROC_SSSE3;
+              break;
+            case PROCESSOR_AMDFAM10:
+              arg_str = "amdfam10h";
+              priority = P_PROC_SSE4_a;
+              break;
+            case PROCESSOR_BDVER1:
+              arg_str = "bdver1";
+              priority = P_PROC_FMA;
+              break;
+            case PROCESSOR_BDVER2:
+              arg_str = "bdver2";
+              priority = P_PROC_FMA;
+              break;
+            }
+        }
+
+      cl_target_option_restore (&global_options, &cur_target);
+
+      if (predicate_list && arg_str == NULL)
+        {
+          error_at (DECL_SOURCE_LOCATION (decl),
+                    "No dispatcher found for the versioning attributes");
+          return 0;
+        }
+
+      if (predicate_list)
+        {
+          predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
+          /* For a C string literal the length includes the trailing NUL.  */
+          predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
+          predicate_chain = tree_cons (predicate_decl, predicate_arg,
+                                       predicate_chain);
+        }
+    }
+
+  /* Process feature name.  strtok modifies its argument, so tokenize a
+     private copy of the attribute string.  */
+  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
+  strcpy (tok_str, attrs_str);
+  token = strtok (tok_str, ",");
+  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
+
+  while (token != NULL)
+    {
+      /* Do not process "arch="  */
+      if (strncmp (token, "arch=", 5) == 0)
+        {
+          token = strtok (NULL, ",");
+          continue;
+        }
+      for (i = 0; i < NUM_FEATURES; ++i)
+        {
+          if (strcmp (token, feature_list[i].name) == 0)
+            {
+              if (predicate_list)
+                {
+                  predicate_arg = build_string_literal (
+                                  strlen (feature_list[i].name) + 1,
+                                  feature_list[i].name);
+                  predicate_chain = tree_cons (predicate_decl, predicate_arg,
+                                               predicate_chain);
+                }
+              /* Find the maximum priority feature.  */
+              if (feature_list[i].priority > priority)
+                priority = feature_list[i].priority;
+
+              break;
+            }
+        }
+      if (predicate_list && i == NUM_FEATURES)
+        {
+          error_at (DECL_SOURCE_LOCATION (decl),
+                    "No dispatcher found for %s", token);
+          /* TOKEN points into TOK_STR, so release the buffer only after
+             the diagnostic has been issued.  */
+          free (tok_str);
+          return 0;
+        }
+      token = strtok (NULL, ",");
+    }
+  free (tok_str);
+
+  if (predicate_list && predicate_chain == NULL_TREE)
+    {
+      error_at (DECL_SOURCE_LOCATION (decl),
+                "No dispatcher found for the versioning attributes : %s",
+                attrs_str);
+      return 0;
+    }
+  else if (predicate_list)
+    {
+      predicate_chain = nreverse (predicate_chain);
+      *predicate_list = predicate_chain;
+    }
+
+  return priority;
+}
+
+/* This compares the priority of target features in function DECL1
+   and DECL2.  It returns positive value if DECL1 is higher priority,
+   negative value if DECL2 is higher priority and 0 if they are the
+   same.  
*/ + +static int +ix86_compare_version_priority (tree decl1, tree decl2) +{ + unsigned int priority1 = 0; + unsigned int priority2 = 0; + + if (lookup_attribute ("target", DECL_ATTRIBUTES (decl1)) != NULL) + priority1 = get_builtin_code_for_version (decl1, NULL); + + if (lookup_attribute ("target", DECL_ATTRIBUTES (decl2)) != NULL) + priority2 = get_builtin_code_for_version (decl2, NULL); + + return (int)priority1 - (int)priority2; +} + +/* V1 and V2 point to function versions with different priorities + based on the target ISA. This function compares their priorities. */ + +static int +feature_compare (const void *v1, const void *v2) +{ + typedef struct _function_version_info + { + tree version_decl; + tree predicate_chain; + unsigned int dispatch_priority; + } function_version_info; + + const function_version_info c1 = *(const function_version_info *)v1; + const function_version_info c2 = *(const function_version_info *)v2; + return (c2.dispatch_priority - c1.dispatch_priority); +} + +/* This function generates the dispatch function for + multi-versioned functions. DISPATCH_DECL is the function which will + contain the dispatch logic. FNDECLS are the function choices for + dispatch, and is a tree chain. EMPTY_BB is the basic block pointer + in DISPATCH_DECL in which the dispatch code is generated. */ + +static int +dispatch_function_versions (tree dispatch_decl, + void *fndecls_p, + basic_block *empty_bb) +{ + tree default_decl; + gimple ifunc_cpu_init_stmt; + gimple_seq gseq; + int ix; + tree ele; + VEC (tree, heap) *fndecls; + unsigned int num_versions = 0; + unsigned int actual_versions = 0; + unsigned int i; + + struct _function_version_info + { + tree version_decl; + tree predicate_chain; + unsigned int dispatch_priority; + }*function_version_info; + + gcc_assert (dispatch_decl != NULL + && fndecls_p != NULL + && empty_bb != NULL); + + /*fndecls_p is actually a vector. 
*/ + fndecls = (VEC (tree, heap) *)fndecls_p; + + /* At least one more version other than the default. */ + num_versions = VEC_length (tree, fndecls); + gcc_assert (num_versions >= 2); + + function_version_info = (struct _function_version_info *) + XNEWVEC (struct _function_version_info, (num_versions - 1)); + + /* The first version in the vector is the default decl. */ + default_decl = VEC_index (tree, fndecls, 0); + + push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl)); + + gseq = bb_seq (*empty_bb); + /* Function version dispatch is via IFUNC. IFUNC resolvers fire before + constructors, so explicity call __builtin_cpu_init here. */ + ifunc_cpu_init_stmt = gimple_build_call_vec ( + ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], NULL); + gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); + gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb); + set_bb_seq (*empty_bb, gseq); + + pop_cfun (); + + + for (ix = 1; VEC_iterate (tree, fndecls, ix, ele); ++ix) + { + tree version_decl = ele; + tree predicate_chain = NULL_TREE; + unsigned int priority; + /* Get attribute string, parse it and find the right predicate decl. + The predicate function could be a lengthy combination of many + features, like arch-type and various isa-variants. */ + priority = get_builtin_code_for_version (version_decl, + &predicate_chain); + + if (predicate_chain == NULL_TREE) + continue; + + actual_versions++; + function_version_info [ix - 1].version_decl = version_decl; + function_version_info [ix - 1].predicate_chain = predicate_chain; + function_version_info [ix - 1].dispatch_priority = priority; + } + + /* Sort the versions according to descending order of dispatch priority. The + priority is based on the ISA. This is not a perfect solution. There + could still be ambiguity. If more than one function version is suitable + to execute, which one should be dispatched? In future, allow the user + to specify a dispatch priority next to the version. 
*/ + qsort (function_version_info, actual_versions, + sizeof (struct _function_version_info), feature_compare); + + for (i = 0; i < actual_versions; ++i) + *empty_bb = add_condition_to_bb (dispatch_decl, + function_version_info[i].version_decl, + function_version_info[i].predicate_chain, + *empty_bb); + + /* dispatch default version at the end. */ + *empty_bb = add_condition_to_bb (dispatch_decl, default_decl, + NULL, *empty_bb); + + free (function_version_info); + return 0; +} + +/* This function returns true if FN1 and FN2 are versions of the same function, + that is, the targets of the function decls are different. This assumes + that FN1 and FN2 have the same signature. */ + +static bool +ix86_function_versions (tree fn1, tree fn2) +{ + tree attr1, attr2; + struct cl_target_option *target1, *target2; + + if (TREE_CODE (fn1) != FUNCTION_DECL + || TREE_CODE (fn2) != FUNCTION_DECL) + return false; + + attr1 = DECL_FUNCTION_SPECIFIC_TARGET (fn1); + attr2 = DECL_FUNCTION_SPECIFIC_TARGET (fn2); + + /* Atleast one function decl should have target attribute specified. */ + if (attr1 == NULL_TREE && attr2 == NULL_TREE) + return false; + + if (attr1 == NULL_TREE) + attr1 = target_option_default_node; + else if (attr2 == NULL_TREE) + attr2 = target_option_default_node; + + target1 = TREE_TARGET_OPTION (attr1); + target2 = TREE_TARGET_OPTION (attr2); + + /* target1 and target2 must be different in some way. */ + if (target1->x_ix86_isa_flags == target2->x_ix86_isa_flags + && target1->x_target_flags == target2->x_target_flags + && target1->arch == target2->arch + && target1->tune == target2->tune + && target1->x_ix86_fpmath == target2->x_ix86_fpmath + && target1->branch_cost == target2->branch_cost) + return false; + + return true; +} + +/* Comparator function to be used in qsort routine to sort attribute + specification strings to "target". 
*/
+
+static int
+attr_strcmp (const void *v1, const void *v2)
+{
+  const char *c1 = *(char *const*)v1;
+  const char *c2 = *(char *const*)v2;
+  return strcmp (c1, c2);
+}
+
+/* STR is the argument to target attribute.  This function tokenizes
+   the comma separated arguments, sorts them and returns a string which
+   is a unique identifier for the comma separated arguments.   It also
+   replaces non-identifier characters "=,-" with "_".  The returned
+   string is allocated with xmalloc and is owned by the caller.  */
+
+static char *
+sorted_attr_string (const char *str)
+{
+  char **args = NULL;
+  char *attr_str, *ret_str;
+  char *attr = NULL;
+  unsigned int argnum = 1;
+  unsigned int i;
+
+  /* Upper bound on the number of arguments: one more than the number
+     of commas.  */
+  for (i = 0; i < strlen (str); i++)
+    if (str[i] == ',')
+      argnum++;
+
+  attr_str = (char *)xmalloc (strlen (str) + 1);
+  strcpy (attr_str, str);
+
+  /* Replace "=,-" with "_".  Commas are left alone here and become "_"
+     when the sorted tokens are joined below.  */
+  for (i = 0; i < strlen (attr_str); i++)
+    if (attr_str[i] == '=' || attr_str[i]== '-')
+      attr_str[i] = '_';
+
+  if (argnum == 1)
+    return attr_str;
+
+  args = XNEWVEC (char *, argnum);
+
+  /* Recount while tokenizing.  strtok skips empty tokens, so the number
+     of tokens can be smaller than the comma-based bound used to size
+     ARGS.  Sorting or joining the unfilled slots would read
+     uninitialized pointers.  */
+  argnum = 0;
+  attr = strtok (attr_str, ",");
+  while (attr != NULL)
+    {
+      args[argnum] = attr;
+      argnum++;
+      attr = strtok (NULL, ",");
+    }
+
+  qsort (args, argnum, sizeof (char*), attr_strcmp);
+
+  /* The joined string is never longer than STR: each "," separator is
+     replaced by at most one "_".  */
+  ret_str = (char *)xmalloc (strlen (str) + 1);
+  ret_str[0] = '\0';
+  for (i = 0; i < argnum; i++)
+    {
+      if (i > 0)
+        strcat (ret_str, "_");
+      strcat (ret_str, args[i]);
+    }
+
+  free (args);
+  free (attr_str);
+  return ret_str;
+}
+
+/* This function changes the assembler name for functions that are
+   versions.  If DECL is a function version and has a "target"
+   attribute, it appends the attribute string to its assembler name.  
*/ + +static tree +ix86_mangle_function_version_assembler_name (tree decl, tree id) +{ + tree version_attr; + const char *orig_name, *version_string, *attr_str; + char *assembler_name; + + if (DECL_DECLARED_INLINE_P (decl) + && lookup_attribute ("gnu_inline", + DECL_ATTRIBUTES (decl))) + error_at (DECL_SOURCE_LOCATION (decl), + "Function versions cannot be marked as gnu_inline," + " bodies have to be generated"); + + if (DECL_VIRTUAL_P (decl) + || DECL_VINDEX (decl)) + error_at (DECL_SOURCE_LOCATION (decl), + "Virtual function versioning not supported\n"); + + version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); + + /* target attribute string is NULL for default functions. */ + if (version_attr == NULL_TREE) + return id; + + orig_name = IDENTIFIER_POINTER (id); + version_string + = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr))); + + attr_str = sorted_attr_string (version_string); + assembler_name = (char *) xmalloc (strlen (orig_name) + + strlen (attr_str) + 2); + + sprintf (assembler_name, "%s.%s", orig_name, attr_str); + + /* Allow assembler name to be modified if already set. */ + if (DECL_ASSEMBLER_NAME_SET_P (decl)) + SET_DECL_RTL (decl, NULL); + + return get_identifier (assembler_name); +} + +static tree +ix86_mangle_decl_assembler_name (tree decl, tree id) +{ + /* For function version, add the target suffix to the assembler name. */ + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (decl)) + return ix86_mangle_function_version_assembler_name (decl, id); + + return id; +} + +/* Return a new name by appending SUFFIX to the DECL name. If make_unique + is true, append the full path name of the source file. 
*/ + +static char * +make_name (tree decl, const char *suffix, bool make_unique) +{ + char *global_var_name; + int name_len; + const char *name; + const char *unique_name = NULL; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + + /* Get a unique name that can be used globally without any chances + of collision at link time. */ + if (make_unique) + unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0")); + + name_len = strlen (name) + strlen (suffix) + 2; + + if (make_unique) + name_len += strlen (unique_name) + 1; + global_var_name = XNEWVEC (char, name_len); + + /* Use '.' to concatenate names as it is demangler friendly. */ + if (make_unique) + snprintf (global_var_name, name_len, "%s.%s.%s", name, + unique_name, suffix); + else + snprintf (global_var_name, name_len, "%s.%s", name, suffix); + + return global_var_name; +} + +/* Make a dispatcher declaration for the multi-versioned function DECL. + Calls to DECL function will be replaced with calls to the dispatcher + by the front-end. Return the decl created. */ + +static tree +make_dispatcher_decl (const tree decl) +{ + tree func_decl; + char *func_name, *resolver_name; + tree fn_type, func_type; + bool is_uniq = false; + + if (TREE_PUBLIC (decl) == 0) + is_uniq = true; + + func_name = make_name (decl, "ifunc", is_uniq); + resolver_name = make_name (decl, "resolver", is_uniq); + gcc_assert (resolver_name); + + fn_type = TREE_TYPE (decl); + func_type = build_function_type (TREE_TYPE (fn_type), + TYPE_ARG_TYPES (fn_type)); + + func_decl = build_fn_decl (func_name, func_type); + TREE_USED (func_decl) = 1; + DECL_CONTEXT (func_decl) = NULL_TREE; + DECL_INITIAL (func_decl) = error_mark_node; + DECL_ARTIFICIAL (func_decl) = 1; + /* Mark this func as external, the resolver will flip it again if + it gets generated. */ + DECL_EXTERNAL (func_decl) = 1; + /* This will be of type IFUNCs have to be externally visible. 
*/ + TREE_PUBLIC (func_decl) = 1; + + return func_decl; +} + +/* Returns true if decl is multi-versioned and DECL is the default function, + that is it is not tagged with target specific optimization. */ + +static bool +is_function_default_version (const tree decl) +{ + return (TREE_CODE (decl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (decl) + && DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL_TREE); +} + +/* Make a dispatcher declaration for the multi-versioned function DECL. + Calls to DECL function will be replaced with calls to the dispatcher + by the front-end. Returns the decl of the dispatcher function. */ + +static tree +ix86_get_function_versions_dispatcher (void *decl) +{ + tree fn = (tree) decl; + struct cgraph_node *node = NULL; + struct cgraph_node *default_node = NULL; + struct cgraph_function_version_info *node_v = NULL; + struct cgraph_function_version_info *it_v = NULL; + struct cgraph_function_version_info *first_v = NULL; + + tree dispatch_decl = NULL; + struct cgraph_node *dispatcher_node = NULL; + struct cgraph_function_version_info *dispatcher_version_info = NULL; + + struct cgraph_function_version_info *default_version_info = NULL; + + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); + + node = cgraph_get_node (fn); + gcc_assert (node != NULL); + + node_v = get_cgraph_node_version (node); + gcc_assert (node_v != NULL); + + if (node_v->dispatcher_resolver != NULL) + return node_v->dispatcher_resolver; + + /* Find the default version and make it the first node. */ + first_v = node_v; + /* Go to the beginning of the chain. */ + while (first_v->prev != NULL) + first_v = first_v->prev; + default_version_info = first_v; + while (default_version_info != NULL) + { + if (is_function_default_version + (default_version_info->this_node->symbol.decl)) + break; + default_version_info = default_version_info->next; + } + + /* If there is no default node, just return NULL. 
*/ + if (default_version_info == NULL) + return NULL; + + /* Make default info the first node. */ + if (first_v != default_version_info) + { + default_version_info->prev->next = default_version_info->next; + if (default_version_info->next) + default_version_info->next->prev = default_version_info->prev; + first_v->prev = default_version_info; + default_version_info->next = first_v; + default_version_info->prev = NULL; + } + + default_node = default_version_info->this_node; + +#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION + /* Right now, the dispatching is done via ifunc. */ + dispatch_decl = make_dispatcher_decl (default_node->symbol.decl); +#else + error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl), + "Multiversioning needs ifunc which is not supported " + "in this configuration"); +#endif + + dispatcher_node = cgraph_get_create_node (dispatch_decl); + gcc_assert (dispatcher_node != NULL); + dispatcher_node->dispatcher_function = 1; + dispatcher_version_info + = insert_new_cgraph_node_version (dispatcher_node); + dispatcher_version_info->next = default_version_info; + dispatcher_node->local.finalized = 1; + + /* Set the dispatcher for all the versions. */ + it_v = default_version_info; + while (it_v->next != NULL) + { + it_v->dispatcher_resolver = dispatch_decl; + it_v = it_v->next; + } + + return dispatch_decl; +} + +/* Makes a function attribute of the form NAME(ARG_NAME) and chains + it to CHAIN. */ + +static tree +make_attribute (const char *name, const char *arg_name, tree chain) +{ + tree attr_name; + tree attr_arg_name; + tree attr_args; + tree attr; + + attr_name = get_identifier (name); + attr_arg_name = build_string (strlen (arg_name), arg_name); + attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE); + attr = tree_cons (attr_name, attr_args, chain); + return attr; +} + +/* Make the resolver function decl to dispatch the versions of + a multi-versioned function, DEFAULT_DECL. 
Create an + empty basic block in the resolver and store the pointer in + EMPTY_BB. Return the decl of the resolver function. */ + +static tree +make_resolver_func (const tree default_decl, + const tree dispatch_decl, + basic_block *empty_bb) +{ + char *resolver_name; + tree decl, type, decl_name, t; + bool is_uniq = false; + + /* IFUNC's have to be globally visible. So, if the default_decl is + not, then the name of the IFUNC should be made unique. */ + if (TREE_PUBLIC (default_decl) == 0) + is_uniq = true; + + /* Append the filename to the resolver function if the versions are + not externally visible. This is because the resolver function has + to be externally visible for the loader to find it. So, appending + the filename will prevent conflicts with a resolver function from + another module which is based on the same version name. */ + resolver_name = make_name (default_decl, "resolver", is_uniq); + + /* The resolver function should return a (void *). */ + type = build_function_type_list (ptr_type_node, NULL_TREE); + + decl = build_fn_decl (resolver_name, type); + decl_name = get_identifier (resolver_name); + SET_DECL_ASSEMBLER_NAME (decl, decl_name); + + DECL_NAME (decl) = decl_name; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_IGNORED_P (decl) = 0; + /* IFUNC resolvers have to be externally visible. */ + TREE_PUBLIC (decl) = 1; + DECL_UNINLINABLE (decl) = 0; + + /* Resolver is not external, body is generated. */ + DECL_EXTERNAL (decl) = 0; + DECL_EXTERNAL (dispatch_decl) = 0; + + DECL_CONTEXT (decl) = NULL_TREE; + DECL_INITIAL (decl) = make_node (BLOCK); + DECL_STATIC_CONSTRUCTOR (decl) = 0; + + if (DECL_COMDAT_GROUP (default_decl) + || TREE_PUBLIC (default_decl)) + { + /* In this case, each translation unit with a call to this + versioned function will put out a resolver. Ensure it + is comdat to keep just one copy. 
*/ + DECL_COMDAT (decl) = 1; + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); + } + /* Build result decl and add to function_decl. */ + t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_RESULT (decl) = t; + + gimplify_function_tree (decl); + push_cfun (DECL_STRUCT_FUNCTION (decl)); + *empty_bb = init_lowered_empty_function (decl, false); + + cgraph_add_new_function (decl, true); + cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl)); + + pop_cfun (); + + gcc_assert (dispatch_decl != NULL); + /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */ + DECL_ATTRIBUTES (dispatch_decl) + = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); + + /* Create the alias for dispatch to resolver here. */ + /*cgraph_create_function_alias (dispatch_decl, decl);*/ + cgraph_same_body_alias (NULL, dispatch_decl, decl); + return decl; +} + +/* Generate the dispatching code body to dispatch multi-versioned function + DECL. The target hook is called to process the "target" attributes and + provide the code to dispatch the right function at run-time. NODE points + to the dispatcher decl whose body will be created. */ + +static tree +ix86_generate_version_dispatcher_body (void *node_p) +{ + tree resolver_decl; + basic_block empty_bb; + VEC (tree, heap) *fn_ver_vec = NULL; + tree default_ver_decl; + struct cgraph_node *versn; + struct cgraph_node *node; + + struct cgraph_function_version_info *node_version_info = NULL; + struct cgraph_function_version_info *versn_info = NULL; + + node = (cgraph_node *)node_p; + + node_version_info = get_cgraph_node_version (node); + gcc_assert (node->dispatcher_function + && node_version_info != NULL); + + if (node_version_info->dispatcher_resolver) + return node_version_info->dispatcher_resolver; + + /* The first version in the chain corresponds to the default version. 
*/ + default_ver_decl = node_version_info->next->this_node->symbol.decl; + + /* node is going to be an alias, so remove the finalized bit. */ + node->local.finalized = false; + + resolver_decl = make_resolver_func (default_ver_decl, + node->symbol.decl, &empty_bb); + + node_version_info->dispatcher_resolver = resolver_decl; + + push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); + + fn_ver_vec = VEC_alloc (tree, heap, 2); + + for (versn_info = node_version_info->next; versn_info; + versn_info = versn_info->next) + { + versn = versn_info->this_node; + /* Check for virtual functions here again, as by this time it should + have been determined if this function needs a vtable index or + not. This happens for methods in derived classes that override + virtual methods in base classes but are not explicitly marked as + virtual. */ + if (DECL_VINDEX (versn->symbol.decl)) + error_at (DECL_SOURCE_LOCATION (versn->symbol.decl), + "Virtual function multiversioning not supported"); + VEC_safe_push (tree, heap, fn_ver_vec, versn->symbol.decl); + } + + dispatch_function_versions (resolver_decl, fn_ver_vec, &empty_bb); + + rebuild_cgraph_edges (); + pop_cfun (); + return resolver_decl; +} /* This builds the processor_model struct type defined in libgcc/config/i386/cpuinfo.c */ @@ -28651,6 +29614,8 @@ fold_builtin_cpu (tree fndecl, tree *args) { tree ref; tree field; + tree final; + unsigned int field_val = 0; unsigned int NUM_ARCH_NAMES = sizeof (arch_names_table) / sizeof (struct _arch_names_table); @@ -28690,14 +29655,17 @@ fold_builtin_cpu (tree fndecl, tree *args) field, NULL_TREE); /* Check the value. 
*/ - return build2 (EQ_EXPR, unsigned_type_node, ref, - build_int_cstu (unsigned_type_node, field_val)); + final = build2 (EQ_EXPR, unsigned_type_node, ref, + build_int_cstu (unsigned_type_node, field_val)); + return build1 (CONVERT_EXPR, integer_type_node, final); } else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) { tree ref; tree array_elt; tree field; + tree final; + unsigned int field_val = 0; unsigned int NUM_ISA_NAMES = sizeof (isa_names_table) / sizeof (struct _isa_names_table); @@ -28729,8 +29697,9 @@ fold_builtin_cpu (tree fndecl, tree *args) field_val = (1 << isa_names_table[i].feature); /* Return __cpu_model.__cpu_features[0] & field_val */ - return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, - build_int_cstu (unsigned_type_node, field_val)); + final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, + build_int_cstu (unsigned_type_node, field_val)); + return build1 (CONVERT_EXPR, integer_type_node, final); } gcc_unreachable (); } @@ -41218,6 +42187,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_PROFILE_BEFORE_PROLOGUE #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue +#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME +#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name + #undef TARGET_ASM_UNALIGNED_HI_OP #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP #undef TARGET_ASM_UNALIGNED_SI_OP @@ -41311,6 +42283,17 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_FOLD_BUILTIN #define TARGET_FOLD_BUILTIN ix86_fold_builtin +#undef TARGET_COMPARE_VERSION_PRIORITY +#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority + +#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY +#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ + ix86_generate_version_dispatcher_body + +#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER +#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ + ix86_get_function_versions_dispatcher + #undef TARGET_ENUM_VA_LIST_P #define TARGET_ENUM_VA_LIST_P 
ix86_enum_va_list @@ -41451,6 +42434,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_OPTION_PRINT #define TARGET_OPTION_PRINT ix86_function_specific_print +#undef TARGET_OPTION_FUNCTION_VERSIONS +#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions + #undef TARGET_CAN_INLINE_P #define TARGET_CAN_INLINE_P ix86_can_inline_p diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index cc4db401af8..5e73e1bc0c7 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,22 @@ +2012-11-05 Sriraman Tallam + + * class.c (add_method): Change assembler names of function versions. + (mark_versions_used): New static function. + (resolve_address_of_overloaded_function): Create dispatcher decl and + return address of dispatcher instead. + * decl.c (decls_match): Make decls unmatched for versioned + functions. + (duplicate_decls): Remove ambiguity for versioned functions. + Delete versioned function data for merged decls. + * decl2.c (check_classfn): Check attributes of versioned functions + for match. + * call.c (get_function_version_dispatcher): New function. + (mark_versions_used): New static function. + (build_over_call): Make calls to multiversioned functions + to call the dispatcher. + (joust): For calls to multi-versioned functions, make the most + specialized function version win. + 2012-10-31 Lawrence Crowl * decl2.c (var_finalized_p): Rename varpool_node to diff --git a/gcc/cp/call.c b/gcc/cp/call.c index fcc973505be..4373bce6931 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "c-family/c-objc.h" #include "timevar.h" +#include "cgraph.h" /* The various kinds of conversion. */ @@ -6514,6 +6515,63 @@ magic_varargs_p (tree fn) return false; } +/* Returns the decl of the dispatcher function if FN is a function version. 
*/ + +static tree +get_function_version_dispatcher (tree fn) +{ + tree dispatcher_decl = NULL; + + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (fn)); + + gcc_assert (targetm.get_function_versions_dispatcher); + dispatcher_decl = targetm.get_function_versions_dispatcher (fn); + + if (dispatcher_decl == NULL) + { + error_at (input_location, "Call to multiversioned function" + " without a default is not allowed"); + return NULL; + } + + retrofit_lang_decl (dispatcher_decl); + gcc_assert (dispatcher_decl != NULL); + return dispatcher_decl; +} + +/* fn is a function version dispatcher that is marked used. Mark all the + semantically identical function versions it will dispatch as used. */ + +static void +mark_versions_used (tree fn) +{ + struct cgraph_node *node; + struct cgraph_function_version_info *node_v; + struct cgraph_function_version_info *it_v; + + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL); + + node = cgraph_get_node (fn); + if (node == NULL) + return; + + gcc_assert (node->dispatcher_function); + + node_v = get_cgraph_node_version (node); + if (node_v == NULL) + return; + + /* All semantically identical versions are chained. Traverse and mark each + one of them as used. */ + it_v = node_v->next; + while (it_v != NULL) + { + mark_used (it_v->this_node->symbol.decl); + it_v = it_v->next; + } +} + /* Subroutine of the various build_*_call functions. Overload resolution has chosen a winning candidate CAND; build up a CALL_EXPR accordingly. ARGS is a TREE_LIST of the unconverted arguments to the call. FLAGS is a @@ -6963,6 +7021,22 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) return fold_convert (void_type_node, argarray[0]); /* FIXME handle trivial default constructor, too. */ + /* For calls to a multi-versioned function, overload resolution + returns the function with the highest target priority, that is, + the version that will be checked for dispatching first. 
If this + version is inlinable, a direct call to this version can be made + otherwise the call should go through the dispatcher. */ + + if (DECL_FUNCTION_VERSIONED (fn) + && !targetm.target_option.can_inline_p (current_function_decl, fn)) + { + fn = get_function_version_dispatcher (fn); + if (fn == NULL) + return NULL; + if (!already_used) + mark_versions_used (fn); + } + if (!already_used) mark_used (fn); @@ -8481,6 +8555,38 @@ joust (struct z_candidate *cand1, struct z_candidate *cand2, bool warn, } } + /* For candidates of a multi-versioned function, make the version with + the highest priority win. This version will be checked for dispatching + first. If this version can be inlined into the caller, the front-end + will simply make a direct call to this function. */ + + if (TREE_CODE (cand1->fn) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (cand1->fn) + && TREE_CODE (cand2->fn) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (cand2->fn)) + { + tree f1 = TREE_TYPE (cand1->fn); + tree f2 = TREE_TYPE (cand2->fn); + tree p1 = TYPE_ARG_TYPES (f1); + tree p2 = TYPE_ARG_TYPES (f2); + + /* Check if cand1->fn and cand2->fn are versions of the same function. It + is possible that cand1->fn and cand2->fn are function versions but of + different functions. Check types to see if they are versions of the same + function. */ + if (compparms (p1, p2) + && same_type_p (TREE_TYPE (f1), TREE_TYPE (f2))) + { + /* Always make the version with the higher priority, more + specialized, win. */ + gcc_assert (targetm.compare_version_priority); + if (targetm.compare_version_priority (cand1->fn, cand2->fn) >= 0) + return 1; + else + return -1; + } + } + /* If the two function declarations represent the same function (this can happen with declarations in multiple scopes and arg-dependent lookup), arbitrarily choose one. 
But first make sure the default args we're diff --git a/gcc/cp/class.c b/gcc/cp/class.c index e55f1f9c2b7..a91e63a6301 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -1087,6 +1087,35 @@ add_method (tree type, tree method, tree using_decl) || same_type_p (TREE_TYPE (fn_type), TREE_TYPE (method_type)))) { + /* For function versions, their parms and types match + but they are not duplicates. Record function versions + as and when they are found. extern "C" functions are + not treated as versions. */ + if (TREE_CODE (fn) == FUNCTION_DECL + && TREE_CODE (method) == FUNCTION_DECL + && !DECL_EXTERN_C_P (fn) + && !DECL_EXTERN_C_P (method) + && (DECL_FUNCTION_SPECIFIC_TARGET (fn) + || DECL_FUNCTION_SPECIFIC_TARGET (method)) + && targetm.target_option.function_versions (fn, method)) + { + /* Mark functions as versions if necessary. Modify the mangled + decl name if necessary. */ + if (!DECL_FUNCTION_VERSIONED (fn)) + { + DECL_FUNCTION_VERSIONED (fn) = 1; + if (DECL_ASSEMBLER_NAME_SET_P (fn)) + mangle_decl (fn); + } + if (!DECL_FUNCTION_VERSIONED (method)) + { + DECL_FUNCTION_VERSIONED (method) = 1; + if (DECL_ASSEMBLER_NAME_SET_P (method)) + mangle_decl (method); + } + record_function_versions (fn, method); + continue; + } if (DECL_INHERITED_CTOR_BASE (method)) { if (DECL_INHERITED_CTOR_BASE (fn)) @@ -6951,6 +6980,38 @@ pop_lang_context (void) { current_lang_name = VEC_pop (tree, current_lang_base); } + +/* fn is a function version dispatcher that is marked used. Mark all the + semantically identical function versions it will dispatch as used. 
*/ + +static void +mark_versions_used (tree fn) +{ + struct cgraph_node *node; + struct cgraph_function_version_info *node_v; + struct cgraph_function_version_info *it_v; + + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL); + + node = cgraph_get_node (fn); + if (node == NULL) + return; + + gcc_assert (node->dispatcher_function); + + node_v = get_cgraph_node_version (node); + if (node_v == NULL) + return; + + /* All semantically identical versions are chained. Traverse and mark each + one of them as used. */ + it_v = node_v->next; + while (it_v != NULL) + { + mark_used (it_v->this_node->symbol.decl); + it_v = it_v->next; + } +} /* Type instantiation routines. */ @@ -7162,12 +7223,26 @@ resolve_address_of_overloaded_function (tree target_type, { /* There were too many matches. First check if they're all the same function. */ - tree match; + tree match = NULL_TREE; fn = TREE_PURPOSE (matches); - for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match)) - if (!decls_match (fn, TREE_PURPOSE (match))) - break; + + /* For multi-versioned functions, more than one match is just fine. + Call decls_match to make sure they are different because they are + versioned. */ + if (DECL_FUNCTION_VERSIONED (fn)) + { + for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match)) + if (!DECL_FUNCTION_VERSIONED (TREE_PURPOSE (match)) + || decls_match (fn, TREE_PURPOSE (match))) + break; + } + else + { + for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match)) + if (!decls_match (fn, TREE_PURPOSE (match))) + break; + } if (match) { @@ -7208,6 +7283,28 @@ resolve_address_of_overloaded_function (tree target_type, } } + /* If a pointer to a function that is multi-versioned is requested, the + pointer to the dispatcher function is returned instead. This works + well because indirectly calling the function will dispatch the right + function version at run-time. 
*/ + if (DECL_FUNCTION_VERSIONED (fn)) + { + tree dispatcher_decl = NULL; + gcc_assert (targetm.get_function_versions_dispatcher); + dispatcher_decl = targetm.get_function_versions_dispatcher (fn); + if (!dispatcher_decl) + { + error_at (input_location, "Pointer to a multiversioned function" + " without a default is not allowed"); + return error_mark_node; + } + retrofit_lang_decl (dispatcher_decl); + fn = dispatcher_decl; + /* Mark all the versions corresponding to the dispatcher as used. */ + if (!(flags & tf_conv)) + mark_versions_used (fn); + } + /* If we're doing overload resolution purely for the purpose of determining conversion sequences, we should not consider the function used. If this conversion sequence is selected, the diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index d25aa804971..f8f9d4f2391 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "pointer-set.h" #include "splay-tree.h" #include "plugin.h" +#include "cgraph.h" /* Possible cases of bad specifiers type used by bad_specifiers. */ enum bad_spec_place { @@ -981,6 +982,36 @@ decls_match (tree newdecl, tree olddecl) if (t1 != t2) return 0; + /* The decls don't match if they correspond to two different versions + of the same function. Disallow extern "C" functions to be + versions for now. */ + if (compparms (p1, p2) + && same_type_p (TREE_TYPE (f1), TREE_TYPE (f2)) + && !DECL_EXTERN_C_P (newdecl) + && !DECL_EXTERN_C_P (olddecl) + && targetm.target_option.function_versions (newdecl, olddecl)) + { + /* Mark functions as versions if necessary. Modify the mangled decl + name if necessary. 
*/ + if (DECL_FUNCTION_VERSIONED (newdecl) + && DECL_FUNCTION_VERSIONED (olddecl)) + return 0; + if (!DECL_FUNCTION_VERSIONED (newdecl)) + { + DECL_FUNCTION_VERSIONED (newdecl) = 1; + if (DECL_ASSEMBLER_NAME_SET_P (newdecl)) + mangle_decl (newdecl); + } + if (!DECL_FUNCTION_VERSIONED (olddecl)) + { + DECL_FUNCTION_VERSIONED (olddecl) = 1; + if (DECL_ASSEMBLER_NAME_SET_P (olddecl)) + mangle_decl (olddecl); + } + record_function_versions (olddecl, newdecl); + return 0; + } + if (CP_DECL_CONTEXT (newdecl) != CP_DECL_CONTEXT (olddecl) && ! (DECL_EXTERN_C_P (newdecl) && DECL_EXTERN_C_P (olddecl))) @@ -1499,7 +1530,11 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) error ("previous declaration %q+#D here", olddecl); return NULL_TREE; } - else if (compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)), + /* For function versions, params and types match, but they + are not ambiguous. */ + else if ((!DECL_FUNCTION_VERSIONED (newdecl) + && !DECL_FUNCTION_VERSIONED (olddecl)) + && compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)), TYPE_ARG_TYPES (TREE_TYPE (olddecl)))) { error ("new declaration %q#D", newdecl); @@ -2272,6 +2307,15 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) else if (DECL_PRESERVE_P (newdecl)) DECL_PRESERVE_P (olddecl) = 1; + /* If the olddecl is a version, so is the newdecl. */ + if (TREE_CODE (newdecl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (olddecl)) + { + DECL_FUNCTION_VERSIONED (newdecl) = 1; + /* newdecl will be purged and is no longer a version. 
*/ + delete_function_version (newdecl); + } + if (TREE_CODE (newdecl) == FUNCTION_DECL) { int function_size; diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c index f3ce643e12e..90ee16bde96 100644 --- a/gcc/cp/decl2.c +++ b/gcc/cp/decl2.c @@ -674,9 +674,13 @@ check_classfn (tree ctype, tree function, tree template_parms) if (is_template != (TREE_CODE (fndecl) == TEMPLATE_DECL)) continue; + /* While finding a match, same types and params are not enough + if the function is versioned. Also check version ("target") + attributes. */ if (same_type_p (TREE_TYPE (TREE_TYPE (function)), TREE_TYPE (TREE_TYPE (fndecl))) && compparms (p1, p2) + && !targetm.target_option.function_versions (function, fndecl) && (!is_template || comp_template_parms (template_parms, DECL_TEMPLATE_PARMS (fndecl))) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 665c5b1edd6..dbf6c20b8dd 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -9929,6 +9929,14 @@ changed via the optimize attribute or pragma, see @code{TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE} @end deftypefn +@deftypefn {Target Hook} bool TARGET_OPTION_FUNCTION_VERSIONS (tree @var{decl1}, tree @var{decl2}) +This target hook returns @code{true} if @var{DECL1} and @var{DECL2} are +versions of the same function. @var{DECL1} and @var{DECL2} are function +versions if and only if they have the same function signature and +different target specific attributes, that is, they are compiled for +different target machines. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_CAN_INLINE_P (tree @var{caller}, tree @var{callee}) This target hook returns @code{false} if the @var{caller} function cannot inline @var{callee}, based on target specific information. By @@ -10952,6 +10960,29 @@ The result is another tree containing a simplified expression for the call's result. If @var{ignore} is true the value will be ignored. 
@end deftypefn +@deftypefn {Target Hook} int TARGET_COMPARE_VERSION_PRIORITY (tree @var{decl1}, tree @var{decl2}) +This hook is used to compare the target attributes in two functions to +determine which function's features get higher priority. This is used +during function multi-versioning to figure out the order in which two +versions must be dispatched. A function version with a higher priority +is checked for dispatching earlier. @var{decl1} and @var{decl2} are + the two function decls that will be compared. +@end deftypefn + +@deftypefn {Target Hook} tree TARGET_GET_FUNCTION_VERSIONS_DISPATCHER (void *@var{decl}) +This hook is used to get the dispatcher function for a set of function +versions. The dispatcher function is called to invoke the right function +version at run-time. @var{decl} is one version from a set of semantically +identical versions. +@end deftypefn + +@deftypefn {Target Hook} tree TARGET_GENERATE_VERSION_DISPATCHER_BODY (void *@var{arg}) +This hook is used to generate the dispatcher logic to invoke the right +function version at run-time for a given set of function versions. +@var{arg} points to the callgraph node of the dispatcher function whose +body must be generated. +@end deftypefn + @deftypefn {Target Hook} {const char *} TARGET_INVALID_WITHIN_DOLOOP (const_rtx @var{insn}) Take an instruction in @var{insn} and return NULL if it is valid within a diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 289934be17e..575cc7322af 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -9790,6 +9790,14 @@ changed via the optimize attribute or pragma, see @code{TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE} @end deftypefn +@hook TARGET_OPTION_FUNCTION_VERSIONS +This target hook returns @code{true} if @var{DECL1} and @var{DECL2} are +versions of the same function. 
@var{DECL1} and @var{DECL2} are function +versions if and only if they have the same function signature and +different target specific attributes, that is, they are compiled for +different target machines. +@end deftypefn + @hook TARGET_CAN_INLINE_P This target hook returns @code{false} if the @var{caller} function cannot inline @var{callee}, based on target specific information. By @@ -10798,6 +10806,29 @@ The result is another tree containing a simplified expression for the call's result. If @var{ignore} is true the value will be ignored. @end deftypefn +@hook TARGET_COMPARE_VERSION_PRIORITY +This hook is used to compare the target attributes in two functions to +determine which function's features get higher priority. This is used +during function multi-versioning to figure out the order in which two +versions must be dispatched. A function version with a higher priority +is checked for dispatching earlier. @var{decl1} and @var{decl2} are + the two function decls that will be compared. +@end deftypefn + +@hook TARGET_GET_FUNCTION_VERSIONS_DISPATCHER +This hook is used to get the dispatcher function for a set of function +versions. The dispatcher function is called to invoke the right function +version at run-time. @var{decl} is one version from a set of semantically +identical versions. +@end deftypefn + +@hook TARGET_GENERATE_VERSION_DISPATCHER_BODY +This hook is used to generate the dispatcher logic to invoke the right +function version at run-time for a given set of function versions. +@var{arg} points to the callgraph node of the dispatcher function whose +body must be generated. 
+@end deftypefn + @hook TARGET_INVALID_WITHIN_DOLOOP Take an instruction in @var{insn} and return NULL if it is valid within a diff --git a/gcc/target.def b/gcc/target.def index 586522435a2..2801aea5a17 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1298,6 +1298,37 @@ DEFHOOK tree, (tree fndecl, int n_args, tree *argp, bool ignore), hook_tree_tree_int_treep_bool_null) +/* Target hook is used to compare the target attributes in two functions to + determine which function's features get higher priority. This is used + during function multi-versioning to figure out the order in which two + versions must be dispatched. A function version with a higher priority + is checked for dispatching earlier. DECL1 and DECL2 are + the two function decls that will be compared. It returns positive value + if DECL1 is higher priority, negative value if DECL2 is higher priority + and 0 if they are the same. */ +DEFHOOK +(compare_version_priority, + "", + int, (tree decl1, tree decl2), NULL) + +/* Target hook is used to generate the dispatcher logic to invoke the right + function version at run-time for a given set of function versions. + ARG points to the callgraph node of the dispatcher function whose body + must be generated. */ +DEFHOOK +(generate_version_dispatcher_body, + "", + tree, (void *arg), NULL) + +/* Target hook is used to get the dispatcher function for a set of function + versions. The dispatcher function is called to invoke the right function + version at run-time. DECL is one version from a set of semantically + identical versions. */ +DEFHOOK +(get_function_versions_dispatcher, + "", + tree, (void *decl), NULL) + /* Returns a code for a target-specific builtin that implements reciprocal of the function, or NULL_TREE if not available. */ DEFHOOK @@ -2774,6 +2805,16 @@ DEFHOOK void, (void), hook_void_void) +/* This function returns true if DECL1 and DECL2 are versions of the same + function. 
DECL1 and DECL2 are function versions if and only if they + have the same function signature and different target specific attributes, + that is, they are compiled for different target machines. */ +DEFHOOK +(function_versions, + "", + bool, (tree decl1, tree decl2), + hook_bool_tree_tree_false) + /* Function to determine if one function can inline another function. */ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0757405ba6a..e44a637b1ee 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2012-11-05 Sriraman Tallam + + * testsuite/g++.dg/mv1.C: New test. + * testsuite/g++.dg/mv2.C: New test. + * testsuite/g++.dg/mv3.C: New test. + * testsuite/g++.dg/mv4.C: New test. + * testsuite/g++.dg/mv5.C: New test. + * testsuite/g++.dg/mv6.C: New test. + 2012-11-05 Hans-Peter Nilsson PR testsuite/55186 diff --git a/gcc/testsuite/g++.dg/mv1.C b/gcc/testsuite/g++.dg/mv1.C new file mode 100644 index 00000000000..676e48577af --- /dev/null +++ b/gcc/testsuite/g++.dg/mv1.C @@ -0,0 +1,130 @@ +/* Test case to check if Multiversioning works. */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -fPIC -mno-avx -mno-popcnt" } */ + +#include <assert.h> + +/* Default version. */ +int foo (); +/* The other versions of foo. Mix up the ordering and + check if the dispatching does it in the order of priority. */ +/* Check combination of target attributes. */ +int foo () __attribute__ ((target("arch=corei7,popcnt"))); +/* The target operands in this declaration and the definition are re-ordered. + This should still work. */ +int foo () __attribute__ ((target("ssse3,avx2"))); + +/* Check for all target attributes for which dispatchers are available.
*/ +/* Check arch= */ +int foo () __attribute__((target("arch=core2"))); +int foo () __attribute__((target("arch=corei7"))); +int foo () __attribute__((target("arch=atom"))); +/* Check ISAs */ +int foo () __attribute__((target("avx"))); +int foo () __attribute__ ((target("arch=core2,sse4.2"))); +/* Check more arch=. */ +int foo () __attribute__((target("arch=amdfam10"))); +int foo () __attribute__((target("arch=bdver1"))); +int foo () __attribute__((target("arch=bdver2"))); + +int (*p)() = &foo; +int main () +{ + int val = foo (); + assert (val == (*p)()); + + /* Check in the exact same order in which the dispatching + is expected to happen. */ + if (__builtin_cpu_is ("bdver1")) + assert (val == 1); + else if (__builtin_cpu_is ("bdver2")) + assert (val == 2); + else if (__builtin_cpu_supports ("avx2") + && __builtin_cpu_supports ("ssse3")) + assert (val == 3); + else if (__builtin_cpu_supports ("avx")) + assert (val == 4); + else if (__builtin_cpu_is ("corei7") + && __builtin_cpu_supports ("popcnt")) + assert (val == 5); + else if (__builtin_cpu_is ("corei7")) + assert (val == 6); + else if (__builtin_cpu_is ("amdfam10h")) + assert (val == 7); + else if (__builtin_cpu_is ("core2") + && __builtin_cpu_supports ("sse4.2")) + assert (val == 8); + else if (__builtin_cpu_is ("core2")) + assert (val == 9); + else if (__builtin_cpu_is ("atom")) + assert (val == 10); + else + assert (val == 0); + + return 0; +} + +int foo () +{ + return 0; +} + +int __attribute__ ((target("arch=corei7,popcnt"))) +foo () +{ + return 5; +} +int __attribute__ ((target("avx2,ssse3"))) +foo () +{ + return 3; +} + +int __attribute__ ((target("arch=core2"))) +foo () +{ + return 9; +} + +int __attribute__ ((target("arch=corei7"))) +foo () +{ + return 6; +} + +int __attribute__ ((target("arch=atom"))) +foo () +{ + return 10; +} + +int __attribute__ ((target("avx"))) +foo () +{ + return 4; +} + +int __attribute__ ((target("arch=core2,sse4.2"))) +foo () +{ + return 8; +} + +int __attribute__ 
((target("arch=amdfam10"))) +foo () +{ + return 7; +} + +int __attribute__ ((target("arch=bdver1"))) +foo () +{ + return 1; +} + +int __attribute__ ((target("arch=bdver2"))) +foo () +{ + return 2; +} diff --git a/gcc/testsuite/g++.dg/mv2.C b/gcc/testsuite/g++.dg/mv2.C new file mode 100644 index 00000000000..f94877a674f --- /dev/null +++ b/gcc/testsuite/g++.dg/mv2.C @@ -0,0 +1,118 @@ +/* Test case to check if Multiversioning chooses the correct + dispatching order when versions are for various ISAs. */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -mno-sse -mno-mmx -mno-popcnt -mno-avx" } */ + +#include <assert.h> + +/* Default version. */ +int foo (); +/* The dispatch checks should be in the exact reverse order of the + declarations below. */ +int foo () __attribute__ ((target ("mmx"))); +int foo () __attribute__ ((target ("sse"))); +int foo () __attribute__ ((target ("sse2"))); +int foo () __attribute__ ((target ("sse3"))); +int foo () __attribute__ ((target ("ssse3"))); +int foo () __attribute__ ((target ("sse4.1"))); +int foo () __attribute__ ((target ("sse4.2"))); +int foo () __attribute__ ((target ("popcnt"))); +int foo () __attribute__ ((target ("avx"))); +int foo () __attribute__ ((target ("avx2"))); + +int main () +{ + int val = foo (); + + if (__builtin_cpu_supports ("avx2")) + assert (val == 1); + else if (__builtin_cpu_supports ("avx")) + assert (val == 2); + else if (__builtin_cpu_supports ("popcnt")) + assert (val == 3); + else if (__builtin_cpu_supports ("sse4.2")) + assert (val == 4); + else if (__builtin_cpu_supports ("sse4.1")) + assert (val == 5); + else if (__builtin_cpu_supports ("ssse3")) + assert (val == 6); + else if (__builtin_cpu_supports ("sse3")) + assert (val == 7); + else if (__builtin_cpu_supports ("sse2")) + assert (val == 8); + else if (__builtin_cpu_supports ("sse")) + assert (val == 9); + else if (__builtin_cpu_supports ("mmx")) + assert (val == 10); + else + assert (val == 0); + +
return 0; +} + +int +foo () +{ + return 0; +} + +int __attribute__ ((target("mmx"))) +foo () +{ + return 10; +} + +int __attribute__ ((target("sse"))) +foo () +{ + return 9; +} + +int __attribute__ ((target("sse2"))) +foo () +{ + return 8; +} + +int __attribute__ ((target("sse3"))) +foo () +{ + return 7; +} + +int __attribute__ ((target("ssse3"))) +foo () +{ + return 6; +} + +int __attribute__ ((target("sse4.1"))) +foo () +{ + return 5; +} + +int __attribute__ ((target("sse4.2"))) +foo () +{ + return 4; +} + +int __attribute__ ((target("popcnt"))) +foo () +{ + return 3; +} + +int __attribute__ ((target("avx"))) +foo () +{ + return 2; +} + +int __attribute__ ((target("avx2"))) +foo () +{ + return 1; +} diff --git a/gcc/testsuite/g++.dg/mv3.C b/gcc/testsuite/g++.dg/mv3.C new file mode 100644 index 00000000000..c7088f2b013 --- /dev/null +++ b/gcc/testsuite/g++.dg/mv3.C @@ -0,0 +1,36 @@ +/* Test case to check if a call to a multiversioned function + is replaced with a direct call to the particular version when + the most specialized version's target attributes match the + caller. + + In this program, foo is multiversioned but there is no default + function. This is an error if the call has to go through a + dispatcher. However, the call to foo in bar can be replaced + with a direct call to the popcnt version of foo. Hence, this + test should pass. 
*/ + +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -mno-sse -mno-popcnt" } */ + + +int __attribute__ ((target ("sse"))) +foo () +{ + return 1; +} +int __attribute__ ((target ("popcnt"))) +foo () +{ + return 0; +} + +int __attribute__ ((target ("popcnt"))) +bar () +{ + return foo (); +} + +int main () +{ + return bar (); +} diff --git a/gcc/testsuite/g++.dg/mv4.C b/gcc/testsuite/g++.dg/mv4.C new file mode 100644 index 00000000000..1a7290643ac --- /dev/null +++ b/gcc/testsuite/g++.dg/mv4.C @@ -0,0 +1,23 @@ +/* Test case to check if the compiler generates an error message + when the default version of a multiversioned function is absent + and its pointer is taken. */ + +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -mno-sse -mno-popcnt" } */ + +int __attribute__ ((target ("sse"))) +foo () +{ + return 1; +} +int __attribute__ ((target ("popcnt"))) +foo () +{ + return 0; +} + +int main () +{ + int (*p)() = &foo; /* { dg-error "Pointer to a multiversioned function without a default is not allowed" {} } */ + return (*p)(); +} diff --git a/gcc/testsuite/g++.dg/mv5.C b/gcc/testsuite/g++.dg/mv5.C new file mode 100644 index 00000000000..33d72804bcf --- /dev/null +++ b/gcc/testsuite/g++.dg/mv5.C @@ -0,0 +1,24 @@ +/* Test case to check if multiversioned functions are still generated if they are + marked comdat with inline keyword. */ + +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -mno-popcnt" } */ + + +/* Default version. */ +inline int +foo () +{ + return 0; +} + +inline int __attribute__ ((target ("popcnt"))) +foo () +{ + return 0; +} + +int main () +{ + return foo (); +} diff --git a/gcc/testsuite/g++.dg/mv6.C b/gcc/testsuite/g++.dg/mv6.C new file mode 100644 index 00000000000..7e5aa29d0bb --- /dev/null +++ b/gcc/testsuite/g++.dg/mv6.C @@ -0,0 +1,25 @@ +/* Test to check if member version multiversioning works correctly. 
*/ + +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ + +class Foo +{ + public: + /* Default version of foo. */ + int foo () + { + return 0; + } + /* corei7 version of foo. */ + __attribute__ ((target("arch=corei7"))) + int foo () + { + return 0; + } +}; + +int main () +{ + Foo f; + return f.foo (); +} diff --git a/gcc/tree.h b/gcc/tree.h index d921886c2ea..5fe1f1f46bd 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3480,6 +3480,12 @@ extern VEC(tree, gc) **decl_debug_args_insert (tree); #define DECL_FUNCTION_SPECIFIC_OPTIMIZATION(NODE) \ (FUNCTION_DECL_CHECK (NODE)->function_decl.function_specific_optimization) +/* In FUNCTION_DECL, this is set if this function has other versions generated + using "target" attributes. The default version is the one which does not + have any "target" attribute set. */ +#define DECL_FUNCTION_VERSIONED(NODE)\ + (FUNCTION_DECL_CHECK (NODE)->function_decl.versioned_function) + /* FUNCTION_DECL inherits from DECL_NON_COMMON because of the use of the arguments/result/saved_tree fields by front ends. It was either inherit FUNCTION_DECL from non_common, or inherit non_common from FUNCTION_DECL, @@ -3524,8 +3530,8 @@ struct GTY(()) tree_function_decl { unsigned looping_const_or_pure_flag : 1; unsigned has_debug_args_flag : 1; unsigned tm_clone_flag : 1; - - /* 1 bit left */ + unsigned versioned_function : 1; + /* No bits left. */ }; /* The source language of the translation-unit. */