sw-1.c: Force rep;movsb.
* gcc.target/i386/sw-1.c: Force rep;movsb. * config/i386/i386.h (processor_costs): Add second dimension to stringop_algs array. * config/i386/i386.c (cost models): Initialize second dimension of stringop_algs arrays. (core_cost): New costs based on generic64 costs with updated stringop values. (promote_duplicated_reg): Add support for vector modes, add declaration. (promote_duplicated_reg_to_size): Likewise. (processor_target): Set core costs for core variants. (expand_set_or_movmem_via_loop_with_iter): New function. (expand_set_or_movmem_via_loop): Enable reuse of the same iters in different loops, produced by this function. (emit_strset): New function. (expand_movmem_epilogue): Add epilogue generation for bigger sizes, use SSE-moves where possible. (expand_setmem_epilogue): Likewise. (expand_movmem_prologue): Likewise for prologue. (expand_setmem_prologue): Likewise. (expand_constant_movmem_prologue): Likewise. (expand_constant_setmem_prologue): Likewise. (decide_alg): Add new argument align_unknown. Fix algorithm of strategy selection if TARGET_INLINE_ALL_STRINGOPS is set; Skip sse_loop (decide_alignment): Update desired alignment according to chosen move mode. (ix86_expand_movmem): Change unrolled_loop strategy to use SSE-moves. (ix86_expand_setmem): Likewise. (ix86_slow_unaligned_access): Implementation of new hook slow_unaligned_access. * config/i386/i386.md (strset): Enable half-SSE moves. * config/i386/sse.md (vec_dupv4si): Add expand for vec_dupv4si. (vec_dupv2di): Add expand for vec_dupv2di. Co-Authored-By: Jan Hubicka <jh@suse.cz> From-SVN: r181357
This commit is contained in:
parent
f8acdd3c52
commit
38877e9851
12 changed files with 898 additions and 247 deletions
|
@ -1,3 +1,39 @@
|
|||
2011-11-14 Zolotukhin Michael <michael.v.zolotukhin@gmail.com>
|
||||
Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* config/i386/i386.h (processor_costs): Add second dimension to
|
||||
stringop_algs array.
|
||||
* config/i386/i386.c (cost models): Initialize second dimension of
|
||||
stringop_algs arrays.
|
||||
(core_cost): New costs based on generic64 costs with updated stringop
|
||||
values.
|
||||
(promote_duplicated_reg): Add support for vector modes, add
|
||||
declaration.
|
||||
(promote_duplicated_reg_to_size): Likewise.
|
||||
(processor_target): Set core costs for core variants.
|
||||
(expand_set_or_movmem_via_loop_with_iter): New function.
|
||||
(expand_set_or_movmem_via_loop): Enable reuse of the same iters in
|
||||
different loops, produced by this function.
|
||||
(emit_strset): New function.
|
||||
(expand_movmem_epilogue): Add epilogue generation for bigger sizes,
|
||||
use SSE-moves where possible.
|
||||
(expand_setmem_epilogue): Likewise.
|
||||
(expand_movmem_prologue): Likewise for prologue.
|
||||
(expand_setmem_prologue): Likewise.
|
||||
(expand_constant_movmem_prologue): Likewise.
|
||||
(expand_constant_setmem_prologue): Likewise.
|
||||
(decide_alg): Add new argument align_unknown. Fix algorithm of
|
||||
strategy selection if TARGET_INLINE_ALL_STRINGOPS is set; skip sse_loop.
|
||||
(decide_alignment): Update desired alignment according to chosen move
|
||||
mode.
|
||||
(ix86_expand_movmem): Change unrolled_loop strategy to use SSE-moves.
|
||||
(ix86_expand_setmem): Likewise.
|
||||
(ix86_slow_unaligned_access): Implementation of new hook
|
||||
slow_unaligned_access.
|
||||
* config/i386/i386.md (strset): Enable half-SSE moves.
|
||||
* config/i386/sse.md (vec_dupv4si): Add expand for vec_dupv4si.
|
||||
(vec_dupv2di): Add expand for vec_dupv2di.
|
||||
|
||||
2011-11-14 Dimitrios Apostolou <jimis@gmx.net>
|
||||
|
||||
PR bootstrap/51094
|
||||
|
|
|
@ -641,6 +641,7 @@ void debug_varpool_node_set (varpool_node_set);
|
|||
void free_varpool_node_set (varpool_node_set);
|
||||
void ipa_discover_readonly_nonaddressable_vars (void);
|
||||
bool cgraph_comdat_can_be_unshared_p (struct cgraph_node *);
|
||||
bool varpool_externally_visible_p (struct varpool_node *, bool);
|
||||
|
||||
/* In predict.c */
|
||||
bool cgraph_maybe_hot_edge_p (struct cgraph_edge *e);
|
||||
|
@ -681,6 +682,7 @@ bool const_value_known_p (tree);
|
|||
bool varpool_for_node_and_aliases (struct varpool_node *,
|
||||
bool (*) (struct varpool_node *, void *),
|
||||
void *, bool);
|
||||
void varpool_add_new_variable (tree);
|
||||
|
||||
/* Walk all reachable static variables. */
|
||||
#define FOR_EACH_STATIC_VARIABLE(node) \
|
||||
|
|
|
@ -37,7 +37,8 @@ enum stringop_alg
|
|||
rep_prefix_8_byte,
|
||||
loop_1_byte,
|
||||
loop,
|
||||
unrolled_loop
|
||||
unrolled_loop,
|
||||
sse_loop
|
||||
};
|
||||
|
||||
/* Available call abi. */
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -159,8 +159,12 @@ struct processor_costs {
|
|||
const int fchs; /* cost of FCHS instruction. */
|
||||
const int fsqrt; /* cost of FSQRT instruction. */
|
||||
/* Specify what algorithm
|
||||
to use for stringops on unknown size. */
|
||||
struct stringop_algs memcpy[2], memset[2];
|
||||
to use for stringops on unknown size.
|
||||
First index is used to specify whether
|
||||
alignment is known or not.
|
||||
Second - to specify whether 32 or 64 bits
|
||||
are used. */
|
||||
struct stringop_algs memcpy[2][2], memset[2][2];
|
||||
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
|
||||
load and store. */
|
||||
const int scalar_load_cost; /* Cost of scalar load. */
|
||||
|
|
|
@ -324,6 +324,9 @@ Enum(stringop_alg) String(loop) Value(loop)
|
|||
EnumValue
|
||||
Enum(stringop_alg) String(unrolled_loop) Value(unrolled_loop)
|
||||
|
||||
EnumValue
|
||||
Enum(stringop_alg) String(sse_loop) Value(sse_loop)
|
||||
|
||||
mtls-dialect=
|
||||
Target RejectNegative Joined Var(ix86_tls_dialect) Enum(tls_dialect) Init(TLS_DIALECT_GNU)
|
||||
Use given thread-local storage dialect
|
||||
|
|
|
@ -7501,6 +7501,16 @@
|
|||
(set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
|
||||
(set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
|
||||
|
||||
(define_expand "vec_dupv4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "")
|
||||
(vec_duplicate:V4SI
|
||||
(match_operand:SI 1 "nonimmediate_operand" "")))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
if (!TARGET_AVX)
|
||||
operands[1] = force_reg (V4SImode, operands[1]);
|
||||
})
|
||||
|
||||
(define_insn "*vec_dupv4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
|
||||
(vec_duplicate:V4SI
|
||||
|
@ -7517,6 +7527,16 @@
|
|||
(set_attr "prefix" "maybe_vex,vex,orig")
|
||||
(set_attr "mode" "TI,V4SF,V4SF")])
|
||||
|
||||
(define_expand "vec_dupv2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "")
|
||||
(vec_duplicate:V2DI
|
||||
(match_operand:DI 1 "nonimmediate_operand" "")))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
if (!TARGET_AVX)
|
||||
operands[1] = force_reg (V2DImode, operands[1]);
|
||||
})
|
||||
|
||||
(define_insn "*vec_dupv2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
|
||||
(vec_duplicate:V2DI
|
||||
|
|
|
@ -647,7 +647,7 @@ cgraph_externally_visible_p (struct cgraph_node *node,
|
|||
|
||||
/* Return true when variable VNODE should be considered externally visible. */
|
||||
|
||||
static bool
|
||||
bool
|
||||
varpool_externally_visible_p (struct varpool_node *vnode, bool aliased)
|
||||
{
|
||||
if (!DECL_COMDAT (vnode->decl) && !TREE_PUBLIC (vnode->decl))
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2011-11-14 Zolotukhin Michael <michael.v.zolotukhin@gmail.com>
|
||||
Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* gcc.target/i386/sw-1.c: Force rep;movsb.
|
||||
|
||||
2011-11-14 Iain Sandoe <iains@gcc.gnu.org>
|
||||
|
||||
PR testsuite/51059
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fshrink-wrap -fdump-rtl-pro_and_epilogue" } */
|
||||
/* { dg-options "-O2 -fshrink-wrap -fdump-rtl-pro_and_epilogue -mstringop-strategy=rep_byte" } */
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
|
|
@ -262,7 +262,7 @@ get_emutls_init_templ_addr (tree decl)
|
|||
if (DECL_EXTERNAL (to))
|
||||
varpool_node (to);
|
||||
else
|
||||
varpool_finalize_decl (to);
|
||||
varpool_add_new_variable (to);
|
||||
return build_fold_addr_expr (to);
|
||||
}
|
||||
|
||||
|
@ -334,7 +334,7 @@ new_emutls_decl (tree decl, tree alias_of)
|
|||
if (DECL_EXTERNAL (to))
|
||||
varpool_node (to);
|
||||
else if (!alias_of)
|
||||
varpool_finalize_decl (to);
|
||||
varpool_add_new_variable (to);
|
||||
else
|
||||
varpool_create_variable_alias (to,
|
||||
varpool_node_for_asm
|
||||
|
|
|
@ -414,6 +414,20 @@ varpool_finalize_decl (tree decl)
|
|||
varpool_assemble_pending_decls ();
|
||||
}
|
||||
|
||||
/* Add the variable DECL to the varpool.
|
||||
Unlike varpool_finalize_decl, this function is intended to be used
|
||||
by the middle end and allows insertion of a new variable at an arbitrary point
|
||||
of compilation. */
|
||||
void
|
||||
varpool_add_new_variable (tree decl)
|
||||
{
|
||||
struct varpool_node *node;
|
||||
varpool_finalize_decl (decl);
|
||||
node = varpool_node (decl);
|
||||
if (varpool_externally_visible_p (node, false))
|
||||
node->externally_visible = true;
|
||||
}
|
||||
|
||||
/* Return variable availability. See cgraph.h for description of individual
|
||||
return values. */
|
||||
enum availability
|
||||
|
|
Loading…
Add table
Reference in a new issue