Improve vzeroupper optimization.
gcc/ 2010-11-24 H.J. Lu <hongjiu.lu@intel.com> PR target/46519 * config/i386/i386.c (upper_128bits_state): New. (block_info_def): Remove upper_128bits_set and done. Add state, referenced, count, processed and rescanned. (check_avx256_stores): Updated. (move_or_delete_vzeroupper_2): Updated. Handle deleted BB_END. Call note_stores only if needed. Set referenced and count. (move_or_delete_vzeroupper_1): Updated. Set rescan_vzeroupper_p. (rescan_move_or_delete_vzeroupper): New. (move_or_delete_vzeroupper): Process and rescan all all basic blocks instead of predecessor blocks of all exit points. (ix86_option_override_internal): Enable vzeroupper optimization only for -fexpensive-optimizations and not optimizing for size. (use_avx256_p): Removed. (init_cumulative_args): Don't set use_avx256_p. (ix86_function_arg): Likewise. (ix86_expand_move): Likewise. (ix86_expand_vector_move_misalign): Likewise. (ix86_local_alignment): Likewise. (ix86_minimum_alignment): Likewise. (ix86_expand_epilogue): Don't check use_avx256_p when generating vzeroupper. (ix86_expand_call): Likewise. * config/i386/i386.h (machine_function): Remove use_vzeroupper_p and use_avx256_p. Add rescan_vzeroupper_p. gcc/testsuite/ 2010-11-24 H.J. Lu <hongjiu.lu@intel.com> PR target/46519 * gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper. * gcc.target/i386/avx-vzeroupper-11.c: Likewise. * gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2. * gcc.target/i386/avx-vzeroupper-15.c: Likewise. * gcc.target/i386/avx-vzeroupper-16.c: Likewise. * gcc.target/i386/avx-vzeroupper-17.c: Likewise. * gcc.target/i386/avx-vzeroupper-20.c: New. * gcc.target/i386/avx-vzeroupper-21.c: Likewise. * gcc.target/i386/avx-vzeroupper-22.c: Likewise. * gcc.target/i386/avx-vzeroupper-23.c: Likewise. * gcc.target/i386/avx-vzeroupper-24.c: Likewise. * gcc.target/i386/avx-vzeroupper-25.c: Likewise. * gcc.target/i386/avx-vzeroupper-26.c: Likewise. From-SVN: r167124
This commit is contained in:
parent
a19ff17794
commit
617e6634a2
17 changed files with 358 additions and 100 deletions
|
@ -1,3 +1,32 @@
|
|||
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/46519
|
||||
* config/i386/i386.c (upper_128bits_state): New.
|
||||
(block_info_def): Remove upper_128bits_set and done. Add state,
|
||||
referenced, count, processed and rescanned.
|
||||
(check_avx256_stores): Updated.
|
||||
(move_or_delete_vzeroupper_2): Updated. Handle deleted BB_END.
|
||||
Call note_stores only if needed. Set referenced and count.
|
||||
(move_or_delete_vzeroupper_1): Updated. Set rescan_vzeroupper_p.
|
||||
(rescan_move_or_delete_vzeroupper): New.
|
||||
(move_or_delete_vzeroupper): Process and rescan all all basic
|
||||
blocks instead of predecessor blocks of all exit points.
|
||||
(ix86_option_override_internal): Enable vzeroupper optimization
|
||||
only for -fexpensive-optimizations and not optimizing for size.
|
||||
(use_avx256_p): Removed.
|
||||
(init_cumulative_args): Don't set use_avx256_p.
|
||||
(ix86_function_arg): Likewise.
|
||||
(ix86_expand_move): Likewise.
|
||||
(ix86_expand_vector_move_misalign): Likewise.
|
||||
(ix86_local_alignment): Likewise.
|
||||
(ix86_minimum_alignment): Likewise.
|
||||
(ix86_expand_epilogue): Don't check use_avx256_p when generating
|
||||
vzeroupper.
|
||||
(ix86_expand_call): Likewise.
|
||||
|
||||
* config/i386/i386.h (machine_function): Remove use_vzeroupper_p
|
||||
and use_avx256_p. Add rescan_vzeroupper_p.
|
||||
|
||||
2010-11-24 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
* toplev.c: Include <signal.h>.
|
||||
|
|
|
@ -57,12 +57,25 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "dwarf2out.h"
|
||||
#include "sched-int.h"
|
||||
|
||||
enum upper_128bits_state
|
||||
{
|
||||
unknown = 0, /* Unknown. */
|
||||
unused, /* Not used or not referenced. */
|
||||
used /* Used or referenced. */
|
||||
};
|
||||
|
||||
typedef struct block_info_def
|
||||
{
|
||||
/* TRUE if the upper 128bits of any AVX registers are live at exit. */
|
||||
bool upper_128bits_set;
|
||||
/* State of the upper 128bits of any AVX registers at exit. */
|
||||
enum upper_128bits_state state;
|
||||
/* If the upper 128bits of any AVX registers are referenced. */
|
||||
enum upper_128bits_state referenced;
|
||||
/* Number of vzerouppers in this block. */
|
||||
unsigned int count;
|
||||
/* TRUE if block has been processed. */
|
||||
bool done;
|
||||
bool processed;
|
||||
/* TRUE if block has been rescanned. */
|
||||
bool rescanned;
|
||||
} *block_info;
|
||||
|
||||
#define BLOCK_INFO(B) ((block_info) (B)->aux)
|
||||
|
@ -93,8 +106,9 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
|
|||
&& REG_P (SET_SRC (set))
|
||||
&& VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
|
||||
{
|
||||
bool *upper_128bits_set = (bool *) data;
|
||||
*upper_128bits_set = true;
|
||||
enum upper_128bits_state *state
|
||||
= (enum upper_128bits_state *) data;
|
||||
*state = used;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -106,19 +120,24 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
|
|||
are live at entry. */
|
||||
|
||||
static void
|
||||
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
|
||||
move_or_delete_vzeroupper_2 (basic_block bb,
|
||||
enum upper_128bits_state state)
|
||||
{
|
||||
rtx insn;
|
||||
rtx insn, bb_end;
|
||||
rtx vzeroupper_insn = NULL_RTX;
|
||||
rtx pat;
|
||||
int avx256;
|
||||
enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
|
||||
int count = BLOCK_INFO (bb)->count;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
|
||||
bb->index, upper_128bits_set);
|
||||
bb->index, state);
|
||||
|
||||
/* BB_END changes when it is deleted. */
|
||||
bb_end = BB_END (bb);
|
||||
insn = BB_HEAD (bb);
|
||||
while (insn != BB_END (bb))
|
||||
while (insn != bb_end)
|
||||
{
|
||||
insn = NEXT_INSN (insn);
|
||||
|
||||
|
@ -167,67 +186,89 @@ move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
|
|||
&& GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
|
||||
&& XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
|
||||
{
|
||||
upper_128bits_set = false;
|
||||
state = unused;
|
||||
|
||||
/* Delete pending vzeroupper insertion. */
|
||||
if (vzeroupper_insn)
|
||||
{
|
||||
count--;
|
||||
delete_insn (vzeroupper_insn);
|
||||
vzeroupper_insn = NULL_RTX;
|
||||
}
|
||||
}
|
||||
else if (!upper_128bits_set)
|
||||
note_stores (pat, check_avx256_stores, &upper_128bits_set);
|
||||
else if (state != used && referenced != unused)
|
||||
{
|
||||
/* No need to call note_stores if the upper 128bits of
|
||||
AVX registers are never referenced. */
|
||||
note_stores (pat, check_avx256_stores, &state);
|
||||
if (state == used)
|
||||
referenced = used;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Process vzeroupper intrinsic. */
|
||||
count++;
|
||||
avx256 = INTVAL (XVECEXP (pat, 0, 0));
|
||||
|
||||
if (!upper_128bits_set)
|
||||
if (state == unused)
|
||||
{
|
||||
/* Since the upper 128bits are cleared, callee must not pass
|
||||
256bit AVX register. We only need to check if callee
|
||||
returns 256bit AVX register. */
|
||||
upper_128bits_set = (avx256 == callee_return_avx256);
|
||||
if (avx256 == callee_return_avx256)
|
||||
state = used;
|
||||
|
||||
/* Remove unnecessary vzeroupper since
|
||||
upper 128bits are cleared. */
|
||||
/* Remove unnecessary vzeroupper since upper 128bits are
|
||||
cleared. */
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Delete redundant vzeroupper:\n");
|
||||
print_rtl_single (dump_file, insn);
|
||||
}
|
||||
delete_insn (insn);
|
||||
}
|
||||
else if (avx256 == callee_return_pass_avx256
|
||||
|| avx256 == callee_pass_avx256)
|
||||
{
|
||||
/* Callee passes 256bit AVX register. Check if callee
|
||||
returns 256bit AVX register. */
|
||||
upper_128bits_set = (avx256 == callee_return_pass_avx256);
|
||||
|
||||
/* Must remove vzeroupper since
|
||||
callee passes in 256bit AVX register. */
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
|
||||
print_rtl_single (dump_file, insn);
|
||||
}
|
||||
count--;
|
||||
delete_insn (insn);
|
||||
}
|
||||
else
|
||||
{
|
||||
upper_128bits_set = false;
|
||||
vzeroupper_insn = insn;
|
||||
/* Set state to UNUSED if callee doesn't return 256bit AVX
|
||||
register. */
|
||||
if (avx256 != callee_return_pass_avx256)
|
||||
state = unused;
|
||||
|
||||
if (avx256 == callee_return_pass_avx256
|
||||
|| avx256 == callee_pass_avx256)
|
||||
{
|
||||
/* Must remove vzeroupper since callee passes in 256bit
|
||||
AVX register. */
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
|
||||
print_rtl_single (dump_file, insn);
|
||||
}
|
||||
count--;
|
||||
delete_insn (insn);
|
||||
}
|
||||
else
|
||||
vzeroupper_insn = insn;
|
||||
}
|
||||
}
|
||||
|
||||
BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;
|
||||
BLOCK_INFO (bb)->state = state;
|
||||
|
||||
if (BLOCK_INFO (bb)->referenced == unknown)
|
||||
{
|
||||
/* The upper 128bits of AVX registers are never referenced if
|
||||
REFERENCED isn't updated. */
|
||||
if (referenced == unknown)
|
||||
referenced = unused;
|
||||
BLOCK_INFO (bb)->referenced = referenced;
|
||||
BLOCK_INFO (bb)->count = count;
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
|
||||
bb->index, upper_128bits_set);
|
||||
bb->index, state);
|
||||
}
|
||||
|
||||
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
|
||||
|
@ -238,18 +279,18 @@ move_or_delete_vzeroupper_1 (basic_block block)
|
|||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
bool upper_128bits_set;
|
||||
enum upper_128bits_state state;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Process BB [%i]: status: %d\n",
|
||||
block->index, BLOCK_INFO (block)->done);
|
||||
block->index, BLOCK_INFO (block)->processed);
|
||||
|
||||
if (BLOCK_INFO (block)->done)
|
||||
if (BLOCK_INFO (block)->processed)
|
||||
return;
|
||||
|
||||
BLOCK_INFO (block)->done = true;
|
||||
BLOCK_INFO (block)->processed = true;
|
||||
|
||||
upper_128bits_set = false;
|
||||
state = unknown;
|
||||
|
||||
/* Process all predecessor edges of this block. */
|
||||
FOR_EACH_EDGE (e, ei, block->preds)
|
||||
|
@ -257,12 +298,70 @@ move_or_delete_vzeroupper_1 (basic_block block)
|
|||
if (e->src == block)
|
||||
continue;
|
||||
move_or_delete_vzeroupper_1 (e->src);
|
||||
if (BLOCK_INFO (e->src)->upper_128bits_set)
|
||||
upper_128bits_set = true;
|
||||
switch (BLOCK_INFO (e->src)->state)
|
||||
{
|
||||
case unknown:
|
||||
if (state == unused)
|
||||
state = unknown;
|
||||
break;
|
||||
case used:
|
||||
state = used;
|
||||
break;
|
||||
case unused:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If state of any predecessor edges is unknown, we need to rescan. */
|
||||
if (state == unknown)
|
||||
cfun->machine->rescan_vzeroupper_p = 1;
|
||||
|
||||
/* Process this block. */
|
||||
move_or_delete_vzeroupper_2 (block, upper_128bits_set);
|
||||
move_or_delete_vzeroupper_2 (block, state);
|
||||
}
|
||||
|
||||
/* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
|
||||
in BLOCK and its predecessor blocks recursively. */
|
||||
|
||||
static void
|
||||
rescan_move_or_delete_vzeroupper (basic_block block)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
enum upper_128bits_state state;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Rescan BB [%i]: status: %d\n",
|
||||
block->index, BLOCK_INFO (block)->rescanned);
|
||||
|
||||
if (BLOCK_INFO (block)->rescanned)
|
||||
return;
|
||||
|
||||
BLOCK_INFO (block)->rescanned = true;
|
||||
|
||||
state = unused;
|
||||
|
||||
/* Rescan all predecessor edges of this block. */
|
||||
FOR_EACH_EDGE (e, ei, block->preds)
|
||||
{
|
||||
if (e->src == block)
|
||||
continue;
|
||||
rescan_move_or_delete_vzeroupper (e->src);
|
||||
/* For rescan, UKKNOWN state is treated as UNUSED. */
|
||||
if (BLOCK_INFO (e->src)->state == used)
|
||||
state = used;
|
||||
}
|
||||
|
||||
/* Rescan this block only if there are vzerouppers or the upper
|
||||
128bits of AVX registers are referenced. */
|
||||
if (BLOCK_INFO (block)->count == 0
|
||||
&& (state == used || BLOCK_INFO (block)->referenced != used))
|
||||
{
|
||||
if (state == used)
|
||||
BLOCK_INFO (block)->state = state;
|
||||
}
|
||||
else
|
||||
move_or_delete_vzeroupper_2 (block, state);
|
||||
}
|
||||
|
||||
/* Go through the instruction stream looking for vzeroupper. Delete
|
||||
|
@ -274,6 +373,8 @@ move_or_delete_vzeroupper (void)
|
|||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
basic_block bb;
|
||||
unsigned int count = 0;
|
||||
|
||||
/* Set up block info for each basic block. */
|
||||
alloc_aux_for_blocks (sizeof (struct block_info_def));
|
||||
|
@ -285,16 +386,31 @@ move_or_delete_vzeroupper (void)
|
|||
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
|
||||
{
|
||||
move_or_delete_vzeroupper_2 (e->dest,
|
||||
cfun->machine->caller_pass_avx256_p);
|
||||
BLOCK_INFO (e->dest)->done = true;
|
||||
cfun->machine->caller_pass_avx256_p
|
||||
? used : unused);
|
||||
BLOCK_INFO (e->dest)->processed = true;
|
||||
BLOCK_INFO (e->dest)->rescanned = true;
|
||||
}
|
||||
|
||||
/* Process predecessor blocks of all exit points. */
|
||||
/* Process all basic blocks. */
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Process all exit points\n");
|
||||
fprintf (dump_file, "Process all basic blocks\n");
|
||||
|
||||
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
|
||||
move_or_delete_vzeroupper_1 (e->src);
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
move_or_delete_vzeroupper_1 (bb);
|
||||
count += BLOCK_INFO (bb)->count;
|
||||
}
|
||||
|
||||
/* Rescan all basic blocks if needed. */
|
||||
if (count && cfun->machine->rescan_vzeroupper_p)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Rescan all basic blocks\n");
|
||||
|
||||
FOR_EACH_BB (bb)
|
||||
rescan_move_or_delete_vzeroupper (bb);
|
||||
}
|
||||
|
||||
free_aux_for_blocks ();
|
||||
}
|
||||
|
@ -4051,8 +4167,11 @@ ix86_option_override_internal (bool main_args_p)
|
|||
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
/* Enable vzeroupper pass by default for TARGET_AVX. */
|
||||
if (!(target_flags_explicit & MASK_VZEROUPPER))
|
||||
/* When not optimize for size, enable vzeroupper optimization for
|
||||
TARGET_AVX with -fexpensive-optimizations. */
|
||||
if (!optimize_size
|
||||
&& flag_expensive_optimizations
|
||||
&& !(target_flags_explicit & MASK_VZEROUPPER))
|
||||
target_flags |= MASK_VZEROUPPER;
|
||||
}
|
||||
else
|
||||
|
@ -4062,17 +4181,6 @@ ix86_option_override_internal (bool main_args_p)
|
|||
}
|
||||
}
|
||||
|
||||
/* Return TRUE if type TYPE and mode MODE use 256bit AVX modes. */
|
||||
|
||||
static bool
|
||||
use_avx256_p (enum machine_mode mode, const_tree type)
|
||||
{
|
||||
return (VALID_AVX256_REG_MODE (mode)
|
||||
|| (type
|
||||
&& TREE_CODE (type) == VECTOR_TYPE
|
||||
&& int_size_in_bytes (type) == 32));
|
||||
}
|
||||
|
||||
/* Return TRUE if VAL is passed in register with 256bit AVX modes. */
|
||||
|
||||
static bool
|
||||
|
@ -5687,7 +5795,6 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
|
|||
if (function_pass_avx256_p (fnret_value))
|
||||
{
|
||||
/* The return value of this function uses 256bit AVX modes. */
|
||||
cfun->machine->use_avx256_p = true;
|
||||
if (caller)
|
||||
cfun->machine->callee_return_avx256_p = true;
|
||||
else
|
||||
|
@ -6956,7 +7063,6 @@ ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
|
|||
if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
|
||||
{
|
||||
/* This argument uses 256bit AVX modes. */
|
||||
cfun->machine->use_avx256_p = true;
|
||||
if (cum->caller)
|
||||
cfun->machine->callee_pass_avx256_p = true;
|
||||
else
|
||||
|
@ -10970,12 +11076,9 @@ ix86_expand_epilogue (int style)
|
|||
|
||||
/* Emit vzeroupper if needed. */
|
||||
if (TARGET_VZEROUPPER
|
||||
&& cfun->machine->use_avx256_p
|
||||
&& !TREE_THIS_VOLATILE (cfun->decl)
|
||||
&& !cfun->machine->caller_return_avx256_p)
|
||||
{
|
||||
cfun->machine->use_vzeroupper_p = 1;
|
||||
emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
|
||||
}
|
||||
emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
|
||||
|
||||
if (crtl->args.pops_args && crtl->args.size)
|
||||
{
|
||||
|
@ -15130,9 +15233,6 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
|
|||
rtx op0, op1;
|
||||
enum tls_model model;
|
||||
|
||||
if (VALID_AVX256_REG_MODE (mode))
|
||||
cfun->machine->use_avx256_p = true;
|
||||
|
||||
op0 = operands[0];
|
||||
op1 = operands[1];
|
||||
|
||||
|
@ -15277,9 +15377,6 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
|
|||
rtx op0 = operands[0], op1 = operands[1];
|
||||
unsigned int align = GET_MODE_ALIGNMENT (mode);
|
||||
|
||||
if (VALID_AVX256_REG_MODE (mode))
|
||||
cfun->machine->use_avx256_p = true;
|
||||
|
||||
/* Force constants other than zero into memory. We do not know how
|
||||
the instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
|
@ -15386,9 +15483,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
{
|
||||
rtx op0, op1, m;
|
||||
|
||||
if (VALID_AVX256_REG_MODE (mode))
|
||||
cfun->machine->use_avx256_p = true;
|
||||
|
||||
op0 = operands[0];
|
||||
op1 = operands[1];
|
||||
|
||||
|
@ -21661,12 +21755,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
|
|||
}
|
||||
|
||||
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
|
||||
if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
|
||||
if (TARGET_VZEROUPPER && !TREE_THIS_VOLATILE (cfun->decl))
|
||||
{
|
||||
rtx unspec;
|
||||
int avx256;
|
||||
|
||||
cfun->machine->use_vzeroupper_p = 1;
|
||||
if (cfun->machine->callee_pass_avx256_p)
|
||||
{
|
||||
if (cfun->machine->callee_return_avx256_p)
|
||||
|
@ -22763,9 +22856,6 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
|
|||
decl = NULL;
|
||||
}
|
||||
|
||||
if (use_avx256_p (mode, type))
|
||||
cfun->machine->use_avx256_p = true;
|
||||
|
||||
/* Don't do dynamic stack realignment for long long objects with
|
||||
-mpreferred-stack-boundary=2. */
|
||||
if (!TARGET_64BIT
|
||||
|
@ -22872,9 +22962,6 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode,
|
|||
decl = NULL;
|
||||
}
|
||||
|
||||
if (use_avx256_p (mode, type))
|
||||
cfun->machine->use_avx256_p = true;
|
||||
|
||||
if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
|
||||
return align;
|
||||
|
||||
|
@ -29782,7 +29869,7 @@ ix86_reorg (void)
|
|||
}
|
||||
|
||||
/* Run the vzeroupper optimization if needed. */
|
||||
if (cfun->machine->use_vzeroupper_p)
|
||||
if (TARGET_VZEROUPPER)
|
||||
move_or_delete_vzeroupper ();
|
||||
}
|
||||
|
||||
|
|
|
@ -2294,12 +2294,6 @@ struct GTY(()) machine_function {
|
|||
stack below the return address. */
|
||||
BOOL_BITFIELD static_chain_on_stack : 1;
|
||||
|
||||
/* Nonzero if the current function uses vzeroupper. */
|
||||
BOOL_BITFIELD use_vzeroupper_p : 1;
|
||||
|
||||
/* Nonzero if the current function uses 256bit AVX regisers. */
|
||||
BOOL_BITFIELD use_avx256_p : 1;
|
||||
|
||||
/* Nonzero if caller passes 256bit AVX modes. */
|
||||
BOOL_BITFIELD caller_pass_avx256_p : 1;
|
||||
|
||||
|
@ -2312,6 +2306,9 @@ struct GTY(()) machine_function {
|
|||
/* Nonzero if the current callee returns 256bit AVX modes. */
|
||||
BOOL_BITFIELD callee_return_avx256_p : 1;
|
||||
|
||||
/* Nonzero if rescan vzerouppers in the current function is needed. */
|
||||
BOOL_BITFIELD rescan_vzeroupper_p : 1;
|
||||
|
||||
/* During prologue/epilogue generation, the current frame state.
|
||||
Otherwise, the frame state at the end of the prologue. */
|
||||
struct machine_frame_state fs;
|
||||
|
|
|
@ -1,3 +1,22 @@
|
|||
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/46519
|
||||
* gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper.
|
||||
* gcc.target/i386/avx-vzeroupper-11.c: Likewise.
|
||||
|
||||
* gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2.
|
||||
* gcc.target/i386/avx-vzeroupper-15.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
|
||||
|
||||
* gcc.target/i386/avx-vzeroupper-20.c: New.
|
||||
* gcc.target/i386/avx-vzeroupper-21.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-22.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-23.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-24.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-25.c: Likewise.
|
||||
* gcc.target/i386/avx-vzeroupper-26.c: Likewise.
|
||||
|
||||
2010-11-24 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR lto/43218
|
||||
|
|
|
@ -14,4 +14,4 @@ foo ()
|
|||
_mm256_zeroupper ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
||||
|
|
|
@ -16,4 +16,4 @@ foo ()
|
|||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
|
||||
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
|
||||
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
|
||||
/* { dg-options "-O2 -mavx -mabi=ms -mtune=generic -dp" } */
|
||||
|
||||
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
|
||||
/* { dg-options "-O2 -mavx -mabi=ms -mtune=generic -dp" } */
|
||||
|
||||
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
|
||||
|
||||
|
|
13
gcc/testsuite/gcc.target/i386/avx-vzeroupper-20.c
Normal file
13
gcc/testsuite/gcc.target/i386/avx-vzeroupper-20.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -mavx -mtune=generic -dp" } */
|
||||
|
||||
extern void free (void *);
|
||||
void
|
||||
bar (void *ncstrp)
|
||||
{
|
||||
if(ncstrp==((void *)0))
|
||||
return;
|
||||
free(ncstrp);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
14
gcc/testsuite/gcc.target/i386/avx-vzeroupper-21.c
Normal file
14
gcc/testsuite/gcc.target/i386/avx-vzeroupper-21.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
|
||||
|
||||
extern void exit (int) __attribute__ ((__noreturn__));
|
||||
|
||||
int
|
||||
foo (int i)
|
||||
{
|
||||
if (i == 0)
|
||||
exit (1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
18
gcc/testsuite/gcc.target/i386/avx-vzeroupper-22.c
Normal file
18
gcc/testsuite/gcc.target/i386/avx-vzeroupper-22.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
|
||||
|
||||
extern void exit (int) __attribute__ ((__noreturn__));
|
||||
extern void bar (void);
|
||||
|
||||
int
|
||||
foo (int i)
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
bar ();
|
||||
exit (1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
13
gcc/testsuite/gcc.target/i386/avx-vzeroupper-23.c
Normal file
13
gcc/testsuite/gcc.target/i386/avx-vzeroupper-23.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
|
||||
|
||||
extern void fatal (void) __attribute__ ((__noreturn__));
|
||||
extern void exit (int) __attribute__ ((__noreturn__));
|
||||
|
||||
void
|
||||
fatal (void)
|
||||
{
|
||||
exit (1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
38
gcc/testsuite/gcc.target/i386/avx-vzeroupper-24.c
Normal file
38
gcc/testsuite/gcc.target/i386/avx-vzeroupper-24.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
|
||||
|
||||
typedef struct bitmap_element_def {
|
||||
struct bitmap_element_def *next;
|
||||
unsigned int indx;
|
||||
} bitmap_element;
|
||||
typedef struct bitmap_head_def {
|
||||
bitmap_element *first;
|
||||
bitmap_element *current;
|
||||
unsigned int indx;
|
||||
} bitmap_head;
|
||||
typedef struct bitmap_head_def *bitmap;
|
||||
typedef const struct bitmap_head_def *const_bitmap;
|
||||
extern void bar (void) __attribute__ ((__noreturn__));
|
||||
unsigned char
|
||||
bitmap_and_compl_into (bitmap a, const_bitmap b)
|
||||
{
|
||||
bitmap_element *a_elt = a->first;
|
||||
const bitmap_element *b_elt = b->first;
|
||||
if (a == b)
|
||||
{
|
||||
if ((!(a)->first))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
while (a_elt && b_elt)
|
||||
{
|
||||
if (a_elt->indx < b_elt->indx)
|
||||
a_elt = a_elt->next;
|
||||
}
|
||||
if (a->indx == a->current->indx)
|
||||
bar ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
14
gcc/testsuite/gcc.target/i386/avx-vzeroupper-25.c
Normal file
14
gcc/testsuite/gcc.target/i386/avx-vzeroupper-25.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
extern __m256 x, y;
|
||||
|
||||
void
|
||||
foo ()
|
||||
{
|
||||
x = y;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
16
gcc/testsuite/gcc.target/i386/avx-vzeroupper-26.c
Normal file
16
gcc/testsuite/gcc.target/i386/avx-vzeroupper-26.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Os -mavx -mtune=generic -dp" } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
extern __m256 x, y;
|
||||
extern void (*bar) (void);
|
||||
|
||||
void
|
||||
foo ()
|
||||
{
|
||||
x = y;
|
||||
bar ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
|
Loading…
Add table
Reference in a new issue