re PR target/25413 (wrong alignment or incorrect address computation in vectorized code on Pentium 4 SSE)
2007-07-12 Dorit Nuzman <dorit@il.ibm.com> Devang Patel <dpatel@apple.com> PR tree-optimization/25413 * targhooks.c (default_builtin_vector_alignment_reachable): New. * targhooks.h (default_builtin_vector_alignment_reachable): New. * tree.h (contains_packed_reference): New. * expr.c (contains_packed_reference): New. * tree-vect-analyze.c (vector_alignment_reachable_p): New. (vect_enhance_data_refs_alignment): Call vector_alignment_reachable_p. * target.h (vector_alignment_reachable): New builtin. * target-def.h (TARGET_VECTOR_ALIGNMENT_REACHABLE): New. * config/rs6000/rs6000.c (rs6000_vector_alignment_reachable): New. (TARGET_VECTOR_ALIGNMENT_REACHABLE): Define. Co-Authored-By: Devang Patel <dpatel@apple.com> From-SVN: r126591
This commit is contained in:
parent
2df6377e6e
commit
5b900a4c0e
15 changed files with 474 additions and 29 deletions
|
@ -10,6 +10,22 @@
|
|||
* varasm.c (assemble_start_function): Use DECL_ALIGN instead of
|
||||
FUNCTION_BOUNDARY.
|
||||
|
||||
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
|
||||
Devang Patel <dpatel@apple.com>
|
||||
|
||||
PR tree-optimization/25413
|
||||
* targhooks.c (default_builtin_vector_alignment_reachable): New.
|
||||
* targhooks.h (default_builtin_vector_alignment_reachable): New.
|
||||
* tree.h (contains_packed_reference): New.
|
||||
* expr.c (contains_packed_reference): New.
|
||||
* tree-vect-analyze.c (vector_alignment_reachable_p): New.
|
||||
(vect_enhance_data_refs_alignment): Call
|
||||
vector_alignment_reachable_p.
|
||||
* target.h (vector_alignment_reachable): New builtin.
|
||||
* target-def.h (TARGET_VECTOR_ALIGNMENT_REACHABLE): New.
|
||||
* config/rs6000/rs6000.c (rs6000_vector_alignment_reachable): New.
|
||||
(TARGET_VECTOR_ALIGNMENT_REACHABLE): Define.
|
||||
|
||||
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* target.h (builtin_vectorization_cost): Add new target builtin.
|
||||
|
|
|
@ -717,6 +717,7 @@ static tree rs6000_builtin_mul_widen_odd (tree);
|
|||
static tree rs6000_builtin_conversion (enum tree_code, tree);
|
||||
|
||||
static void def_builtin (int, const char *, tree, int);
|
||||
static bool rs6000_vector_alignment_reachable (tree, bool);
|
||||
static void rs6000_init_builtins (void);
|
||||
static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx);
|
||||
static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx);
|
||||
|
@ -984,6 +985,9 @@ static const char alt_reg_names[][8] =
|
|||
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
|
||||
#define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion
|
||||
|
||||
#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
|
||||
#define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
|
||||
|
||||
#undef TARGET_INIT_BUILTINS
|
||||
#define TARGET_INIT_BUILTINS rs6000_init_builtins
|
||||
|
||||
|
@ -1806,6 +1810,37 @@ rs6000_builtin_mul_widen_odd (tree type)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/* Return true iff, data reference of TYPE can reach vector alignment (16)
|
||||
after applying N number of iterations. This routine does not determine
|
||||
how may iterations are required to reach desired alignment. */
|
||||
|
||||
static bool
|
||||
rs6000_vector_alignment_reachable (tree type ATTRIBUTE_UNUSED, bool is_packed)
|
||||
{
|
||||
if (is_packed)
|
||||
return false;
|
||||
|
||||
if (TARGET_32BIT)
|
||||
{
|
||||
if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
|
||||
return true;
|
||||
|
||||
if (rs6000_alignment_flags == MASK_ALIGN_POWER)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (TARGET_MACHO)
|
||||
return false;
|
||||
|
||||
/* Assuming that all other types are naturally aligned. CHECKME! */
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle generic options of the form -mfoo=yes/no.
|
||||
NAME is the option name.
|
||||
VALUE is the option value.
|
||||
|
|
41
gcc/expr.c
41
gcc/expr.c
|
@ -5924,6 +5924,47 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize,
|
|||
return exp;
|
||||
}
|
||||
|
||||
/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF,
|
||||
look for whether EXP or any nested component-refs within EXP is marked
|
||||
as PACKED. */
|
||||
|
||||
bool
|
||||
contains_packed_reference (tree exp)
|
||||
{
|
||||
bool packed_p = false;
|
||||
|
||||
while (1)
|
||||
{
|
||||
switch (TREE_CODE (exp))
|
||||
{
|
||||
case COMPONENT_REF:
|
||||
{
|
||||
tree field = TREE_OPERAND (exp, 1);
|
||||
packed_p = DECL_PACKED (field)
|
||||
|| TYPE_PACKED (TREE_TYPE (field))
|
||||
|| TYPE_PACKED (TREE_TYPE (exp));
|
||||
if (packed_p)
|
||||
goto done;
|
||||
}
|
||||
break;
|
||||
|
||||
case BIT_FIELD_REF:
|
||||
case ARRAY_REF:
|
||||
case ARRAY_RANGE_REF:
|
||||
case REALPART_EXPR:
|
||||
case IMAGPART_EXPR:
|
||||
case VIEW_CONVERT_EXPR:
|
||||
break;
|
||||
|
||||
default:
|
||||
goto done;
|
||||
}
|
||||
exp = TREE_OPERAND (exp, 0);
|
||||
}
|
||||
done:
|
||||
return packed_p;
|
||||
}
|
||||
|
||||
/* Return a tree of sizetype representing the size, in bytes, of the element
|
||||
of EXP, an ARRAY_REF. */
|
||||
|
||||
|
|
|
@ -357,6 +357,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
|
||||
#define TARGET_VECTOR_ALIGNMENT_REACHABLE \
|
||||
default_builtin_vector_alignment_reachable
|
||||
|
||||
#define TARGET_VECTORIZE \
|
||||
{ \
|
||||
|
@ -365,7 +367,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|||
TARGET_VECTORIZE_BUILTIN_CONVERSION, \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
|
||||
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
|
||||
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST, \
|
||||
TARGET_VECTOR_ALIGNMENT_REACHABLE \
|
||||
}
|
||||
|
||||
#define TARGET_DEFAULT_TARGET_FLAGS 0
|
||||
|
|
|
@ -417,6 +417,10 @@ struct gcc_target
|
|||
/* Returns the cost to be added to the overheads involved with
|
||||
executing the vectorized version of a loop. */
|
||||
int (*builtin_vectorization_cost) (bool);
|
||||
|
||||
/* Return true if vector alignment is reachable (by peeling N
|
||||
iterations) for the given type. */
|
||||
bool (* vector_alignment_reachable) (tree, bool);
|
||||
} vectorize;
|
||||
|
||||
/* The initial value of target_flags. */
|
||||
|
|
|
@ -653,4 +653,20 @@ tree default_mangle_decl_assembler_name (tree decl ATTRIBUTE_UNUSED,
|
|||
return id;
|
||||
}
|
||||
|
||||
bool
|
||||
default_builtin_vector_alignment_reachable (tree type, bool is_packed)
|
||||
{
|
||||
if (is_packed)
|
||||
return false;
|
||||
|
||||
/* Assuming that types whose size is > pointer-size are not guaranteed to be
|
||||
naturally aligned. */
|
||||
if (tree_int_cst_compare (TYPE_SIZE (type), bitsize_int (POINTER_SIZE)) > 0)
|
||||
return false;
|
||||
|
||||
/* Assuming that types whose size is <= pointer-size
|
||||
are naturally aligned. */
|
||||
return true;
|
||||
}
|
||||
|
||||
#include "gt-targhooks.h"
|
||||
|
|
|
@ -66,6 +66,8 @@ extern tree default_builtin_vectorized_conversion (enum tree_code, tree);
|
|||
|
||||
extern tree default_builtin_reciprocal (enum built_in_function, bool, bool);
|
||||
|
||||
extern bool default_builtin_vector_alignment_reachable (tree, bool);
|
||||
|
||||
/* These are here, and not in hooks.[ch], because not all users of
|
||||
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
|
||||
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
|
||||
Devang Patel <dpatel@apple.com>
|
||||
|
||||
PR tree-optimization/25413
|
||||
* gcc.dg/vect/vect-align-1.c: New.
|
||||
* gcc.dg/vect/vect-align-2.c: New.
|
||||
* gcc.dg/vect/pr25413.c: New.
|
||||
* gcc.dg/vect/pr25413a.c: New.
|
||||
* gcc.dg/vect/pr31699.c: Fix dg-final check.
|
||||
|
||||
2007-07-12 Nathan Froyd <froydnj@codesourcery.com>
|
||||
|
||||
* lib/target-support.exp (check_ultrasparc_hw_available):
|
||||
|
|
37
gcc/testsuite/gcc.dg/vect/pr25413.c
Normal file
37
gcc/testsuite/gcc.dg/vect/pr25413.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/* { dg-require-effective-target vect_double } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
struct
|
||||
{
|
||||
char c;
|
||||
double d[N];
|
||||
} a;
|
||||
|
||||
/* Store 1 into each of the N elements of a.d.  Always returns 0.  */
int main1()
|
||||
{
|
||||
int i;
|
||||
for ( i=0; i<N; ++i )
|
||||
a.d[i]=1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect, run main1, then abort unless every
   element of a.d equals 1.  Returns 0 on success.  */
int main (void)
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
/* Verify the stores made by main1.  */
for (i=0; i<N; i++)
|
||||
if (a.d[i] != 1)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
129
gcc/testsuite/gcc.dg/vect/pr25413a.c
Normal file
129
gcc/testsuite/gcc.dg/vect/pr25413a.c
Normal file
|
@ -0,0 +1,129 @@
|
|||
/* { dg-require-effective-target vect_double } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
/* Use the compiler's own size type rather than a hard-coded
   `unsigned int': on targets where the size type is wider (e.g. LP64)
   the old typedef made the malloc prototype in this file disagree with
   the built-in one.  */
typedef __SIZE_TYPE__ size_t;
|
||||
|
||||
extern void *malloc (size_t __size) __attribute__ ((__nothrow__)) __attribute__ ((__malloc__));
|
||||
|
||||
typedef double num_t;
|
||||
static const num_t num__infty = ((num_t)1.0)/((num_t)0.0);
|
||||
|
||||
struct oct_tt;
|
||||
typedef struct oct_tt oct_t;
|
||||
|
||||
typedef unsigned int var_t;
|
||||
typedef enum {
|
||||
OCT_EMPTY = 0,
|
||||
OCT_NORMAL = 1,
|
||||
OCT_CLOSED = 2
|
||||
} oct_state;
|
||||
|
||||
struct oct_tt {
|
||||
var_t n;
|
||||
|
||||
int ref;
|
||||
|
||||
oct_state state;
|
||||
struct oct_tt* closed;
|
||||
|
||||
num_t* c;
|
||||
};
|
||||
|
||||
void* octfapg_mm_malloc (size_t t);
|
||||
oct_t* octfapg_alloc (var_t n);
|
||||
oct_t* octfapg_full_copy (oct_t* m);
|
||||
|
||||
struct mmalloc_tt;
|
||||
typedef struct mmalloc_tt mmalloc_t;
|
||||
|
||||
struct mmalloc_tt
|
||||
{
|
||||
int id;
|
||||
|
||||
int nb_alloc;
|
||||
int nb_realloc;
|
||||
int nb_free;
|
||||
|
||||
size_t rem;
|
||||
size_t max;
|
||||
size_t tot;
|
||||
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
size_t size;
|
||||
|
||||
mmalloc_t* mm;
|
||||
int id;
|
||||
|
||||
double dummy;
|
||||
|
||||
} mmheader_t;
|
||||
|
||||
/* Allocate T bytes plus room for one mmheader_t in front, and return a
   pointer to the first byte after that header.
   NOTE(review): the malloc result is not checked before the offset is
   applied.  */
void*
|
||||
octfapg_mm_malloc (size_t t)
|
||||
{
|
||||
char* m = (char*)malloc(t+sizeof(mmheader_t));
|
||||
return m+sizeof(mmheader_t);
|
||||
}
|
||||
|
||||
oct_t* octfapg_empty (var_t n);
|
||||
|
||||
/* Allocate an oct_t through octfapg_mm_malloc and initialize it with
   n = N, ref = 1, state = OCT_EMPTY, and null closed and c pointers.
   Returns the new object.  */
oct_t*
|
||||
octfapg_empty (const var_t n)
|
||||
{
|
||||
oct_t* m;
|
||||
/*octfapg_timing_enter("oct_empty",3);*/
|
||||
m = ((oct_t*) octfapg_mm_malloc (sizeof(oct_t)));
|
||||
m->n = n;
|
||||
m->ref = 1;
|
||||
m->state = OCT_EMPTY;
|
||||
m->closed = (oct_t*)((void *)0);
|
||||
m->c = (num_t*)((void *)0);
|
||||
/*octfapg_timing_exit("oct_empty",3);*/
|
||||
return m;
|
||||
}
|
||||
|
||||
/* Create an oct_t via octfapg_empty and attach a coefficient array of
   2*N*(N+1) num_t elements obtained from octfapg_mm_malloc.  The array
   contents are not initialized here.  The result has state OCT_NORMAL
   and a null closed pointer.  */
oct_t*
|
||||
octfapg_alloc (const var_t n)
|
||||
{
|
||||
/* Number of num_t elements in the coefficient array.  */
size_t nn = (2*(size_t)(n)*((size_t)(n)+1));
|
||||
oct_t* m;
|
||||
m = octfapg_empty(n);
|
||||
m->c = ((num_t*) octfapg_mm_malloc (sizeof(num_t)*(nn)));
|
||||
;
|
||||
m->state = OCT_NORMAL;
|
||||
m->closed = (oct_t*)((void *)0);
|
||||
return m;
|
||||
}
|
||||
|
||||
/* Build an oct_t via octfapg_alloc, set all 2*N*(N+1) coefficients to
   num__infty, then store 0 at index i + ((i+1)*(i+1))/2 for each
   i < 2*N.  The result is marked OCT_CLOSED and returned.  */
oct_t*
|
||||
octfapg_universe (const var_t n)
|
||||
{
|
||||
oct_t* m;
|
||||
size_t i, nn = (2*(size_t)(n)*((size_t)(n)+1));
|
||||
m = octfapg_alloc(n);
|
||||
/* Contiguous fill of the whole coefficient array.  */
for (i=0;i<nn;i++) *(m->c+i) = num__infty;
|
||||
/* Non-contiguous stores of 0 at the computed indices.  */
for (i=0;i<2*n;i++) *(m->c+((size_t)(i)+(((size_t)(i)+1)*((size_t)(i)+1))/2)) = (num_t)(0);
|
||||
m->state = OCT_CLOSED;
|
||||
return m;
|
||||
}
|
||||
|
||||
/* Test driver: call check_vect, then build one object with
   octfapg_universe (10).  The result is not inspected or freed, and
   `i' is not used.  Returns 0.  */
int main (void)
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
|
||||
oct_t *p = octfapg_universe(10);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -31,5 +31,6 @@ int main()
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
50
gcc/testsuite/gcc.dg/vect/vect-align-1.c
Normal file
50
gcc/testsuite/gcc.dg/vect/vect-align-1.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Compile time known misalignment. Cannot use loop peeling to align
|
||||
the store. */
|
||||
|
||||
#define N 16
|
||||
|
||||
struct foo {
|
||||
char x;
|
||||
int y[N];
|
||||
} __attribute__((packed));
|
||||
|
||||
/* Copy the N values of the local array x into P->y, then re-read P->y
   and abort on the first element that differs from x.  Returns 0 when
   all elements match.  */
int
|
||||
main1 (struct foo * __restrict__ p)
|
||||
{
|
||||
int i;
|
||||
int x[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
/* Store loop into the y member of *P.  */
for (i = 0; i < N; i++)
|
||||
{
|
||||
p->y[i] = x[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (p->y[i] != x[i])
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Test driver: allocate room for two struct foo objects, call
   check_vect, and pass the buffer to main1.  Returns 0.
   NOTE(review): the malloc result is neither checked nor freed, and
   `i' is not used.  */
int main (void)
|
||||
{
|
||||
int i;
|
||||
struct foo *p = malloc (2*sizeof (struct foo));
|
||||
check_vect ();
|
||||
|
||||
main1 (p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
46
gcc/testsuite/gcc.dg/vect/vect-align-2.c
Normal file
46
gcc/testsuite/gcc.dg/vect/vect-align-2.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Compile time unknown misalignment. Cannot use loop peeling to align
|
||||
the store. */
|
||||
|
||||
#define N 17

struct foo {
  char x0;
  int y[N][N];
} __attribute__ ((packed));

struct foo f2;

/* One source value per row of f2.y.  Sized by N so that every z[i] read
   in fbar (0 <= i < N) stays inside the array; it previously held only
   16 elements while N is 17, so z[16] was read out of bounds.  */
int z[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};

/* Fill row i of the global f2.y with z[i] for every i < N, then re-read
   the whole array and abort on the first element that does not match.
   FP is not used by this function.  */
void fbar(struct foo *fp)
{
  int i,j;
  for (i=0; i<N; i++)
    for (j=0; j<N; j++)
      f2.y[i][j] = z[i];

  for (i=0; i<N; i++)
    for (j=0; j<N; j++)
      if (f2.y[i][j] != z[i])
        abort ();
}
|
||||
|
||||
/* Test driver: allocate room for two struct foo objects and pass the
   buffer to fbar.  Returns 0.
   NOTE(review): the malloc result is neither checked nor freed, and
   unlike the other drivers in this commit check_vect is not called.  */
int main (void)
|
||||
{
|
||||
struct foo *fp = (struct foo *) malloc (2*sizeof (struct foo));
|
||||
|
||||
fbar(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -25,6 +25,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
|||
#include "tm.h"
|
||||
#include "ggc.h"
|
||||
#include "tree.h"
|
||||
#include "target.h"
|
||||
#include "basic-block.h"
|
||||
#include "diagnostic.h"
|
||||
#include "tree-flow.h"
|
||||
|
@ -1379,6 +1380,76 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
|
|||
}
|
||||
|
||||
|
||||
/* Function vector_alignment_reachable_p
|
||||
|
||||
Return true if vector alignment for DR is reachable by peeling
|
||||
a few loop iterations. Return false otherwise. */
|
||||
|
||||
static bool
|
||||
vector_alignment_reachable_p (struct data_reference *dr)
|
||||
{
|
||||
tree stmt = DR_STMT (dr);
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
|
||||
if (DR_GROUP_FIRST_DR (stmt_info))
|
||||
{
|
||||
/* For interleaved access we peel only if number of iterations in
|
||||
the prolog loop ({VF - misalignment}), is a multiple of the
|
||||
number of the interleaved accesses. */
|
||||
int elem_size, mis_in_elements;
|
||||
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
/* FORNOW: handle only known alignment. */
|
||||
if (!known_alignment_for_access_p (dr))
|
||||
return false;
|
||||
|
||||
elem_size = UNITS_PER_SIMD_WORD / nelements;
|
||||
mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
|
||||
|
||||
if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If misalignment is known at the compile time then allow peeling
|
||||
only if natural alignment is reachable through peeling. */
|
||||
if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
|
||||
{
|
||||
HOST_WIDE_INT elmsize =
|
||||
int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
|
||||
fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
|
||||
}
|
||||
if (DR_MISALIGNMENT (dr) % elmsize)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "data size does not divide the misalignment.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!known_alignment_for_access_p (dr))
|
||||
{
|
||||
tree type = (TREE_TYPE (DR_REF (dr)));
|
||||
tree ba = DR_BASE_OBJECT (dr);
|
||||
bool is_packed = false;
|
||||
|
||||
if (ba)
|
||||
is_packed = contains_packed_reference (ba);
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
|
||||
if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Function vect_enhance_data_refs_alignment
|
||||
|
||||
This pass will use loop versioning and loop peeling in order to enhance
|
||||
|
@ -1540,33 +1611,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
|
||||
if (!DR_IS_READ (dr) && !aligned_access_p (dr))
|
||||
{
|
||||
if (DR_GROUP_FIRST_DR (stmt_info))
|
||||
{
|
||||
/* For interleaved access we peel only if number of iterations in
|
||||
the prolog loop ({VF - misalignment}), is a multiple of the
|
||||
number of the interleaved accesses. */
|
||||
int elem_size, mis_in_elements;
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
/* FORNOW: handle only known alignment. */
|
||||
if (!known_alignment_for_access_p (dr))
|
||||
{
|
||||
do_peeling = false;
|
||||
break;
|
||||
}
|
||||
|
||||
elem_size = UNITS_PER_SIMD_WORD / nelements;
|
||||
mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
|
||||
|
||||
if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
|
||||
{
|
||||
do_peeling = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dr0 = dr;
|
||||
do_peeling = true;
|
||||
do_peeling = vector_alignment_reachable_p (dr);
|
||||
if (do_peeling)
|
||||
dr0 = dr;
|
||||
if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "vector alignment may not be reachable");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4315,6 +4315,12 @@ extern tree get_inner_reference (tree, HOST_WIDE_INT *, HOST_WIDE_INT *,
|
|||
tree *, enum machine_mode *, int *, int *,
|
||||
bool);
|
||||
|
||||
/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF,
|
||||
look for whether EXP or any nested component-refs within EXP is marked
|
||||
as PACKED. */
|
||||
|
||||
extern bool contains_packed_reference (tree exp);
|
||||
|
||||
/* Return 1 if T is an expression that get_inner_reference handles. */
|
||||
|
||||
extern int handled_component_p (tree);
|
||||
|
|
Loading…
Add table
Reference in a new issue