re PR target/25500 (SSE2 vectorized code is slower on 4.x.x than previous)

2006-11-20  Andrew Pinski  <andrew_pinski@playstation.sony.com>

        PR tree-opt/25500
        * tree-sra.c (single_scalar_field_in_record_p): New function.
        (decide_block_copy): Use it.

2006-11-20  Andrew Pinski  <andrew_pinski@playstation.sony.com>

        PR tree-opt/25500
        * gcc.dg/tree-ssa/sra-4.c: New testcase.

From-SVN: r119026
This commit is contained in:
Andrew Pinski 2006-11-20 20:29:10 +00:00 committed by Andrew Pinski
parent d7043acd94
commit 903ff2758b
4 changed files with 67 additions and 0 deletions

View file

@ -1,3 +1,9 @@
2006-11-20 Andrew Pinski <andrew_pinski@playstation.sony.com>
PR tree-opt/25500
* tree-sra.c (single_scalar_field_in_record_p): New function.
(decide_block_copy): Use it.
2006-11-20 David Daney <ddaney@avtrex.com>
* config/mips/linux-unwind.h (mips_fallback_frame_state): Adjust

View file

@ -1,3 +1,8 @@
2006-11-20 Andrew Pinski <andrew_pinski@playstation.sony.com>
PR tree-opt/25500
* gcc.dg/tree-ssa/sra-4.c: New testcase.
2006-11-20 Tobias Burnus <burnus@net-b.de>
* gfortran.dg/volatile3.f90: Add conflict test.

View file

@ -0,0 +1,26 @@
/* { dg-do compile } */
/* { dg-options "-O1 -fdump-tree-optimized -w" } */
/* Check that SRA does not use block copies for structs that contain just a vector. */
/* Shorthand for GCC's vector_size attribute with an argument of 16, so that
   "vector int" below declares a vector-of-int type. */
#define vector __attribute__((vector_size(16)))
/* A record whose only member is a single vector; this is the shape of
   struct the test exercises. */
struct vt
{
vector int t;
};
/* Pass T1 through three struct vt temporaries using whole-struct
   assignments (st1 to st2, then st2 to st3), add T2 to the field in
   between, and return the field of the last temporary.  The two
   whole-struct assignments are the copies this test is concerned with. */
vector int f(vector int t1, vector int t2)
{
struct vt st1, st2, st3;
st1.t = t1;
st2 = st1; /* whole-struct assignment */
st2.t += t2;
st3 = st2; /* whole-struct assignment */
return st3.t;
}
/* There should be no references to st, as SRA should not have done a block copy. */
/* { dg-final { scan-tree-dump-times "st" 0 "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

View file

@ -1351,6 +1351,32 @@ instantiate_missing_elements (struct sra_elt *elt)
}
}
/* Decide whether the record TYPE has a single, non-aggregate field.

   Return false when TYPE is not a RECORD_TYPE, when a second FIELD_DECL
   is found on its TYPE_FIELDS chain, or when a FIELD_DECL has an
   aggregate type.  Return true otherwise; note that this includes a
   record whose chain holds no FIELD_DECL at all.  Chain entries that
   are not FIELD_DECLs are ignored. */
static bool
single_scalar_field_in_record_p (tree type)
{
  bool seen_field = false;
  tree fld;

  if (TREE_CODE (type) != RECORD_TYPE)
    return false;

  fld = TYPE_FIELDS (type);
  while (fld)
    {
      if (TREE_CODE (fld) == FIELD_DECL)
        {
          /* A second field disqualifies the record outright, before its
             type is even looked at. */
          if (seen_field)
            return false;
          seen_field = true;

          if (AGGREGATE_TYPE_P (TREE_TYPE (fld)))
            return false;
        }
      fld = TREE_CHAIN (fld);
    }

  return true;
}
/* Make one pass across an element tree deciding whether to perform block
or element copies. If we decide on element copies, instantiate all
elements. Return true if there are any instantiated sub-elements. */
@ -1430,6 +1456,10 @@ decide_block_copy (struct sra_elt *elt)
full_count = count_type_elements (elt->type, false);
inst_count = sum_instantiated_sizes (elt, &inst_size);
/* If there is only one scalar field in the record, don't block copy. */
if (single_scalar_field_in_record_p (elt->type))
use_block_copy = false;
/* ??? What to do here. If there are two fields, and we've only
instantiated one, then instantiating the other is clearly a win.
If there are a large number of fields then the size of the copy