re PR c++/82294 (Array of objects with constexpr constructors initialized from space-inefficient memory image)
PR c++/82294 PR c++/87436 * expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument. * expr.c (categorize_ctor_elements_1): Likewise. Compute it like p_nz_elts, except don't multiply it by mult. Adjust recursive call. Fix up COMPLEX_CST handling. (categorize_ctor_elements): Add p_unique_nz_elts argument, initialize it and pass it through to categorize_ctor_elements_1. (mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers. * gimplify.c (gimplify_init_constructor): Likewise. Don't force ctor into readonly data section if num_unique_nonzero_elements is smaller or equal to 1/8 of num_nonzero_elements and size is >= 64 bytes. * g++.dg/tree-ssa/pr82294.C: New test. * g++.dg/tree-ssa/pr87436.C: New test. From-SVN: r267143
This commit is contained in:
parent
faa9232da3
commit
0fca07e31d
7 changed files with 131 additions and 17 deletions
|
@ -1,3 +1,19 @@
|
|||
2018-12-14 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR c++/82294
|
||||
PR c++/87436
|
||||
* expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
|
||||
* expr.c (categorize_ctor_elements_1): Likewise. Compute it like
|
||||
p_nz_elts, except don't multiply it by mult. Adjust recursive call.
|
||||
Fix up COMPLEX_CST handling.
|
||||
(categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
|
||||
it and pass it through to categorize_ctor_elements_1.
|
||||
(mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
|
||||
* gimplify.c (gimplify_init_constructor): Likewise. Don't force
|
||||
ctor into readonly data section if num_unique_nonzero_elements is
|
||||
smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
|
||||
bytes.
|
||||
|
||||
2018-12-14 Thomas Preud'homme <thomas.preudhomme@arm.com>
|
||||
|
||||
* config/arm/arm.md (arm_movdi): Split if -mslow-flash-data and
|
||||
|
|
58
gcc/expr.c
58
gcc/expr.c
|
@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bool for_ctor_p)
|
|||
|
||||
static bool
|
||||
categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
||||
HOST_WIDE_INT *p_unique_nz_elts,
|
||||
HOST_WIDE_INT *p_init_elts, bool *p_complete)
|
||||
{
|
||||
unsigned HOST_WIDE_INT idx;
|
||||
HOST_WIDE_INT nz_elts, init_elts, num_fields;
|
||||
HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
|
||||
tree value, purpose, elt_type;
|
||||
|
||||
/* Whether CTOR is a valid constant initializer, in accordance with what
|
||||
|
@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
|
||||
|
||||
nz_elts = 0;
|
||||
unique_nz_elts = 0;
|
||||
init_elts = 0;
|
||||
num_fields = 0;
|
||||
elt_type = NULL_TREE;
|
||||
|
@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
{
|
||||
case CONSTRUCTOR:
|
||||
{
|
||||
HOST_WIDE_INT nz = 0, ic = 0;
|
||||
HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
|
||||
|
||||
bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic,
|
||||
p_complete);
|
||||
bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &unz,
|
||||
&ic, p_complete);
|
||||
|
||||
nz_elts += mult * nz;
|
||||
unique_nz_elts += unz;
|
||||
init_elts += mult * ic;
|
||||
|
||||
if (const_from_elts_p && const_p)
|
||||
|
@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
case REAL_CST:
|
||||
case FIXED_CST:
|
||||
if (!initializer_zerop (value))
|
||||
nz_elts += mult;
|
||||
{
|
||||
nz_elts += mult;
|
||||
unique_nz_elts++;
|
||||
}
|
||||
init_elts += mult;
|
||||
break;
|
||||
|
||||
case STRING_CST:
|
||||
nz_elts += mult * TREE_STRING_LENGTH (value);
|
||||
unique_nz_elts += TREE_STRING_LENGTH (value);
|
||||
init_elts += mult * TREE_STRING_LENGTH (value);
|
||||
break;
|
||||
|
||||
case COMPLEX_CST:
|
||||
if (!initializer_zerop (TREE_REALPART (value)))
|
||||
nz_elts += mult;
|
||||
{
|
||||
nz_elts += mult;
|
||||
unique_nz_elts++;
|
||||
}
|
||||
if (!initializer_zerop (TREE_IMAGPART (value)))
|
||||
nz_elts += mult;
|
||||
init_elts += mult;
|
||||
{
|
||||
nz_elts += mult;
|
||||
unique_nz_elts++;
|
||||
}
|
||||
init_elts += 2 * mult;
|
||||
break;
|
||||
|
||||
case VECTOR_CST:
|
||||
|
@ -6025,7 +6038,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
{
|
||||
tree v = VECTOR_CST_ELT (value, i);
|
||||
if (!initializer_zerop (v))
|
||||
nz_elts += mult;
|
||||
{
|
||||
nz_elts += mult;
|
||||
unique_nz_elts++;
|
||||
}
|
||||
init_elts += mult;
|
||||
}
|
||||
}
|
||||
|
@ -6035,6 +6051,7 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
{
|
||||
HOST_WIDE_INT tc = count_type_elements (elt_type, false);
|
||||
nz_elts += mult * tc;
|
||||
unique_nz_elts += tc;
|
||||
init_elts += mult * tc;
|
||||
|
||||
if (const_from_elts_p && const_p)
|
||||
|
@ -6054,6 +6071,7 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
*p_complete = false;
|
||||
|
||||
*p_nz_elts += nz_elts;
|
||||
*p_unique_nz_elts += unique_nz_elts;
|
||||
*p_init_elts += init_elts;
|
||||
|
||||
return const_p;
|
||||
|
@ -6062,6 +6080,11 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
/* Examine CTOR to discover:
|
||||
* how many scalar fields are set to nonzero values,
|
||||
and place it in *P_NZ_ELTS;
|
||||
* the same, but counting RANGE_EXPRs as multiplier of 1 instead of
|
||||
high - low + 1 (this can be useful for callers to determine ctors
|
||||
that could be cheaply initialized with - perhaps nested - loops
|
||||
compared to copied from huge read-only data),
|
||||
and place it in *P_UNIQUE_NZ_ELTS;
|
||||
* how many scalar fields in total are in CTOR,
|
||||
and place it in *P_ELT_COUNT.
|
||||
* whether the constructor is complete -- in the sense that every
|
||||
|
@ -6073,13 +6096,16 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
|||
|
||||
bool
|
||||
categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
||||
HOST_WIDE_INT *p_unique_nz_elts,
|
||||
HOST_WIDE_INT *p_init_elts, bool *p_complete)
|
||||
{
|
||||
*p_nz_elts = 0;
|
||||
*p_unique_nz_elts = 0;
|
||||
*p_init_elts = 0;
|
||||
*p_complete = true;
|
||||
|
||||
return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete);
|
||||
return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
|
||||
p_init_elts, p_complete);
|
||||
}
|
||||
|
||||
/* TYPE is initialized by a constructor with NUM_ELTS elements, the last
|
||||
|
@ -6110,17 +6136,18 @@ complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts,
|
|||
return count_type_elements (type, true) == num_elts;
|
||||
}
|
||||
|
||||
/* Return 1 if EXP contains mostly (3/4) zeros. */
|
||||
/* Return 1 if EXP contains mostly (3/4) zeros. */
|
||||
|
||||
static int
|
||||
mostly_zeros_p (const_tree exp)
|
||||
{
|
||||
if (TREE_CODE (exp) == CONSTRUCTOR)
|
||||
{
|
||||
HOST_WIDE_INT nz_elts, init_elts;
|
||||
HOST_WIDE_INT nz_elts, unz_elts, init_elts;
|
||||
bool complete_p;
|
||||
|
||||
categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
|
||||
categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
|
||||
&complete_p);
|
||||
return !complete_p || nz_elts < init_elts / 4;
|
||||
}
|
||||
|
||||
|
@ -6134,10 +6161,11 @@ all_zeros_p (const_tree exp)
|
|||
{
|
||||
if (TREE_CODE (exp) == CONSTRUCTOR)
|
||||
{
|
||||
HOST_WIDE_INT nz_elts, init_elts;
|
||||
HOST_WIDE_INT nz_elts, unz_elts, init_elts;
|
||||
bool complete_p;
|
||||
|
||||
categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
|
||||
categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
|
||||
&complete_p);
|
||||
return nz_elts == 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
|
|||
extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
|
||||
|
||||
extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
|
||||
HOST_WIDE_INT *, bool *);
|
||||
HOST_WIDE_INT *, HOST_WIDE_INT *,
|
||||
bool *);
|
||||
|
||||
extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
|
||||
enum expand_modifier);
|
||||
|
|
|
@ -4778,7 +4778,15 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
|
|||
{
|
||||
struct gimplify_init_ctor_preeval_data preeval_data;
|
||||
HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
|
||||
HOST_WIDE_INT num_unique_nonzero_elements;
|
||||
bool cleared, complete_p, valid_const_initializer;
|
||||
/* Use readonly data for initializers of this or smaller size
|
||||
regardless of the num_nonzero_elements / num_unique_nonzero_elements
|
||||
ratio. */
|
||||
const HOST_WIDE_INT min_unique_size = 64;
|
||||
/* If num_nonzero_elements / num_unique_nonzero_elements ratio
|
||||
is smaller than this, use readonly data. */
|
||||
const int unique_nonzero_ratio = 8;
|
||||
|
||||
/* Aggregate types must lower constructors to initialization of
|
||||
individual elements. The exception is that a CONSTRUCTOR node
|
||||
|
@ -4795,6 +4803,7 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
|
|||
can only do so if it known to be a valid constant initializer. */
|
||||
valid_const_initializer
|
||||
= categorize_ctor_elements (ctor, &num_nonzero_elements,
|
||||
&num_unique_nonzero_elements,
|
||||
&num_ctor_elements, &complete_p);
|
||||
|
||||
/* If a const aggregate variable is being initialized, then it
|
||||
|
@ -4803,7 +4812,15 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
|
|||
&& num_nonzero_elements > 1
|
||||
&& TREE_READONLY (object)
|
||||
&& VAR_P (object)
|
||||
&& (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object)))
|
||||
&& (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object))
|
||||
/* For ctors that have many repeated nonzero elements
|
||||
represented through RANGE_EXPRs, prefer initializing
|
||||
those through runtime loops over copies of large amounts
|
||||
of data from readonly data section. */
|
||||
&& (num_unique_nonzero_elements
|
||||
> num_nonzero_elements / unique_nonzero_ratio
|
||||
|| ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)
|
||||
<= (unsigned HOST_WIDE_INT) min_unique_size)))
|
||||
{
|
||||
if (notify_temp_creation)
|
||||
return GS_ERROR;
|
||||
|
@ -4896,6 +4913,13 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
|
|||
is so large as to make individual moves inefficient. */
|
||||
if (size > 0
|
||||
&& num_nonzero_elements > 1
|
||||
/* For ctors that have many repeated nonzero elements
|
||||
represented through RANGE_EXPRs, prefer initializing
|
||||
those through runtime loops over copies of large amounts
|
||||
of data from readonly data section. */
|
||||
&& (num_unique_nonzero_elements
|
||||
> num_nonzero_elements / unique_nonzero_ratio
|
||||
|| size <= min_unique_size)
|
||||
&& (size < num_nonzero_elements
|
||||
|| !can_move_by_pieces (size, align)))
|
||||
{
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2018-12-14 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR c++/82294
|
||||
PR c++/87436
|
||||
* g++.dg/tree-ssa/pr82294.C: New test.
|
||||
* g++.dg/tree-ssa/pr87436.C: New test.
|
||||
|
||||
2018-11-14 Thomas Preud'homme <thomas.preudhomme@arm.com>
|
||||
|
||||
* gcc.target/arm/thumb2-slow-flash-data-2.c: Require arm_fp_ok
|
||||
|
|
13
gcc/testsuite/g++.dg/tree-ssa/pr82294.C
Normal file
13
gcc/testsuite/g++.dg/tree-ssa/pr82294.C
Normal file
|
@ -0,0 +1,13 @@
|
|||
// PR c++/82294
|
||||
// { dg-do compile { target c++11 } }
|
||||
// { dg-options "-O2 -fdump-tree-gimple" }
|
||||
|
||||
// Verify we don't "optimize" the ctor as copying a 1KB .rodata
|
||||
// object into the variable. It is better to initialize it through
|
||||
// a loop.
|
||||
// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } }
|
||||
|
||||
struct S { int x; explicit constexpr S (); };
|
||||
constexpr S::S () : x{7} {}
|
||||
struct T { S arr[256]; explicit T (); };
|
||||
T::T () {}
|
25
gcc/testsuite/g++.dg/tree-ssa/pr87436.C
Normal file
25
gcc/testsuite/g++.dg/tree-ssa/pr87436.C
Normal file
|
@ -0,0 +1,25 @@
|
|||
// PR c++/87436
|
||||
// { dg-do compile { target { c++11 && size32plus } } }
|
||||
// { dg-options "-O2 -fdump-tree-gimple" }
|
||||
|
||||
// Verify we don't "optimize" the ctor as copying a 384MB .rodata
|
||||
// object into the variable. It is better to initialize it through
|
||||
// two nested loops.
|
||||
// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } }
|
||||
|
||||
struct S {
|
||||
int a = -1;
|
||||
short b = 3;
|
||||
int x = 0;
|
||||
int y = 1;
|
||||
int z = 42;
|
||||
float f = 0.123f;
|
||||
};
|
||||
|
||||
struct T { S arr[4096][4096]; };
|
||||
|
||||
T *
|
||||
foo ()
|
||||
{
|
||||
return new T;
|
||||
}
|
Loading…
Add table
Reference in a new issue