re PR tree-optimization/86853 (sprintf optimization for wide strings doesn't account for conversion failure)

gcc/ChangeLog:

	PR tree-optimization/86853
	* gimple-ssa-sprintf.c (struct format_result): Rename member.
	(struct fmtresult): Add member and initialize it in ctors.
	(format_character): Handle %C.  Extend range to NUL.  Set MAYFAIL.
	(format_string): Handle %S the same as %ls.  Set MAYFAIL.
	(format_directive): Set POSUNDER4K when MAYFAIL is set.
	(parse_directive): Handle %C same as %c.
	(sprintf_dom_walker::compute_format_length): Adjust.
	(is_call_safe): Adjust.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86853
	* gcc.dg/tree-ssa/builtin-sprintf-10.c: New test.
	* gcc.dg/tree-ssa/builtin-sprintf-11.c: New test.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Adjust.

From-SVN: r263612
This commit is contained in:
Martin Sebor 2018-08-17 04:01:14 +00:00 committed by Jeff Law
parent e584cd3509
commit bbcbd744b8
6 changed files with 259 additions and 22 deletions

View file

@ -1,3 +1,15 @@
2018-08-16 Martin Sebor <msebor@redhat.com>
PR tree-optimization/86853
* gimple-ssa-sprintf.c (struct format_result): Rename member.
(struct fmtresult): Add member and initialize it in ctors.
(format_character): Handle %C. Extend range to NUL. Set MAYFAIL.
(format_string): Handle %S the same as %ls. Set MAYFAIL.
(format_directive): Set POSUNDER4K when MAYFAIL is set.
(parse_directive): Handle %C same as %c.
(sprintf_dom_walker::compute_format_length): Adjust.
(is_call_safe): Adjust.
2018-08-16 Bernd Edlinger <bernd.edlinger@hotmail.de>
* builtins.c (c_strlen): Add new parameter eltsize. Use it

View file

@ -211,12 +211,14 @@ struct format_result
the return value optimization. */
bool knownrange;
/* True if no individual directive resulted in more than 4095 bytes
of output (the total NUMBER_CHARS_{MIN,MAX} might be greater).
Implementations are not required to handle directives that produce
more than 4K bytes (leading to undefined behavior) and so when one
is found it disables the return value optimization. */
bool under4k;
/* True if no individual directive could fail or result in more than
4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
greater). Implementations are not required to handle directives
that produce more than 4K bytes (leading to undefined behavior)
and so when one is found it disables the return value optimization.
Similarly, directives that can fail (such as wide character
directives) disable the optimization. */
bool posunder4k;
/* True when a floating point directive has been seen in the format
string. */
@ -651,7 +653,7 @@ struct fmtresult
fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
: argmin (), argmax (),
knownrange (min < HOST_WIDE_INT_MAX),
nullp ()
mayfail (), nullp ()
{
range.min = min;
range.max = min;
@ -665,7 +667,7 @@ struct fmtresult
unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
: argmin (), argmax (),
knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
nullp ()
mayfail (), nullp ()
{
range.min = min;
range.max = max;
@ -695,6 +697,10 @@ struct fmtresult
heuristics that depend on warning levels. */
bool knownrange;
/* True for a directive that may fail (such as wide character
directives). */
bool mayfail;
/* True when the argument is a null pointer. */
bool nullp;
};
@ -2210,7 +2216,8 @@ format_character (const directive &dir, tree arg, vr_values *vr_values)
res.knownrange = true;
if (dir.modifier == FMT_LEN_l)
if (dir.specifier == 'C'
|| dir.modifier == FMT_LEN_l)
{
/* A wide character can result in as few as zero bytes. */
res.range.min = 0;
@ -2225,13 +2232,20 @@ format_character (const directive &dir, tree arg, vr_values *vr_values)
res.range.likely = 0;
res.range.unlikely = 0;
}
else if (min > 0 && min < 128)
else if (min >= 0 && min < 128)
{
/* Be conservative if the target execution character set
is not a 1-to-1 mapping to the source character set or
if the source set is not ASCII. */
bool one_2_one_ascii
= (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
/* A wide character in the ASCII range most likely results
in a single byte, and only unlikely in up to MB_LEN_MAX. */
res.range.max = 1;
res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
res.range.likely = 1;
res.range.unlikely = target_mb_len_max ();
res.mayfail = !one_2_one_ascii;
}
else
{
@ -2240,6 +2254,8 @@ format_character (const directive &dir, tree arg, vr_values *vr_values)
res.range.max = target_mb_len_max ();
res.range.likely = 2;
res.range.unlikely = res.range.max;
/* Converting such a character may fail. */
res.mayfail = true;
}
}
else
@ -2249,6 +2265,7 @@ format_character (const directive &dir, tree arg, vr_values *vr_values)
res.range.max = target_mb_len_max ();
res.range.likely = 2;
res.range.unlikely = res.range.max;
res.mayfail = true;
}
}
else
@ -2285,7 +2302,8 @@ format_string (const directive &dir, tree arg, vr_values *)
/* A '%s' directive with a string argument with constant length. */
res.range = slen.range;
if (dir.modifier == FMT_LEN_l)
if (dir.specifier == 'S'
|| dir.modifier == FMT_LEN_l)
{
/* In the worst case the length of output of a wide string S
is bounded by MB_LEN_MAX * wcslen (S). */
@ -2311,6 +2329,10 @@ format_string (const directive &dir, tree arg, vr_values *)
/* Even a non-empty wide character string need not convert into
any bytes. */
res.range.min = 0;
/* A non-empty wide character conversion may fail. */
if (slen.range.max > 0)
res.mayfail = true;
}
else
{
@ -2349,7 +2371,8 @@ format_string (const directive &dir, tree arg, vr_values *)
at level 2. This result is adjust upward for width (if it's
specified). */
if (dir.modifier == FMT_LEN_l)
if (dir.specifier == 'S'
|| dir.modifier == FMT_LEN_l)
{
/* A wide character converts to as few as zero bytes. */
slen.range.min = 0;
@ -2361,6 +2384,10 @@ format_string (const directive &dir, tree arg, vr_values *)
if (slen.range.likely < target_int_max ())
slen.range.unlikely *= target_mb_len_max ();
/* A non-empty wide character conversion may fail. */
if (slen.range.max > 0)
res.mayfail = true;
}
res.range = slen.range;
@ -2913,11 +2940,14 @@ format_directive (const sprintf_dom_walker::call_info &info,
of 4095 bytes required to be supported? */
bool minunder4k = fmtres.range.min < 4096;
bool maxunder4k = fmtres.range.max < 4096;
/* Clear UNDER4K in the overall result if the maximum has exceeded
the 4k (this is necessary to avoid the return valuye optimization
/* Clear POSUNDER4K in the overall result if the maximum has exceeded
the 4k (this is necessary to avoid the return value optimization
that may not be safe in the maximum case). */
if (!maxunder4k)
res->under4k = false;
res->posunder4k = false;
/* Also clear POSUNDER4K if the directive may fail. */
if (fmtres.mayfail)
res->posunder4k = false;
if (!warned
/* Only warn at level 2. */
@ -3363,12 +3393,15 @@ parse_directive (sprintf_dom_walker::call_info &info,
dir.fmtfunc = format_none;
break;
case 'C':
case 'c':
/* POSIX wide character and C/POSIX narrow character. */
dir.fmtfunc = format_character;
break;
case 'S':
case 's':
/* POSIX wide string and C/POSIX narrow character string. */
dir.fmtfunc = format_string;
break;
@ -3526,10 +3559,10 @@ sprintf_dom_walker::compute_format_length (call_info &info,
res->range.min = res->range.max = 0;
/* No directive has been seen yet so the length of output is bounded
by the known range [0, 0] (with no conversion producing more than
4K bytes) until determined otherwise. */
by the known range [0, 0] (with no conversion resulting in a failure
or producing more than 4K bytes) until determined otherwise. */
res->knownrange = true;
res->under4k = true;
res->posunder4k = true;
res->floating = false;
res->warned = false;
@ -3597,7 +3630,7 @@ is_call_safe (const sprintf_dom_walker::call_info &info,
const format_result &res, bool under4k,
unsigned HOST_WIDE_INT retval[2])
{
if (under4k && !res.under4k)
if (under4k && !res.posunder4k)
return false;
/* The minimum return value. */

View file

@ -1,3 +1,10 @@
2018-08-16 Martin Sebor <msebor@redhat.com>
PR tree-optimization/86853
* gcc.dg/tree-ssa/builtin-sprintf-10.c: New test.
* gcc.dg/tree-ssa/builtin-sprintf-11.c: New test.
* gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Adjust.
2018-08-16 David Malcolm <dmalcolm@redhat.com>
* gcc.dg/missing-header-fixit-3.c: New test.

View file

@ -0,0 +1,119 @@
/* PR tree-optimization/86853 - sprintf optimization for wide strings
doesn't account for conversion failureâ
{ dg-do compile }
{ dg-options "-O2 -Wall -fdump-tree-optimized" } */
typedef __SIZE_TYPE__ size_t;
typedef __WCHAR_TYPE__ wchar_t;
extern int snprintf (char*, size_t, const char*, ...);
#define CONCAT(x, y) x ## y
#define CAT(x, y) CONCAT (x, y)
#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__)
#define FAIL(name) do { \
extern void FAILNAME (name) (void); \
FAILNAME (name)(); \
} while (0)
/* Macro to emit a call to funcation named
call_in_true_branch_not_eliminated_on_line_NNN()
for each call that's expected to be eliminated. The dg-final
scan-tree-dump-time directive at the bottom of the test verifies
that no such call appears in output. */
#define ELIM(expr) \
if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
/* Macro to emit a call to a function named
call_made_in_{true,false}_branch_on_line_NNN()
for each call that's expected to be retained. The dg-final
scan-tree-dump-time directive at the bottom of the test verifies
that the expected number of both kinds of calls appears in output
(a pair for each line with the invocation of the KEEP() macro. */
#define KEEP(expr) \
if (expr) \
FAIL (made_in_true_branch); \
else \
FAIL (made_in_false_branch)
extern wchar_t wc;
extern wchar_t ws[];
const wchar_t ws3[] = L"12\xff";
/* Verify that the following calls are eliminated. */
void elim_wide_char_call (void)
{
ELIM (snprintf (0, 0, "%lc", L'\0'));
ELIM (snprintf (0, 0, "%lc", L'1'));
ELIM (snprintf (0, 0, "%lc", L'a'));
ELIM (snprintf (0, 0, "%lc", ws3[0]));
ELIM (snprintf (0, 0, "%lc", ws3[1]));
ELIM (snprintf (0, 0, "%lc", ws3[3]));
ELIM (snprintf (0, 0, "%C", L'\0'));
ELIM (snprintf (0, 0, "%C", L'9'));
ELIM (snprintf (0, 0, "%C", L'z'));
ELIM (snprintf (0, 0, "%C", ws3[0]));
ELIM (snprintf (0, 0, "%C", ws3[1]));
ELIM (snprintf (0, 0, "%C", ws3[3]));
/* Verify an unknown character value within the ASCII range. */
if (wc < 1 || 127 < wc)
wc = 0;
ELIM (snprintf (0, 0, "%C", wc));
ELIM (snprintf (0, 0, "%C", wc));
}
void elim_wide_string_call (void)
{
ELIM (snprintf (0, 0, "%ls", L""));
}
#line 1000
/* Verify that the following calls are not eliminated. */
void keep_wide_char_call (void)
{
KEEP (snprintf (0, 0, "%lc", L'\xff'));
KEEP (snprintf (0, 0, "%lc", L'\xffff'));
KEEP (snprintf (0, 0, "%lc", wc));
KEEP (snprintf (0, 0, "%lc", ws3[2]));
KEEP (snprintf (0, 0, "%C", L'\xff'));
KEEP (snprintf (0, 0, "%C", L'\xffff'));
KEEP (snprintf (0, 0, "%C", wc));
KEEP (snprintf (0, 0, "%C", ws3[2]));
/* Verify an unknown character value outside the ASCII range
(with 128 being the only one). */
if (wc < 32 || 128 < wc)
wc = 32;
KEEP (snprintf (0, 0, "%lc", wc));
KEEP (snprintf (0, 0, "%C", wc));
}
void keep_wide_string_call (void)
{
KEEP (snprintf (0, 0, "%ls", L"\xff"));
KEEP (snprintf (0, 0, "%ls", L"\xffff"));
KEEP (snprintf (0, 0, "%ls", ws));
KEEP (snprintf (0, 0, "%ls", ws3));
KEEP (snprintf (0, 0, "%S", L"\xff"));
KEEP (snprintf (0, 0, "%S", L"\xffff"));
KEEP (snprintf (0, 0, "%S", ws));
KEEP (snprintf (0, 0, "%S", ws3));
}
/* { dg-final { scan-tree-dump-times "call_made_in_true_branch_not_eliminated" 0 "optimized" } }
{ dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 18 "optimized" } }
{ dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 18 "optimized" } } */

View file

@ -0,0 +1,65 @@
/* PR tree-optimization/86853 - sprintf optimization for wide strings
doesn't account for conversion failureâ
Exercise wide character handling in an EBCDIC execution charset.
{ dg-do compile }
{ dg-require-iconv "IBM1047" }
{ dg-options "-O2 -Wall -Wno-format -Wformat-overflow -fexec-charset=IBM1047 -fdump-tree-optimized" } */
typedef __WCHAR_TYPE__ wchar_t;
/* Exercise wide character constants. */
void test_lc_cst (void)
{
/* IBM1047 0x30 maps to ASCII 0x94 which neeed not be representable
in the current locale (and the snprintf() call may fail). Verify
that snprintf() doesn't assume it is. */
wchar_t wc = 0x30;
int n = __builtin_snprintf (0, 0, "%lc", wc);
if (n < 0)
__builtin_abort ();
}
void test_C_cst (void)
{
/* Same as above but for %C and 0x31 which maps to 0x95. */
wchar_t wc = 0x31;
int n = __builtin_snprintf (0, 0, "%C", wc);
if (n < 0)
__builtin_abort ();
}
/* Exercise wide character values in known ranges. */
void test_lc_range (wchar_t wc)
{
if (wc < 0x40 || 0x49 < wc)
wc = 0x40;
int n = __builtin_snprintf (0, 0, "%lc", wc);
if (n < 0)
__builtin_abort ();
}
void test_C_range (wchar_t wc)
{
if (wc < 0x41 || 0x48 < wc)
wc = 0x41;
int n = __builtin_snprintf (0, 0, "%C", wc);
if (n < 0)
__builtin_abort ();
}
/* Exercise unknown wide character values. */
void test_var (wchar_t wc)
{
int n = __builtin_snprintf (0, 0, "%lc", wc);
if (n < 0)
__builtin_abort ();
}
/* { dg-final { scan-tree-dump-times "abort" 5 "optimized" } } */

View file

@ -18,7 +18,7 @@ void test_characters ()
T ("%A", 0.0); /* { dg-warning ".%A. directive writing between 6 and 20 " } */
T ("%a", 0.0); /* { dg-warning ".%a. directive writing between 6 and 20 " } */
T ("%C", 'a'); /* { dg-warning ".%C. directive writing 1 byte" "bug 80537" { xfail *-*-* } } */
T ("%C", L'a'); /* { dg-warning ".%C. directive writing up to 6 bytes" } */
T ("%c", 'a'); /* { dg-warning ".%c. directive writing 1 byte" } */
T ("%d", 12); /* { dg-warning ".%d. directive writing 2 bytes" } */
@ -93,7 +93,8 @@ void test_characters ()
T ("%x", 1234); /* { dg-warning ".%x. directive writing 3 bytes" } */
T ("%#X", 1235); /* { dg-warning ".%#X. directive writing 5 bytes" } */
T ("%S", L"1"); /* { dg-warning ".%S. directive writing 1 byte" } */
T ("%S", L"1"); /* { dg-warning ".%S. directive writing up to 6 bytes" } */
T ("%ls", L"12"); /* { dg-warning ".%ls. directive writing up to 12 bytes" } */
T ("%-s", "1"); /* { dg-warning ".%-s. directive writing 1 byte" } */
/* Verify that characters in the source character set appear in