From 3f3284629ba481294562f8370bf45e205e1d1eec Mon Sep 17 00:00:00 2001 From: Jerry DeLisle Date: Fri, 9 Nov 2018 02:46:03 +0000 Subject: [PATCH] re PR fortran/78351 (comma not terminating READ of formatted input field - ok in 4.1.7, not 4.4.7- maybe related to 25419?) 2018-11-08 Jerry DeLisle PR libfortran/78351 * io/transfer.c (read_sf_internal): Add support for early comma termination of internal unit formatted reads. * gfortran.dg/read_legacy_comma.f90: New test. From-SVN: r265946 --- gcc/testsuite/ChangeLog | 5 + .../gfortran.dg/read_legacy_comma.f90 | 31 ++++++ libgfortran/ChangeLog | 6 ++ libgfortran/io/transfer.c | 95 ++++++++++++++----- 4 files changed, 114 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/read_legacy_comma.f90 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4769e451d4c..b300e5f0d95 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-11-08 Jerry DeLisle + + PR libfortran/78351 + * gfortran.dg/read_legacy_comma.f90: New test. + 2018-11-08 Peter Bergner PR rtl-optimization/87600 diff --git a/gcc/testsuite/gfortran.dg/read_legacy_comma.f90 b/gcc/testsuite/gfortran.dg/read_legacy_comma.f90 new file mode 100644 index 00000000000..7c3e1853412 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/read_legacy_comma.f90 @@ -0,0 +1,31 @@ +! { dg-do run } +! { dg-options "-std=legacy" } +! PR78351 +program read_csv + implicit none + integer, parameter :: dbl = selected_real_kind(p=14, r=99) + + call checkit("101,1.,2.,3.,7,7") + call checkit ("102,1.,,3.,,7") + call checkit (",1.,,3.,, ") + +contains + +subroutine checkit (text) + character(*) :: text + integer :: I1, I2, I3 + real(dbl) :: R1, R2, R3 + 10 format (I8,3ES16.8,2I8) + + I1=-99; I2=-99; I3=-99 + R1=-99._DBL; R2=-99._DBL; R3=-99._DBL + read(text,10) I1, R1, R2, R3, I2, I3 + if (I1 == -99) stop 1 + if (I2 == -99) stop 2 + if (I3 == -99) stop 3 + if (R1 == -99._DBL) stop 4 + if (R2 == -99._DBL) stop 5 + if (R3 == -99._DBL) stop 6 +end subroutine + +end program diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index 644053634e4..336c1c00de4 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,9 @@ +2018-11-08 Jerry DeLisle + + PR libfortran/78351 + * io/transfer.c (read_sf_internal): Add support for early + comma termination of internal unit formatted reads. + 2018-10-31 Joseph Myers PR bootstrap/82856 diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c index 31198a3cc39..21bfea48640 100644 --- a/libgfortran/io/transfer.c +++ b/libgfortran/io/transfer.c @@ -241,16 +241,6 @@ read_sf_internal (st_parameter_dt *dtp, size_t *length) && dtp->u.p.current_unit->pad_status == PAD_NO) hit_eof (dtp); - /* If we have seen an eor previously, return a length of 0. The - caller is responsible for correctly padding the input field. */ - if (dtp->u.p.sf_seen_eor) - { - *length = 0; - /* Just return something that isn't a NULL pointer, otherwise the - caller thinks an error occurred. */ - return (char*) empty_string; - } - /* There are some cases with mixed DTIO where we have read a character and saved it in the last character buffer, so we need to backup. */ if (unlikely (dtp->u.p.current_unit->child_dtio > 0 && @@ -260,22 +250,81 @@ read_sf_internal (st_parameter_dt *dtp, size_t *length) sseek (dtp->u.p.current_unit->s, -1, SEEK_CUR); } - lorig = *length; - if (is_char4_unit(dtp)) + /* To support legacy code we have to scan the input string one byte + at a time because we don't know where an early comma may be and the + requested length could go past the end of a comma shortened + string. We only do this if -std=legacy was given at compile + time. We also do not support this on kind=4 strings. */ + printf("allow_std=%d\n", compile_options.warn_std); + if (unlikely(compile_options.warn_std == 0)) // the slow legacy way. { - gfc_char4_t *p = (gfc_char4_t *) mem_alloc_r4 (dtp->u.p.current_unit->s, - length); - base = fbuf_alloc (dtp->u.p.current_unit, lorig); - for (size_t i = 0; i < *length; i++, p++) - base[i] = *p > 255 ? '?' : (unsigned char) *p; - } - else - base = mem_alloc_r (dtp->u.p.current_unit->s, length); + size_t n; + size_t tmp = 1; + char *q; - if (unlikely (lorig > *length)) + /* If we have seen an eor previously, return a length of 0. The + caller is responsible for correctly padding the input field. */ + if (dtp->u.p.sf_seen_eor) + { + *length = 0; + /* Just return something that isn't a NULL pointer, otherwise the + caller thinks an error occurred. */ + return (char*) empty_string; + } + + /* Get the first character of the string to establish the base + address and check for comma or end-of-record condition. */ + base = mem_alloc_r (dtp->u.p.current_unit->s, &tmp); + if (tmp == 0) + { + dtp->u.p.sf_seen_eor = 1; + *length = 0; + return (char*) empty_string; + } + if (*base == ',') + { + dtp->u.p.current_unit->bytes_left--; + *length = 0; + return (char*) empty_string; + } + + /* Now we scan the rest and deal with either an end-of-file + condition or a comma, as needed. */ + for (n = 1; n < *length; n++) + { + q = mem_alloc_r (dtp->u.p.current_unit->s, &tmp); + if (tmp == 0) + { + hit_eof (dtp); + return NULL; + } + if (*q == ',') + { + dtp->u.p.current_unit->bytes_left -= n; + *length = n; + break; + } + } + } + else // the fast way { - hit_eof (dtp); - return NULL; + lorig = *length; + if (is_char4_unit(dtp)) + { + gfc_char4_t *p = (gfc_char4_t *) mem_alloc_r4 (dtp->u.p.current_unit->s, + length); + base = fbuf_alloc (dtp->u.p.current_unit, lorig); + for (size_t i = 0; i < *length; i++, p++) + base[i] = *p > 255 ? '?' : (unsigned char) *p; + } + else + base = mem_alloc_r (dtp->u.p.current_unit->s, length); + + if (unlikely (lorig > *length)) + { + hit_eof (dtp); + return NULL; + } } dtp->u.p.current_unit->bytes_left -= *length;