Commit d428be77 by Jerry DeLisle

re PR libfortran/52539 (I/O: Wrong result for UTF-8/UCS-4 list-directed and…

re PR libfortran/52539 (I/O: Wrong result for UTF-8/UCS-4 list-directed and namelist read and nml write)

2014-05-17  Jerry DeLisle  <jvdelisle@gcc.gnu>

	PR libfortran/52539
	* io/io.h (gfc_unit): New function pointers *next_char_fn_ptr
	and *push_char_fn_ptr.
	*io/list_read.c (next_char): Create macro with this name to call
	the new function pointer. Split the original next_char function
	into three new functions. (next_char_default, next_char_internal,
	next_char_utf8): New functions. (push_char): Create macro with
	this name to call new function pointer. Split the original
	push_char into three new functions. (push_char_default,
	push_char_internal, push_char4): New functions. (set_workers):
	New function to initilize the function pointers depending on the
	type of IO to be performed. (list_formatted_read_scalar): Use
	set_workers function. (finish_list_read): Likewise.
	(namelist_read): Likewise.
	(nml_get_obj_data): Use push_char_default.

From-SVN: r210574
parent ef61de09
2014-05-17 Jerry DeLisle <jvdelisle@gcc.gnu>
PR libfortran/52539
* io/io.h (gfc_unit): New function pointers *next_char_fn_ptr
and *push_char_fn_ptr.
*io/list_read.c (next_char): Create macro with this name to call
the new function pointer. Split the original next_char function
into three new functions. (next_char_default, next_char_internal,
next_char_utf8): New functions. (push_char): Create macro with
this name to call new function pointer. Split the original
push_char into three new functions. (push_char_default,
push_char_internal, push_char4): New functions. (set_workers):
New function to initilize the function pointers depending on the
type of IO to be performed. (list_formatted_read_scalar): Use
set_workers function. (finish_list_read): Likewise.
(namelist_read): Likewise.
(nml_get_obj_data): Use push_char_default.
2014-05-16 Janne Blomqvist <jb@gcc.gnu.org> 2014-05-16 Janne Blomqvist <jb@gcc.gnu.org>
PR libfortran/61187 PR libfortran/61187
......
...@@ -575,6 +575,10 @@ typedef struct gfc_unit ...@@ -575,6 +575,10 @@ typedef struct gfc_unit
/* Formatting buffer. */ /* Formatting buffer. */
struct fbuf *fbuf; struct fbuf *fbuf;
/* Function pointer, points to list_read worker functions. */
int (*next_char_fn_ptr) (st_parameter_dt *);
void (*push_char_fn_ptr) (st_parameter_dt *, int);
} }
gfc_unit; gfc_unit;
......
...@@ -67,10 +67,17 @@ typedef unsigned char uchar; ...@@ -67,10 +67,17 @@ typedef unsigned char uchar;
#define MSGLEN 100 #define MSGLEN 100
/* Save a character to a string buffer, enlarging it as necessary. */
/* Wrappers for calling the current worker functions. */
#define next_char(dtp) ((dtp)->u.p.current_unit->next_char_fn_ptr (dtp))
#define push_char(dtp, c) ((dtp)->u.p.current_unit->push_char_fn_ptr (dtp, c))
/* Worker function to save a default KIND=1 character to a string
buffer, enlarging it as necessary. */
static void static void
push_char (st_parameter_dt *dtp, char c) push_char_default (st_parameter_dt *dtp, int c)
{ {
char *new; char *new;
...@@ -96,14 +103,15 @@ push_char (st_parameter_dt *dtp, char c) ...@@ -96,14 +103,15 @@ push_char (st_parameter_dt *dtp, char c)
} }
dtp->u.p.saved_string[dtp->u.p.saved_used++] = c; dtp->u.p.saved_string[dtp->u.p.saved_used++] = (char) c;
} }
/* Save a KIND=4 character to a string buffer, enlarging the buffer
as necessary. */
/* Worker function to save a KIND=4 character to a string buffer,
enlarging the buffer as necessary. */
static void static void
push_char4 (st_parameter_dt *dtp, gfc_char4_t c) push_char4 (st_parameter_dt *dtp, int c)
{ {
gfc_char4_t *new, *p = (gfc_char4_t *) dtp->u.p.saved_string; gfc_char4_t *new, *p = (gfc_char4_t *) dtp->u.p.saved_string;
...@@ -118,12 +126,12 @@ push_char4 (st_parameter_dt *dtp, gfc_char4_t c) ...@@ -118,12 +126,12 @@ push_char4 (st_parameter_dt *dtp, gfc_char4_t c)
if (dtp->u.p.saved_used >= dtp->u.p.saved_length) if (dtp->u.p.saved_used >= dtp->u.p.saved_length)
{ {
dtp->u.p.saved_length = 2 * dtp->u.p.saved_length; dtp->u.p.saved_length = 2 * dtp->u.p.saved_length;
new = realloc (p, dtp->u.p.saved_length); new = realloc (p, dtp->u.p.saved_length * sizeof (gfc_char4_t));
if (new == NULL) if (new == NULL)
generate_error (&dtp->common, LIBERROR_OS, NULL); generate_error (&dtp->common, LIBERROR_OS, NULL);
p = new; p = new;
memset (new + dtp->u.p.saved_used, 0, memset4 (new + dtp->u.p.saved_used, 0,
dtp->u.p.saved_length - dtp->u.p.saved_used); dtp->u.p.saved_length - dtp->u.p.saved_used);
} }
...@@ -162,13 +170,16 @@ free_line (st_parameter_dt *dtp) ...@@ -162,13 +170,16 @@ free_line (st_parameter_dt *dtp)
} }
/* Unget saves the last character so when reading the next character,
we need to check to see if there is a character waiting. Similar,
if the line buffer is being used to read_logical, check it too. */
static int static int
next_char (st_parameter_dt *dtp) check_buffers (st_parameter_dt *dtp)
{ {
ssize_t length;
gfc_offset record;
int c; int c;
c = '\0';
if (dtp->u.p.last_char != EOF - 1) if (dtp->u.p.last_char != EOF - 1)
{ {
dtp->u.p.at_eol = 0; dtp->u.p.at_eol = 0;
...@@ -194,6 +205,43 @@ next_char (st_parameter_dt *dtp) ...@@ -194,6 +205,43 @@ next_char (st_parameter_dt *dtp)
dtp->u.p.line_buffer_pos = 0; dtp->u.p.line_buffer_pos = 0;
dtp->u.p.line_buffer_enabled = 0; dtp->u.p.line_buffer_enabled = 0;
} }
done:
dtp->u.p.at_eol = (c == '\n' || c == EOF);
return c;
}
/* Worker function for default character encoded file. */
static int
next_char_default (st_parameter_dt *dtp)
{
int c;
/* Always check the unget and line buffer first. */
if ((c = check_buffers (dtp)))
return c;
c = fbuf_getc (dtp->u.p.current_unit);
if (c != EOF && is_stream_io (dtp))
dtp->u.p.current_unit->strm_pos++;
dtp->u.p.at_eol = (c == '\n' || c == EOF);
return c;
}
/* Worker function for internal and array I/O units. */
static int
next_char_internal (st_parameter_dt *dtp)
{
ssize_t length;
gfc_offset record;
int c;
/* Always check the unget and line buffer first. */
if ((c = check_buffers (dtp)))
return c;
/* Handle the end-of-record and end-of-file conditions for /* Handle the end-of-record and end-of-file conditions for
internal array unit. */ internal array unit. */
...@@ -229,58 +277,50 @@ next_char (st_parameter_dt *dtp) ...@@ -229,58 +277,50 @@ next_char (st_parameter_dt *dtp)
/* Get the next character and handle end-of-record conditions. */ /* Get the next character and handle end-of-record conditions. */
if (is_internal_unit (dtp)) if (dtp->common.unit) /* Check for kind=4 internal unit. */
length = sread (dtp->u.p.current_unit->s, &c, sizeof (gfc_char4_t));
else
{
char cc;
length = sread (dtp->u.p.current_unit->s, &cc, 1);
c = cc;
}
if (unlikely (length < 0))
{ {
/* Check for kind=4 internal unit. */ generate_error (&dtp->common, LIBERROR_OS, NULL);
if (dtp->common.unit) return '\0';
length = sread (dtp->u.p.current_unit->s, &c, sizeof (gfc_char4_t)); }
else
{
char cc;
length = sread (dtp->u.p.current_unit->s, &cc, 1);
c = cc;
}
if (unlikely (length < 0)) if (is_array_io (dtp))
{
/* Check whether we hit EOF. */
if (unlikely (length == 0))
{ {
generate_error (&dtp->common, LIBERROR_OS, NULL); generate_error (&dtp->common, LIBERROR_INTERNAL_UNIT, NULL);
return '\0'; return '\0';
} }
dtp->u.p.current_unit->bytes_left--;
if (is_array_io (dtp))
{
/* Check whether we hit EOF. */
if (unlikely (length == 0))
{
generate_error (&dtp->common, LIBERROR_INTERNAL_UNIT, NULL);
return '\0';
}
dtp->u.p.current_unit->bytes_left--;
}
else
{
if (dtp->u.p.at_eof)
return EOF;
if (length == 0)
{
c = '\n';
dtp->u.p.at_eof = 1;
}
}
} }
else else
{ {
c = fbuf_getc (dtp->u.p.current_unit); if (dtp->u.p.at_eof)
if (c != EOF && is_stream_io (dtp)) return EOF;
dtp->u.p.current_unit->strm_pos++; if (length == 0)
{
c = '\n';
dtp->u.p.at_eof = 1;
}
} }
done: done:
dtp->u.p.at_eol = (c == '\n' || c == EOF); dtp->u.p.at_eol = (c == '\n' || c == EOF);
return c; return c;
} }
static gfc_char4_t /* Worker function for UTF encoded files. */
static int
next_char_utf8 (st_parameter_dt *dtp) next_char_utf8 (st_parameter_dt *dtp)
{ {
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 }; static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
...@@ -288,9 +328,12 @@ next_char_utf8 (st_parameter_dt *dtp) ...@@ -288,9 +328,12 @@ next_char_utf8 (st_parameter_dt *dtp)
int i, nb; int i, nb;
gfc_char4_t c; gfc_char4_t c;
c = next_char (dtp); /* Always check the unget and line buffer first. */
if (!(c = check_buffers (dtp)))
c = fbuf_getc (dtp->u.p.current_unit);
if (c < 0x80) if (c < 0x80)
return c; goto utf_done;
/* The number of leading 1-bits in the first byte indicates how many /* The number of leading 1-bits in the first byte indicates how many
bytes follow. */ bytes follow. */
...@@ -305,11 +348,9 @@ next_char_utf8 (st_parameter_dt *dtp) ...@@ -305,11 +348,9 @@ next_char_utf8 (st_parameter_dt *dtp)
/* Decode the bytes read. */ /* Decode the bytes read. */
for (i = 1; i < nb; i++) for (i = 1; i < nb; i++)
{ {
gfc_char4_t n = next_char (dtp); gfc_char4_t n = fbuf_getc (dtp->u.p.current_unit);
if ((n & 0xC0) != 0x80) if ((n & 0xC0) != 0x80)
goto invalid; goto invalid;
c = ((c << 6) + (n & 0x3F)); c = ((c << 6) + (n & 0x3F));
} }
...@@ -324,7 +365,9 @@ next_char_utf8 (st_parameter_dt *dtp) ...@@ -324,7 +365,9 @@ next_char_utf8 (st_parameter_dt *dtp)
if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF)) if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
goto invalid; goto invalid;
return c; utf_done:
dtp->u.p.at_eol = (c == '\n' || c == (gfc_char4_t) EOF);
return (int) c;
invalid: invalid:
generate_error (&dtp->common, LIBERROR_READ_VALUE, "Invalid UTF-8 encoding"); generate_error (&dtp->common, LIBERROR_READ_VALUE, "Invalid UTF-8 encoding");
...@@ -1172,96 +1215,50 @@ read_character (st_parameter_dt *dtp, int length __attribute__ ((unused))) ...@@ -1172,96 +1215,50 @@ read_character (st_parameter_dt *dtp, int length __attribute__ ((unused)))
get_string: get_string:
if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8) for (;;)
for (;;) {
{ if ((c = next_char (dtp)) == EOF)
if ((c = next_char_utf8 (dtp)) == EOF) goto done_eof;
goto done_eof; switch (c)
switch (c) {
{ case '"':
case '"': case '\'':
case '\'': if (c != quote)
if (c != quote) {
{ push_char (dtp, c);
push_char4 (dtp, c); break;
break; }
}
/* See if we have a doubled quote character or the end of
the string. */
if ((c = next_char_utf8 (dtp)) == EOF)
goto done_eof;
if (c == quote)
{
push_char4 (dtp, quote);
break;
}
unget_char (dtp, c);
goto done;
CASE_SEPARATORS:
if (quote == ' ')
{
unget_char (dtp, c);
goto done;
}
if (c != '\n' && c != '\r')
push_char4 (dtp, c);
break;
default:
push_char4 (dtp, c);
break;
}
}
else
for (;;)
{
if ((c = next_char (dtp)) == EOF)
goto done_eof;
switch (c)
{
case '"':
case '\'':
if (c != quote)
{
push_char (dtp, c);
break;
}
/* See if we have a doubled quote character or the end of
the string. */
if ((c = next_char (dtp)) == EOF) /* See if we have a doubled quote character or the end of
goto done_eof; the string. */
if (c == quote)
{
push_char (dtp, quote);
break;
}
unget_char (dtp, c); if ((c = next_char (dtp)) == EOF)
goto done; goto done_eof;
if (c == quote)
{
push_char (dtp, quote);
break;
}
CASE_SEPARATORS: unget_char (dtp, c);
if (quote == ' ') goto done;
{
unget_char (dtp, c);
goto done;
}
if (c != '\n' && c != '\r') CASE_SEPARATORS:
push_char (dtp, c); if (quote == ' ')
break; {
unget_char (dtp, c);
goto done;
}
default: if (c != '\n' && c != '\r')
push_char (dtp, c); push_char (dtp, c);
break; break;
}
} default:
push_char (dtp, c);
break;
}
}
/* At this point, we have to have a separator, or else the string is /* At this point, we have to have a separator, or else the string is
invalid. */ invalid. */
...@@ -2025,6 +2022,30 @@ check_type (st_parameter_dt *dtp, bt type, int kind) ...@@ -2025,6 +2022,30 @@ check_type (st_parameter_dt *dtp, bt type, int kind)
} }
/* Initialize the function pointers to select the correct versions of
next_char and push_char depending on what we are doing. */
static void
set_workers (st_parameter_dt *dtp)
{
if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
{
dtp->u.p.current_unit->next_char_fn_ptr = &next_char_utf8;
dtp->u.p.current_unit->push_char_fn_ptr = &push_char4;
}
else if (is_internal_unit (dtp))
{
dtp->u.p.current_unit->next_char_fn_ptr = &next_char_internal;
dtp->u.p.current_unit->push_char_fn_ptr = &push_char_default;
}
else
{
dtp->u.p.current_unit->next_char_fn_ptr = &next_char_default;
dtp->u.p.current_unit->push_char_fn_ptr = &push_char_default;
}
}
/* Top level data transfer subroutine for list reads. Because we have /* Top level data transfer subroutine for list reads. Because we have
to deal with repeat counts, the data item is always saved after to deal with repeat counts, the data item is always saved after
reading, usually in the dtp->u.p.value[] array. If a repeat count is reading, usually in the dtp->u.p.value[] array. If a repeat count is
...@@ -2040,6 +2061,9 @@ list_formatted_read_scalar (st_parameter_dt *dtp, bt type, void *p, ...@@ -2040,6 +2061,9 @@ list_formatted_read_scalar (st_parameter_dt *dtp, bt type, void *p,
dtp->u.p.namelist_mode = 0; dtp->u.p.namelist_mode = 0;
/* Set the next_char and push_char worker functions. */
set_workers (dtp);
if (dtp->u.p.first_item) if (dtp->u.p.first_item)
{ {
dtp->u.p.first_item = 0; dtp->u.p.first_item = 0;
...@@ -2162,7 +2186,7 @@ list_formatted_read_scalar (st_parameter_dt *dtp, bt type, void *p, ...@@ -2162,7 +2186,7 @@ list_formatted_read_scalar (st_parameter_dt *dtp, bt type, void *p,
memcpy (p, dtp->u.p.saved_string, m); memcpy (p, dtp->u.p.saved_string, m);
else else
for (i = 0; i < m; i++) for (i = 0; i < m; i++)
*q++ = (unsigned char) dtp->u.p.saved_string[i]; *q++ = *r++;
} }
} }
else else
...@@ -2244,6 +2268,10 @@ finish_list_read (st_parameter_dt *dtp) ...@@ -2244,6 +2268,10 @@ finish_list_read (st_parameter_dt *dtp)
if (!is_internal_unit (dtp)) if (!is_internal_unit (dtp))
{ {
int c; int c;
/* Set the next_char and push_char worker functions. */
set_workers (dtp);
c = next_char (dtp); c = next_char (dtp);
if (c == EOF) if (c == EOF)
{ {
...@@ -3060,7 +3088,7 @@ get_name: ...@@ -3060,7 +3088,7 @@ get_name:
do do
{ {
if (!is_separator (c)) if (!is_separator (c))
push_char (dtp, tolower(c)); push_char_default (dtp, tolower(c));
if ((c = next_char (dtp)) == EOF) if ((c = next_char (dtp)) == EOF)
goto nml_err_ret; goto nml_err_ret;
} }
...@@ -3075,7 +3103,7 @@ get_name: ...@@ -3075,7 +3103,7 @@ get_name:
are present for an object. (iii) gives the same error message are present for an object. (iii) gives the same error message
as (i) */ as (i) */
push_char (dtp, '\0'); push_char_default (dtp, '\0');
if (component_flag) if (component_flag)
{ {
...@@ -3314,6 +3342,9 @@ namelist_read (st_parameter_dt *dtp) ...@@ -3314,6 +3342,9 @@ namelist_read (st_parameter_dt *dtp)
dtp->u.p.namelist_mode = 1; dtp->u.p.namelist_mode = 1;
dtp->u.p.input_complete = 0; dtp->u.p.input_complete = 0;
dtp->u.p.expanded_read = 0; dtp->u.p.expanded_read = 0;
/* Set the next_char and push_char worker functions. */
set_workers (dtp);
/* Look for &namelist_name . Skip all characters, testing for $nmlname. /* Look for &namelist_name . Skip all characters, testing for $nmlname.
Exit on success or EOF. If '?' or '=?' encountered in stdin, print Exit on success or EOF. If '?' or '=?' encountered in stdin, print
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment