Commit cea93abb by Jerry DeLisle

re PR libfortran/35863 ([F2003] Implement ENCODING="UTF-8")

2008-06-13  Jerry DeLisle  <jvdelisle@gcc.gnu.org>

	PR fortran/35863
	* libgfortran.h: Change l8_to_l4_offset to big_endian and add endian_off.
	* runtime/main.c: Fix error in comment. Change l8_to_l4_offset to
	big_endian. (determine_endianness): Add endian_off and set its value
	according to big_endian.
	* gfortran.map: Add symbol for new _gfortran_transfer_character_wide.
	* io/io.h: Add prototype declarations for new functions.
	* io/list_read.c (list_formatted_read_scalar): Modify to handle kind=4.
	(list_formatted_read): Calculate stride based on kind for character type
	and use it when calling list_formatted_read_scalar.
	* io/inquire.c (inquire_via_unit): Change l8_to_l4_offset to big_endian.
	* io/open.c (st_open): Change l8_to_l4_offset to big_endian.
	* io/read.c (read_a_char4): New function to handle formatted read.
	* io/write.c: Define GFC_CHAR4(x) to improve readability of code.
	(write_a_char4): New function to handle formatted write.
	(write_character): Modify to accept the kind parameter and adjust for
	endianess of the machine. (list_formatted_write): Calculate the stride
	resulting from the kind and adjust the list_formatted_write_scalar call
	accordingly. (nml_write_obj): Adjust calls to write_character.
	(namelist_write): Likewise.
	* io/transfer.c (formatted_transfer_scaler): Rename 'len' argument to
	'kind' argument to better describe what it is. Add calls to new
	functions for kind == 4. (formatted_transfer): Modify to handle the case
	of type character and kind equals 4 to pass in the kind to the transfer
	routines. (transfer_character_wide): Add this new function.
	(transfer_array): Don't set kind to the character string length. Adjust
	strides bases on character kind.
	(unformatted_read): Adjust size based on kind for character types.
	(unformatted_write): Likewise. (data_transfer_init): Change
	l8_to_l4_offset to big_endian.

From-SVN: r136763
parent c5f4d1cc
2008-06-13 Jerry DeLisle <jvdelisle@gcc.gnu.org>
PR fortran/35863
* libgfortran.h: Change l8_to_l4_offset to big_endian and add endian_off.
* runtime/main.c: Fix error in comment. Change l8_to_l4_offset to
big_endian. (determine_endianness): Add endian_off and set its value
according to big_endian.
* gfortran.map: Add symbol for new _gfortran_transfer_character_wide.
* io/io.h: Add prototype declarations for new functions.
* io/list_read.c (list_formatted_read_scalar): Modify to handle kind=4.
(list_formatted_read): Calculate stride based on kind for character type
and use it when calling list_formatted_read_scalar.
* io/inquire.c (inquire_via_unit): Change l8_to_l4_offset to big_endian.
* io/open.c (st_open): Change l8_to_l4_offset to big_endian.
* io/read.c (read_a_char4): New function to handle formatted read.
* io/write.c: Define GFC_CHAR4(x) to improve readability of code.
(write_a_char4): New function to handle formatted write.
(write_character): Modify to accept the kind parameter and adjust for
endianess of the machine. (list_formatted_write): Calculate the stride
resulting from the kind and adjust the list_formatted_write_scalar call
accordingly. (nml_write_obj): Adjust calls to write_character.
(namelist_write): Likewise.
* io/transfer.c (formatted_transfer_scaler): Rename 'len' argument to
'kind' argument to better describe what it is. Add calls to new
functions for kind == 4. (formatted_transfer): Modify to handle the case
of type character and kind equals 4 to pass in the kind to the transfer
routines. (transfer_character_wide): Add this new function.
(transfer_array): Don't set kind to the character string length. Adjust
strides bases on character kind.
(unformatted_read): Adjust size based on kind for character types.
(unformatted_write): Likewise. (data_transfer_init): Change
l8_to_l4_offset to big_endian.
2008-06-13 Tobias Burnus <burnus@net-b.de>
* configure.ac (AM_CFLAGS): Remove -Werror again.
......
......@@ -1083,6 +1083,7 @@ GFORTRAN_1.1 {
_gfortran_string_trim_char4;
_gfortran_string_verify_char4;
_gfortran_st_wait;
_gfortran_transfer_character_wide;
_gfortran_transpose_char4;
_gfortran_unpack0_char4;
_gfortran_unpack1_char4;
......
......@@ -157,7 +157,7 @@ fbuf_seek (gfc_unit * u, gfc_offset off)
/* Moving to the left past the flushed marked would imply moving past
the left tab limit, which is never allowed. So return error if
that is attempted. */
if (pos < u->fbuf->flushed)
if (pos < (gfc_offset) u->fbuf->flushed)
return -1;
u->fbuf->pos = pos;
return 0;
......
......@@ -268,10 +268,9 @@ inquire_via_unit (st_parameter_inquire *iqp, gfc_unit * u)
case ENCODING_DEFAULT:
p = "UNKNOWN";
break;
/* TODO: Enable UTF-8 case here when implemented.
case ENCODING_UTF8:
p = "UTF-8";
break; */
break;
default:
internal_error (&iqp->common, "inquire_via_unit(): Bad encoding");
}
......@@ -497,13 +496,13 @@ inquire_via_unit (st_parameter_inquire *iqp, gfc_unit * u)
else
switch (u->flags.convert)
{
/* l8_to_l4_offset is 0 for little-endian, 1 for big-endian. */
/* big_endian is 0 for little-endian, 1 for big-endian. */
case GFC_CONVERT_NATIVE:
p = l8_to_l4_offset ? "BIG_ENDIAN" : "LITTLE_ENDIAN";
p = big_endian ? "BIG_ENDIAN" : "LITTLE_ENDIAN";
break;
case GFC_CONVERT_SWAP:
p = l8_to_l4_offset ? "LITTLE_ENDIAN" : "BIG_ENDIAN";
p = big_endian ? "LITTLE_ENDIAN" : "BIG_ENDIAN";
break;
default:
......
......@@ -869,6 +869,9 @@ internal_proto(convert_real);
extern void read_a (st_parameter_dt *, const fnode *, char *, int);
internal_proto(read_a);
extern void read_a_char4 (st_parameter_dt *, const fnode *, char *, int);
internal_proto(read_a);
extern void read_f (st_parameter_dt *, const fnode *, char *, int);
internal_proto(read_f);
......@@ -904,6 +907,9 @@ internal_proto(namelist_write);
extern void write_a (st_parameter_dt *, const fnode *, const char *, int);
internal_proto(write_a);
extern void write_a_char4 (st_parameter_dt *, const fnode *, const char *, int);
internal_proto(write_a_char4);
extern void write_b (st_parameter_dt *, const fnode *, const char *, int);
internal_proto(write_b);
......
......@@ -1728,7 +1728,8 @@ list_formatted_read_scalar (st_parameter_dt *dtp, volatile bt type, void *p,
int kind, size_t size)
{
char c;
int m;
gfc_char4_t *q;
int i, m;
jmp_buf eof_jump;
dtp->u.p.namelist_mode = 0;
......@@ -1831,17 +1832,33 @@ list_formatted_read_scalar (st_parameter_dt *dtp, volatile bt type, void *p,
case BT_CHARACTER:
if (dtp->u.p.saved_string)
{
{
m = ((int) size < dtp->u.p.saved_used)
? (int) size : dtp->u.p.saved_used;
memcpy (p, dtp->u.p.saved_string, m);
}
if (kind == 1)
memcpy (p, dtp->u.p.saved_string, m);
else
{
q = (gfc_char4_t *) p;
for (i = 0; i < m; i++)
q[i] = (unsigned char) dtp->u.p.saved_string[i];
}
}
else
/* Just delimiters encountered, nothing to copy but SPACE. */
m = 0;
if (m < (int) size)
memset (((char *) p) + m, ' ', size - m);
{
if (kind == 1)
memset (((char *) p) + m, ' ', size - m);
else
{
q = (gfc_char4_t *) p;
for (i = m; i < (int) size; i++)
q[i] = (unsigned char) ' ';
}
}
break;
case BT_NULL:
......@@ -1862,6 +1879,8 @@ list_formatted_read (st_parameter_dt *dtp, bt type, void *p, int kind,
{
size_t elem;
char *tmp;
size_t stride = type == BT_CHARACTER ?
size * GFC_SIZE_OF_CHAR_KIND(kind) : size;
tmp = (char *) p;
......@@ -1869,7 +1888,7 @@ list_formatted_read (st_parameter_dt *dtp, bt type, void *p, int kind,
for (elem = 0; elem < nelems; elem++)
{
dtp->u.p.item_count++;
list_formatted_read_scalar (dtp, type, tmp + size*elem, kind, size);
list_formatted_read_scalar (dtp, type, tmp + stride*elem, kind, size);
}
}
......
......@@ -107,7 +107,7 @@ static const st_option decimal_opt[] =
static const st_option encoding_opt[] =
{
/* TODO { "utf-8", ENCODING_UTF8}, */
{ "utf-8", ENCODING_UTF8},
{ "default", ENCODING_DEFAULT},
{ NULL, 0}
};
......@@ -795,7 +795,7 @@ st_open (st_parameter_open *opp)
conv = compile_options.convert;
}
/* We use l8_to_l4_offset, which is 0 on little-endian machines
/* We use big_endian, which is 0 on little-endian machines
and 1 on big-endian machines. */
switch (conv)
{
......@@ -804,11 +804,11 @@ st_open (st_parameter_open *opp)
break;
case GFC_CONVERT_BIG:
conv = l8_to_l4_offset ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP;
conv = big_endian ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP;
break;
case GFC_CONVERT_LITTLE:
conv = l8_to_l4_offset ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE;
conv = big_endian ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE;
break;
default:
......
......@@ -270,6 +270,43 @@ read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length)
memset (p + m, ' ', n);
}
void
read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
{
char *s;
gfc_char4_t *dest;
int m, n, wi, status;
size_t w;
wi = f->u.w;
if (wi == -1) /* '(A)' edit descriptor */
wi = length;
w = wi;
s = gfc_alloca (w);
/* Read in w bytes, treating comma as not a separator. */
dtp->u.p.sf_read_comma = 0;
status = read_block_form (dtp, s, &w);
dtp->u.p.sf_read_comma =
dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1;
if (status == FAILURE)
return;
if (w > (size_t) length)
s += (w - length);
m = ((int) w > length) ? length : (int) w;
dest = (gfc_char4_t *) p;
for (n = 0; n < m; n++, dest++, s++)
*dest = (unsigned char ) *s;
for (n = 0; n < length - (int) w; n++, dest++)
*dest = (unsigned char) ' ';
}
/* eat_leading_spaces()-- Given a character pointer and a width,
* ignore the leading spaces. */
......
......@@ -124,6 +124,108 @@ write_a (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
#endif
}
/* The primary difference between write_a_char4 and write_a is that we have to
deal with writing from the first byte of the 4-byte character and take care
of endianess. This currently implements encoding="default" which means we
write the lowest significant byte. If the 3 most significant bytes are
not representable emit a '?'. TODO: Implement encoding="UTF-8"
which will process all 4 bytes and translate to the encoded output. */
void
write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
{
int wlen;
char *p;
gfc_char4_t *q;
wlen = f->u.string.length < 0
|| (f->format == FMT_G && f->u.string.length == 0)
? len : f->u.string.length;
q = (gfc_char4_t *) source;
#ifdef HAVE_CRLF
/* If this is formatted STREAM IO convert any embedded line feed characters
to CR_LF on systems that use that sequence for newlines. See F2003
Standard sections 10.6.3 and 9.9 for further information. */
if (is_stream_io (dtp))
{
const char crlf[] = "\r\n";
int i, j, bytes;
gfc_char4_t *qq;
bytes = 0;
/* Write out any padding if needed. */
if (len < wlen)
{
p = write_block (dtp, wlen - len);
if (p == NULL)
return;
memset (p, ' ', wlen - len);
}
/* Scan the source string looking for '\n' and convert it if found. */
qq = (gfc_char4_t *) source;
for (i = 0; i < wlen; i++)
{
if (qq[i] == '\n')
{
/* Write out the previously scanned characters in the string. */
if (bytes > 0)
{
p = write_block (dtp, bytes);
if (p == NULL)
return;
for (j = 0; j < bytes; j++)
p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
bytes = 0;
}
/* Write out the CR_LF sequence. */
p = write_block (dtp, 2);
if (p == NULL)
return;
memcpy (p, crlf, 2);
}
else
bytes++;
}
/* Write out any remaining bytes if no LF was found. */
if (bytes > 0)
{
p = write_block (dtp, bytes);
if (p == NULL)
return;
for (j = 0; j < bytes; j++)
p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
}
}
else
{
#endif
int j;
p = write_block (dtp, wlen);
if (p == NULL)
return;
if (wlen < len)
{
for (j = 0; j < wlen; j++)
p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
}
else
{
memset (p, ' ', wlen - len);
for (j = wlen - len; j < wlen; j++)
p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
}
#ifdef HAVE_CRLF
}
#endif
}
static GFC_INTEGER_LARGEST
extract_int (const void *p, int len)
{
......@@ -639,10 +741,12 @@ write_integer (st_parameter_dt *dtp, const char *source, int length)
the strings if the file has been opened in that mode. */
static void
write_character (st_parameter_dt *dtp, const char *source, int length)
write_character (st_parameter_dt *dtp, const char *source, int kind, int length)
{
int i, extra;
char *p, d;
gfc_char4_t *q;
switch (dtp->u.p.delim_status)
{
......@@ -657,35 +761,77 @@ write_character (st_parameter_dt *dtp, const char *source, int length)
break;
}
if (d == ' ')
extra = 0;
else
if (kind == 1)
{
extra = 2;
if (d == ' ')
extra = 0;
else
{
extra = 2;
for (i = 0; i < length; i++)
if (source[i] == d)
extra++;
}
for (i = 0; i < length; i++)
if (source[i] == d)
extra++;
}
p = write_block (dtp, length + extra);
if (p == NULL)
return;
p = write_block (dtp, length + extra);
if (p == NULL)
return;
if (d == ' ')
memcpy (p, source, length);
else
{
*p++ = d;
if (d == ' ')
memcpy (p, source, length);
for (i = 0; i < length; i++)
{
*p++ = source[i];
if (source[i] == d)
*p++ = d;
}
*p = d;
}
}
else
{
*p++ = d;
for (i = 0; i < length; i++)
/* We have to scan the source string looking for delimiters to determine
how large the write block needs to be. */
if (d == ' ')
extra = 0;
else
{
*p++ = source[i];
if (source[i] == d)
*p++ = d;
extra = 2;
q = (gfc_char4_t *) source;
for (i = 0; i < length; i++, q++)
if (*q == (gfc_char4_t) d)
extra++;
}
*p = d;
p = write_block (dtp, length + extra);
if (p == NULL)
return;
if (d == ' ')
{
q = (gfc_char4_t *) source;
for (i = 0; i < length; i++, q++)
p[i] = *q > 255 ? '?' : (unsigned char) *q;
}
else
{
*p++ = d;
q = (gfc_char4_t *) source;
for (i = 0; i < length; i++, q++)
{
*p++ = *q > 255 ? '?' : (unsigned char) *q;
if (*q == (gfc_char4_t) d)
*p++ = d;
}
*p = d;
}
}
}
......@@ -796,7 +942,7 @@ list_formatted_write_scalar (st_parameter_dt *dtp, bt type, void *p, int kind,
write_logical (dtp, p, kind);
break;
case BT_CHARACTER:
write_character (dtp, p, kind);
write_character (dtp, p, kind, size);
break;
case BT_REAL:
write_real (dtp, p, kind);
......@@ -818,6 +964,8 @@ list_formatted_write (st_parameter_dt *dtp, bt type, void *p, int kind,
{
size_t elem;
char *tmp;
size_t stride = type == BT_CHARACTER ?
size * GFC_SIZE_OF_CHAR_KIND(kind) : size;
tmp = (char *) p;
......@@ -825,7 +973,7 @@ list_formatted_write (st_parameter_dt *dtp, bt type, void *p, int kind,
for (elem = 0; elem < nelems; elem++)
{
dtp->u.p.item_count++;
list_formatted_write_scalar (dtp, type, tmp + size*elem, kind, size);
list_formatted_write_scalar (dtp, type, tmp + elem * stride, kind, size);
}
}
......@@ -889,9 +1037,9 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
if (obj->type != GFC_DTYPE_DERIVED)
{
#ifdef HAVE_CRLF
write_character (dtp, "\r\n ", 3);
write_character (dtp, "\r\n ", 1, 3);
#else
write_character (dtp, "\n ", 2);
write_character (dtp, "\n ", 1, 2);
#endif
len = 0;
if (base)
......@@ -900,15 +1048,15 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
for (dim_i = 0; dim_i < (index_type) strlen (base_name); dim_i++)
{
cup = toupper (base_name[dim_i]);
write_character (dtp, &cup, 1);
write_character (dtp, &cup, 1, 1);
}
}
for (dim_i =len; dim_i < (index_type) strlen (obj->var_name); dim_i++)
{
cup = toupper (obj->var_name[dim_i]);
write_character (dtp, &cup, 1);
write_character (dtp, &cup, 1, 1);
}
write_character (dtp, "=", 1);
write_character (dtp, "=", 1, 1);
}
/* Counts the number of data output on a line, including names. */
......@@ -978,7 +1126,7 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
if (rep_ctr > 1)
{
sprintf(rep_buff, " %d*", rep_ctr);
write_character (dtp, rep_buff, strlen (rep_buff));
write_character (dtp, rep_buff, 1, strlen (rep_buff));
dtp->u.p.no_leading_blank = 1;
}
num++;
......@@ -1003,7 +1151,7 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
dtp->u.p.delim_status = DELIM_QUOTE;
if (dtp->u.p.nml_delim == '\'')
dtp->u.p.delim_status = DELIM_APOSTROPHE;
write_character (dtp, p, obj->string_length);
write_character (dtp, p, 1, obj->string_length);
dtp->u.p.delim_status = tmp_delim;
break;
......@@ -1093,14 +1241,14 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
to column 2. Reset the repeat counter. */
dtp->u.p.no_leading_blank = 0;
write_character (dtp, &semi_comma, 1);
write_character (dtp, &semi_comma, 1, 1);
if (num > 5)
{
num = 0;
#ifdef HAVE_CRLF
write_character (dtp, "\r\n ", 3);
write_character (dtp, "\r\n ", 1, 3);
#else
write_character (dtp, "\n ", 2);
write_character (dtp, "\n ", 1, 2);
#endif
}
rep_ctr = 1;
......@@ -1164,13 +1312,13 @@ namelist_write (st_parameter_dt *dtp)
/* Temporarily disable namelist delimters. */
dtp->u.p.delim_status = DELIM_NONE;
write_character (dtp, "&", 1);
write_character (dtp, "&", 1, 1);
/* Write namelist name in upper case - f95 std. */
for (i = 0 ;i < dtp->namelist_name_len ;i++ )
{
c = toupper (dtp->namelist_name[i]);
write_character (dtp, &c ,1);
write_character (dtp, &c, 1 ,1);
}
if (dtp->u.p.ionml != NULL)
......@@ -1184,9 +1332,9 @@ namelist_write (st_parameter_dt *dtp)
}
#ifdef HAVE_CRLF
write_character (dtp, " /\r\n", 5);
write_character (dtp, " /\r\n", 1, 5);
#else
write_character (dtp, " /\n", 4);
write_character (dtp, " /\n", 1, 4);
#endif
/* Restore the original delimiter. */
......
......@@ -272,13 +272,12 @@ typedef GFC_UINTEGER_4 gfc_char4_t;
simply equal to the kind parameter itself. */
#define GFC_SIZE_OF_CHAR_KIND(kind) (kind)
/* This will be 0 on little-endian machines and one on big-endian machines. */
extern int l8_to_l4_offset;
internal_proto(l8_to_l4_offset);
extern int big_endian;
internal_proto(big_endian);
#define GFOR_POINTER_TO_L1(p, kind) \
(l8_to_l4_offset * (kind - 1) + (GFC_LOGICAL_1 *)(p))
(big_endian * (kind - 1) + (GFC_LOGICAL_1 *)(p))
#define GFC_INTEGER_1_HUGE \
(GFC_INTEGER_1)((((GFC_UINTEGER_1)1) << 7) - 1)
......
......@@ -45,10 +45,9 @@ stupid_function_name_for_static_linking (void)
return;
}
/* This is the offset (in bytes) required to cast from logical(8)* to
logical(4)*. and still get the same result. Will be 0 for little-endian
machines and 4 for big-endian machines. */
int l8_to_l4_offset = 0;
/* This will be 0 for little-endian
machines and 1 for big-endian machines. */
int big_endian = 0;
/* Figure out endianness for this machine. */
......@@ -64,9 +63,9 @@ determine_endianness (void)
u.l8 = 1;
if (u.l4[0])
l8_to_l4_offset = 0;
big_endian = 0;
else if (u.l4[1])
l8_to_l4_offset = 1;
big_endian = 1;
else
runtime_error ("Unable to determine machine endianness");
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment