Commit facb23dc by Janne Blomqvist

Improve performance of byte-swapped I/O.

2013-01-23  Janne Blomqvist  <jb@gcc.gnu.org>

	* io/file_pos.c (unformatted_backspace): Use __builtin_bswapXX
	instead of reverse_memcpy.
	* io/io.h (reverse_memcpy): Remove prototype.
	* io/transfer.c (reverse_memcpy): Make static, move towards
	beginning of file.
	(bswap_array): New function.
	(unformatted_read): Use bswap_array to byte swap the data
	in-place.
	(unformatted_write): Use a larger temp buffer and bswap_array.
	(us_read): Use __builtin_bswapXX instead of reverse_memcpy.
	(write_us_marker): Likewise.

From-SVN: r195413
parent ad3e2ad2
2013-01-23 Janne Blomqvist <jb@gcc.gnu.org>
* io/file_pos.c (unformatted_backspace): Use __builtin_bswapXX
instead of reverse_memcpy.
* io/io.h (reverse_memcpy): Remove prototype.
* io/transfer.c (reverse_memcpy): Make static, move towards
beginning of file.
(bswap_array): New function.
(unformatted_read): Use bswap_array to byte swap the data
in-place.
(unformatted_write): Use a larger temp buffer and bswap_array.
(us_read): Use __builtin_bswapXX instead of reverse_memcpy.
(write_us_marker): Likewise.
2013-01-14 Richard Sandiford <rdsandiford@googlemail.com>
Update copyright years.
......
...@@ -139,15 +139,21 @@ unformatted_backspace (st_parameter_filepos *fpp, gfc_unit *u) ...@@ -139,15 +139,21 @@ unformatted_backspace (st_parameter_filepos *fpp, gfc_unit *u)
} }
else else
{ {
uint32_t u32;
uint64_t u64;
switch (length) switch (length)
{ {
case sizeof(GFC_INTEGER_4): case sizeof(GFC_INTEGER_4):
reverse_memcpy (&m4, p, sizeof (m4)); memcpy (&u32, p, sizeof (u32));
u32 = __builtin_bswap32 (u32);
memcpy (&m4, &u32, sizeof (m4));
m = m4; m = m4;
break; break;
case sizeof(GFC_INTEGER_8): case sizeof(GFC_INTEGER_8):
reverse_memcpy (&m8, p, sizeof (m8)); memcpy (&u64, p, sizeof (u64));
u64 = __builtin_bswap64 (u64);
memcpy (&m8, &u64, sizeof (m8));
m = m8; m = m8;
break; break;
......
...@@ -647,9 +647,6 @@ internal_proto(init_loop_spec); ...@@ -647,9 +647,6 @@ internal_proto(init_loop_spec);
extern void next_record (st_parameter_dt *, int); extern void next_record (st_parameter_dt *, int);
internal_proto(next_record); internal_proto(next_record);
extern void reverse_memcpy (void *, const void *, size_t);
internal_proto (reverse_memcpy);
extern void st_wait (st_parameter_wait *); extern void st_wait (st_parameter_wait *);
export_proto(st_wait); export_proto(st_wait);
......
...@@ -877,50 +877,138 @@ write_buf (st_parameter_dt *dtp, void *buf, size_t nbytes) ...@@ -877,50 +877,138 @@ write_buf (st_parameter_dt *dtp, void *buf, size_t nbytes)
} }
/* Master function for unformatted reads. */ /* Reverse memcpy - used for byte swapping. */
static void static void
unformatted_read (st_parameter_dt *dtp, bt type, reverse_memcpy (void *dest, const void *src, size_t n)
void *dest, int kind, size_t size, size_t nelems)
{ {
if (likely (dtp->u.p.current_unit->flags.convert == GFC_CONVERT_NATIVE) char *d, *s;
|| kind == 1) size_t i;
d = (char *) dest;
s = (char *) src + n - 1;
/* Write with ascending order - this is likely faster
on modern architectures because of write combining. */
for (i=0; i<n; i++)
*(d++) = *(s--);
}
/* Utility function for byteswapping an array, using the bswap
builtins if possible. dest and src can overlap completely, or then
they must point to separate objects; partial overlaps are not
allowed. */
static void
bswap_array (void *dest, const void *src, size_t size, size_t nelems)
{
const char *ps;
char *pd;
switch (size)
{ {
if (type == BT_CHARACTER) case 1:
size *= GFC_SIZE_OF_CHAR_KIND(kind); break;
read_block_direct (dtp, dest, size * nelems); case 2:
for (size_t i = 0; i < nelems; i++)
((uint16_t*)dest)[i] = __builtin_bswap16 (((uint16_t*)src)[i]);
break;
case 4:
for (size_t i = 0; i < nelems; i++)
((uint32_t*)dest)[i] = __builtin_bswap32 (((uint32_t*)src)[i]);
break;
case 8:
for (size_t i = 0; i < nelems; i++)
((uint64_t*)dest)[i] = __builtin_bswap64 (((uint64_t*)src)[i]);
break;
case 12:
ps = src;
pd = dest;
for (size_t i = 0; i < nelems; i++)
{
uint32_t tmp;
memcpy (&tmp, ps, 4);
*(uint32_t*)pd = __builtin_bswap32 (*(uint32_t*)(ps + 8));
*(uint32_t*)(pd + 4) = __builtin_bswap32 (*(uint32_t*)(ps + 4));
*(uint32_t*)(pd + 8) = __builtin_bswap32 (tmp);
ps += size;
pd += size;
}
break;
case 16:
ps = src;
pd = dest;
for (size_t i = 0; i < nelems; i++)
{
uint64_t tmp;
memcpy (&tmp, ps, 8);
*(uint64_t*)pd = __builtin_bswap64 (*(uint64_t*)(ps + 8));
*(uint64_t*)(pd + 8) = __builtin_bswap64 (tmp);
ps += size;
pd += size;
}
break;
default:
pd = dest;
if (dest != src)
{
ps = src;
for (size_t i = 0; i < nelems; i++)
{
reverse_memcpy (pd, ps, size);
ps += size;
pd += size;
}
}
else
{
/* In-place byte swap. */
for (size_t i = 0; i < nelems; i++)
{
char tmp, *low = pd, *high = pd + size - 1;
for (size_t j = 0; j < size/2; j++)
{
tmp = *low;
*low = *high;
*high = tmp;
low++;
high--;
}
pd += size;
}
}
} }
else }
{
char buffer[16];
char *p;
size_t i;
p = dest; /* Master function for unformatted reads. */
static void
unformatted_read (st_parameter_dt *dtp, bt type,
void *dest, int kind, size_t size, size_t nelems)
{
if (type == BT_CHARACTER)
size *= GFC_SIZE_OF_CHAR_KIND(kind);
read_block_direct (dtp, dest, size * nelems);
if (unlikely (dtp->u.p.current_unit->flags.convert == GFC_CONVERT_SWAP)
&& kind != 1)
{
/* Handle wide chracters. */ /* Handle wide chracters. */
if (type == BT_CHARACTER && kind != 1) if (type == BT_CHARACTER)
{ {
nelems *= size; nelems *= size;
size = kind; size = kind;
} }
/* Break up complex into its constituent reals. */ /* Break up complex into its constituent reals. */
if (type == BT_COMPLEX) else if (type == BT_COMPLEX)
{ {
nelems *= 2; nelems *= 2;
size /= 2; size /= 2;
} }
bswap_array (dest, dest, size, nelems);
/* By now, all complex variables have been split into their
constituent reals. */
for (i = 0; i < nelems; i++)
{
read_block_direct (dtp, buffer, size);
reverse_memcpy (p, buffer, size);
p += size;
}
} }
} }
...@@ -944,9 +1032,10 @@ unformatted_write (st_parameter_dt *dtp, bt type, ...@@ -944,9 +1032,10 @@ unformatted_write (st_parameter_dt *dtp, bt type,
} }
else else
{ {
char buffer[16]; #define BSWAP_BUFSZ 512
char buffer[BSWAP_BUFSZ];
char *p; char *p;
size_t i; size_t nrem;
p = source; p = source;
...@@ -967,12 +1056,21 @@ unformatted_write (st_parameter_dt *dtp, bt type, ...@@ -967,12 +1056,21 @@ unformatted_write (st_parameter_dt *dtp, bt type,
/* By now, all complex variables have been split into their /* By now, all complex variables have been split into their
constituent reals. */ constituent reals. */
for (i = 0; i < nelems; i++) nrem = nelems;
do
{ {
reverse_memcpy(buffer, p, size); size_t nc;
p += size; if (size * nrem > BSWAP_BUFSZ)
write_buf (dtp, buffer, size); nc = BSWAP_BUFSZ / size;
else
nc = nrem;
bswap_array (buffer, p, size, nc);
write_buf (dtp, buffer, size * nc);
p += size * nc;
nrem -= nc;
} }
while (nrem > 0);
} }
} }
...@@ -2152,15 +2250,22 @@ us_read (st_parameter_dt *dtp, int continued) ...@@ -2152,15 +2250,22 @@ us_read (st_parameter_dt *dtp, int continued)
} }
} }
else else
{
uint32_t u32;
uint64_t u64;
switch (nr) switch (nr)
{ {
case sizeof(GFC_INTEGER_4): case sizeof(GFC_INTEGER_4):
reverse_memcpy (&i4, &i, sizeof (i4)); memcpy (&u32, &i, sizeof (u32));
u32 = __builtin_bswap32 (u32);
memcpy (&i4, &u32, sizeof (i4));
i = i4; i = i4;
break; break;
case sizeof(GFC_INTEGER_8): case sizeof(GFC_INTEGER_8):
reverse_memcpy (&i8, &i, sizeof (i8)); memcpy (&u64, &i, sizeof (u64));
u64 = __builtin_bswap64 (u64);
memcpy (&i8, &u64, sizeof (i8));
i = i8; i = i8;
break; break;
...@@ -2168,6 +2273,7 @@ us_read (st_parameter_dt *dtp, int continued) ...@@ -2168,6 +2273,7 @@ us_read (st_parameter_dt *dtp, int continued)
runtime_error ("Illegal value for record marker"); runtime_error ("Illegal value for record marker");
break; break;
} }
}
if (i >= 0) if (i >= 0)
{ {
...@@ -3035,7 +3141,6 @@ write_us_marker (st_parameter_dt *dtp, const gfc_offset buf) ...@@ -3035,7 +3141,6 @@ write_us_marker (st_parameter_dt *dtp, const gfc_offset buf)
size_t len; size_t len;
GFC_INTEGER_4 buf4; GFC_INTEGER_4 buf4;
GFC_INTEGER_8 buf8; GFC_INTEGER_8 buf8;
char p[sizeof (GFC_INTEGER_8)];
if (compile_options.record_marker == 0) if (compile_options.record_marker == 0)
len = sizeof (GFC_INTEGER_4); len = sizeof (GFC_INTEGER_4);
...@@ -3064,18 +3169,22 @@ write_us_marker (st_parameter_dt *dtp, const gfc_offset buf) ...@@ -3064,18 +3169,22 @@ write_us_marker (st_parameter_dt *dtp, const gfc_offset buf)
} }
else else
{ {
uint32_t u32;
uint64_t u64;
switch (len) switch (len)
{ {
case sizeof (GFC_INTEGER_4): case sizeof (GFC_INTEGER_4):
buf4 = buf; buf4 = buf;
reverse_memcpy (p, &buf4, sizeof (GFC_INTEGER_4)); memcpy (&u32, &buf4, sizeof (u32));
return swrite (dtp->u.p.current_unit->s, p, len); u32 = __builtin_bswap32 (u32);
return swrite (dtp->u.p.current_unit->s, &u32, len);
break; break;
case sizeof (GFC_INTEGER_8): case sizeof (GFC_INTEGER_8):
buf8 = buf; buf8 = buf;
reverse_memcpy (p, &buf8, sizeof (GFC_INTEGER_8)); memcpy (&u64, &buf8, sizeof (u64));
return swrite (dtp->u.p.current_unit->s, p, len); u64 = __builtin_bswap64 (u64);
return swrite (dtp->u.p.current_unit->s, &u64, len);
break; break;
default: default:
...@@ -3712,22 +3821,6 @@ st_set_nml_var_dim (st_parameter_dt *dtp, GFC_INTEGER_4 n_dim, ...@@ -3712,22 +3821,6 @@ st_set_nml_var_dim (st_parameter_dt *dtp, GFC_INTEGER_4 n_dim,
GFC_DIMENSION_SET(nml->dim[n],lbound,ubound,stride); GFC_DIMENSION_SET(nml->dim[n],lbound,ubound,stride);
} }
/* Reverse memcpy - used for byte swapping. */
void reverse_memcpy (void *dest, const void *src, size_t n)
{
char *d, *s;
size_t i;
d = (char *) dest;
s = (char *) src + n - 1;
/* Write with ascending order - this is likely faster
on modern architectures because of write combining. */
for (i=0; i<n; i++)
*(d++) = *(s--);
}
/* Once upon a time, a poor innocent Fortran program was reading a /* Once upon a time, a poor innocent Fortran program was reading a
file, when suddenly it hit the end-of-file (EOF). Unfortunately file, when suddenly it hit the end-of-file (EOF). Unfortunately
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment