Commit 01d93568 by Thomas Koenig

pack.m4 (pack_'rtype_code`): Use count_0 for counting true values in a logical array.

2009-08-30  Thomas Koenig  <tkoenig@gcc.gnu.org>

	* m4/pack.m4 (pack_'rtype_code`): Use count_0 for counting true
	values in a logical array.  Mark bounds checking tests as
	unlikely.
	* intrinsics/pack_generic.c (pack_internal): Likewise.
	* runtime/bounds.c (count_0):  Fix off-by-one error in detecting
	empty arrays.
	* generated/pack_c4.c: Regenerated.
	* generated/pack_c8.c: Regenerated.
	* generated/pack_c10.c: Regenerated.
	* generated/pack_c16.c: Regenerated.
	* generated/pack_i1.c: Regenerated.
	* generated/pack_i16.c: Regenerated.
	* generated/pack_i2.c: Regenerated.
	* generated/pack_i4.c: Regenerated.
	* generated/pack_i8.c: Regenerated.
	* generated/pack_r4.c: Regenerated.
	* generated/pack_r8.c: Regenerated.
	* generated/pack_r10.c: Regenerated.
	* generated/pack_r16.c: Regenerated.

From-SVN: r151225
parent 26ef2b42
2009-08-30 Thomas Koenig <tkoenig@gcc.gnu.org>
* m4/pack.m4 (pack_'rtype_code`): Use count_0 for counting true
values in a logical array. Mark bounds checking tests as
unlikely.
* intrinsics/pack_generic.c (pack_internal): Likewise.
* runtime/bounds.c (count_0): Fix off-by-one error in detecting
empty arrays.
* generated/pack_c4.c: Regenerated.
* generated/pack_c8.c: Regenerated.
* generated/pack_c10.c: Regenerated.
* generated/pack_c16.c: Regenerated.
* generated/pack_i1.c: Regenerated.
* generated/pack_i16.c: Regenerated.
* generated/pack_i2.c: Regenerated.
* generated/pack_i4.c: Regenerated.
* generated/pack_i8.c: Regenerated.
* generated/pack_r4.c: Regenerated.
* generated/pack_r8.c: Regenerated.
* generated/pack_r10.c: Regenerated.
* generated/pack_r16.c: Regenerated.
2009-08-25 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/34670
......
...@@ -138,7 +138,7 @@ pack_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array, ...@@ -138,7 +138,7 @@ pack_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array, ...@@ -155,62 +155,10 @@ pack_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array, ...@@ -138,7 +138,7 @@ pack_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array, ...@@ -155,62 +155,10 @@ pack_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array, ...@@ -138,7 +138,7 @@ pack_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array, ...@@ -155,62 +155,10 @@ pack_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array, ...@@ -138,7 +138,7 @@ pack_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array, ...@@ -155,62 +155,10 @@ pack_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array, ...@@ -138,7 +138,7 @@ pack_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array, ...@@ -155,62 +155,10 @@ pack_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array, ...@@ -138,7 +138,7 @@ pack_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array, ...@@ -155,62 +155,10 @@ pack_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array, ...@@ -138,7 +138,7 @@ pack_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array, ...@@ -155,62 +155,10 @@ pack_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array, ...@@ -138,7 +138,7 @@ pack_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array, ...@@ -155,62 +155,10 @@ pack_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array, ...@@ -138,7 +138,7 @@ pack_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array, ...@@ -155,62 +155,10 @@ pack_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array, ...@@ -138,7 +138,7 @@ pack_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array, ...@@ -155,62 +155,10 @@ pack_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array, ...@@ -138,7 +138,7 @@ pack_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array, ...@@ -155,62 +155,10 @@ pack_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array, ...@@ -138,7 +138,7 @@ pack_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array, ...@@ -155,62 +155,10 @@ pack_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -138,7 +138,7 @@ pack_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array, ...@@ -138,7 +138,7 @@ pack_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -155,62 +155,10 @@ pack_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array, ...@@ -155,62 +155,10 @@ pack_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -132,7 +132,7 @@ pack_internal (gfc_array_char *ret, const gfc_array_char *array, ...@@ -132,7 +132,7 @@ pack_internal (gfc_array_char *ret, const gfc_array_char *array,
if (mstride[0] == 0) if (mstride[0] == 0)
mstride[0] = mask_kind; mstride[0] = mask_kind;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -147,58 +147,7 @@ pack_internal (gfc_array_char *ret, const gfc_array_char *array, ...@@ -147,58 +147,7 @@ pack_internal (gfc_array_char *ret, const gfc_array_char *array,
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
/* TODO: We could speed up pack easily in the case of only total = count_0 (mask);
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
} }
if (ret->data == NULL) if (ret->data == NULL)
......
...@@ -139,7 +139,7 @@ pack_'rtype_code` ('rtype` *ret, const 'rtype` *array, ...@@ -139,7 +139,7 @@ pack_'rtype_code` ('rtype` *ret, const 'rtype` *array,
else else
sptr = array->data; sptr = array->data;
if (ret->data == NULL || compile_options.bounds_check) if (ret->data == NULL || unlikely (compile_options.bounds_check))
{ {
/* Count the elements, either for allocating memory or /* Count the elements, either for allocating memory or
for bounds checking. */ for bounds checking. */
...@@ -156,62 +156,10 @@ pack_'rtype_code` ('rtype` *ret, const 'rtype` *array, ...@@ -156,62 +156,10 @@ pack_'rtype_code` ('rtype` *ret, const 'rtype` *array,
} }
} }
else else
{ {
/* We have to count the true elements in MASK. */ /* We have to count the true elements in MASK. */
total = count_0 (mask);
/* TODO: We could speed up pack easily in the case of only }
few .TRUE. entries in MASK, by keeping track of where we
would be in the source array during the initial traversal
of MASK, and caching the pointers to those elements. Then,
supposed the number of elements is small enough, we would
only have to traverse the list, and copy those elements
into the result array. In the case of datatypes which fit
in one of the integer types we could also cache the
value instead of a pointer to it.
This approach might be bad from the point of view of
cache behavior in the case where our cache is not big
enough to hold all elements that have to be copied. */
const GFC_LOGICAL_1 *m = mptr;
total = 0;
if (zero_sized)
m = NULL;
while (m)
{
/* Test this element. */
if (*m)
total++;
/* Advance to the next element. */
m += mstride[0];
count[0]++;
n = 0;
while (count[n] == extent[n])
{
/* When we get to the end of a dimension, reset it
and increment the next dimension. */
count[n] = 0;
/* We could precalculate this product, but this is a
less frequently used path so probably not worth
it. */
m -= mstride[n] * extent[n];
n++;
if (n >= dim)
{
/* Break out of the loop. */
m = NULL;
break;
}
else
{
count[n]++;
m += mstride[n];
}
}
}
}
if (ret->data == NULL) if (ret->data == NULL)
{ {
......
...@@ -237,7 +237,7 @@ index_type count_0 (const gfc_array_l1 * array) ...@@ -237,7 +237,7 @@ index_type count_0 (const gfc_array_l1 * array)
extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
count[n] = 0; count[n] = 0;
if (extent[n] < 0) if (extent[n] <= 0)
return 0; return 0;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment