Commit 8c8e9a6b by Tom de Vries Committed by Tom de Vries

[nvptx] Fix bug in jit bug workaround

2018-01-19  Tom de Vries  <tom@codesourcery.com>
	    Cesar Philippidis  <cesar@codesourcery.com>

	PR target/83920

	* config/nvptx/nvptx.c (nvptx_single): Fix jit workaround.

	* testsuite/libgomp.oacc-c-c++-common/pr83920.c: New test.
	* testsuite/libgomp.oacc-fortran/pr83920.f90: New test.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>

From-SVN: r256894
parent 6c7c4708
2018-01-19 Tom de Vries <tom@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
PR target/83920
* config/nvptx/nvptx.c (nvptx_single): Fix jit workaround.
2018-01-19 Cesar Philippidis <cesar@codesourcery.com> 2018-01-19 Cesar Philippidis <cesar@codesourcery.com>
PR target/83790 PR target/83790
......
...@@ -4102,9 +4102,33 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) ...@@ -4102,9 +4102,33 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
There is nothing in the PTX spec to suggest that this is wrong, or There is nothing in the PTX spec to suggest that this is wrong, or
to explain why the extra initialization is needed. So, we classify to explain why the extra initialization is needed. So, we classify
it as a JIT bug, and the extra initialization as workaround. */ it as a JIT bug, and the extra initialization as workaround:
emit_insn_before (gen_movbi (pvar, const0_rtx),
{
.reg .u32 %x;
mov.u32 %x,%tid.x;
setp.ne.u32 %rnotvzero,%x,0;
}
+.reg .pred %rcond2;
+setp.eq.u32 %rcond2, 1, 0;
@%rnotvzero bra Lskip;
setp.<op>.<type> %rcond,op1,op2;
+mov.pred %rcond2, %rcond;
Lskip:
+mov.pred %rcond, %rcond2;
selp.u32 %rcondu32,1,0,%rcond;
shfl.idx.b32 %rcondu32,%rcondu32,0,31;
setp.ne.u32 %rcond,%rcondu32,0;
*/
rtx_insn *label = PREV_INSN (tail);
gcc_assert (label && LABEL_P (label));
rtx tmp = gen_reg_rtx (BImode);
emit_insn_before (gen_movbi (tmp, const0_rtx),
bb_first_real_insn (from)); bb_first_real_insn (from));
emit_insn_before (gen_rtx_SET (tmp, pvar), label);
emit_insn_before (gen_rtx_SET (pvar, tmp), tail);
#endif #endif
emit_insn_before (nvptx_gen_vcast (pvar), tail); emit_insn_before (nvptx_gen_vcast (pvar), tail);
} }
......
2018-01-19 Tom de Vries <tom@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
PR target/83920
* testsuite/libgomp.oacc-c-c++-common/pr83920.c: New test.
* testsuite/libgomp.oacc-fortran/pr83920.f90: New test.
2018-01-03 Jakub Jelinek <jakub@redhat.com> 2018-01-03 Jakub Jelinek <jakub@redhat.com>
Update copyright years. Update copyright years.
......
/* { dg-do run } */
#include <stdlib.h>
#define n 10

/* Regression kernel for PR target/83920.

   Sets every element of the n*n array C to zero unless BETA equals 1, in
   which case C is left untouched.  The branch on BETA sits between the
   gang-partitioned outer loop and the vector-partitioned inner loop.

   BETA  selector; any value other than 1 enables the zeroing loop.
   C     pointer to at least n*n ints; copied to and from the device.  */
static void __attribute__((noinline)) __attribute__((noclone))
foo (int beta, int *c)
{
  /* OpenACC array sections are [start:length], so the length must be n * n
     for the section to include the last element written, c[n * n - 1].  */
#pragma acc parallel copy(c[0:n * n]) num_gangs(2)
#pragma acc loop gang
  for (int j = 0; j < n; ++j)
    if (beta != 1)
      {
#pragma acc loop vector
	for (int i = 0; i < n; ++i)
	  c[i + (j * n)] = 0;
      }
}
/* Test driver: plant a non-zero sentinel in the first element, run foo with
   beta == 0 so that its guarded loop executes, and abort unless the sentinel
   has been cleared.  */
int
main (void)
{
  int c[n * n];

  *c = 1;
  foo (0, c);

  if (*c == 0)
    return 0;

  abort ();
}
! { dg-do run }
! Regression kernel for PR target/83920.
!
! Sets every element of C to zero unless BETA equals 1.0, in which case C is
! left untouched.  The branch on BETA sits between the gang-partitioned outer
! loop and the vector-partitioned inner loop.
!
!   beta  selector; any value other than 1.0 enables the zeroing loop.
!   c     100x100 array, copied to and from the device.
subroutine foo (beta, c)
  implicit none
  real, intent(in) :: beta
  real, intent(inout) :: c(100,100)
  real, parameter :: one = 1.0
  integer :: i, j

  !$acc parallel copy(c(1:100,1:100)) num_gangs(2)
  !$acc loop gang
  do j = 1, 100
    ! Exact comparison is intentional: the caller passes a literal value.
    if (beta /= one) then
      !$acc loop vector
      do i = 1, 100
        c(i,j) = 0.0
      end do
    end if
  end do
  !$acc end parallel
end subroutine foo
! Test driver: fill C with ones, run foo with beta = 0.0 so that its guarded
! loop executes, and fail with a non-zero exit status if any element of C
! was left non-zero.
program test_foo
  implicit none
  real :: c(100,100), beta

  beta = 0.0
  c(:,:) = 1.0

  call foo (beta, c)

  ! Standard ERROR STOP replaces the non-standard ABORT extension; the whole
  ! array is checked, not just the first element.
  if (any (c /= 0.0)) error stop 1
end program test_foo
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment