tree-optimization/110451 - hoist invariant compare after interchange
The following adjusts the cost model of invariant motion to treat [VEC_]COND_EXPRs and comparisons that produce a data value as expensive. For 503.bwaves_r this avoids an unnecessarily high vectorization factor caused by an integer comparison appearing alongside the data operations on double.

PR tree-optimization/110451
* tree-ssa-loop-im.cc (stmt_cost): [VEC_]COND_EXPR and tcc_comparison are expensive.
* gfortran.dg/vect/pr110451.f: New testcase.
This commit is contained in:
parent
3521768e8e
commit
237e83e215
2 changed files with 61 additions and 1 deletion
51
gcc/testsuite/gfortran.dg/vect/pr110451.f
Normal file
51
gcc/testsuite/gfortran.dg/vect/pr110451.f
Normal file
|
@ -0,0 +1,51 @@
|
|||
! { dg-do compile }
|
||||
! { dg-require-effective-target vect_condition }
|
||||
! { dg-require-effective-target vect_double }
|
||||
! { dg-additional-options "-ffast-math -floop-interchange -fdump-tree-linterchange-details -fdump-tree-vect-details" }
|
||||
! { dg-additional-options "-mprefer-vector-width=128" { target x86_64-*-* i?86-*-* } }
|
||||
|
||||
! mat_times_vec: for every grid point (i,j,k) and every l in 1..nb, set
! y(l,i,j,k) to the sum over m = 1..nb of seven products: a*x at the point
! itself, plus axp/axm, ayp/aym and azp/azm times x at the +1/-1
! neighbours along i, j and k respectively.
! Neighbour indices wrap around cyclically (see the mod expressions).
! NOTE(review): kit is declared but not referenced in this routine.
! NOTE(review): the dg-final scans that follow this routine check how this
! loop nest is transformed; presumably it must not be restructured.
subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm,
|
||||
$ nb,nx,ny,nz)
|
||||
implicit none
|
||||
integer nb,nx,ny,nz,i,j,k,m,l,kit,im1,ip1,jm1,jp1,km1,kp1
|
||||
|
||||
real*8 y(nb,nx,ny,nz),x(nb,nx,ny,nz)
|
||||
|
||||
real*8 a(nb,nb,nx,ny,nz),
|
||||
1 axp(nb,nb,nx,ny,nz),ayp(nb,nb,nx,ny,nz),azp(nb,nb,nx,ny,nz),
|
||||
2 axm(nb,nb,nx,ny,nz),aym(nb,nb,nx,ny,nz),azm(nb,nb,nx,ny,nz)
|
||||
|
||||
|
||||
do k=1,nz
|
||||
! km1 / kp1: k-1 and k+1 wrapped into 1..nz (k=1 -> nz, k=nz -> 1)
km1=mod(k+nz-2,nz)+1
|
||||
kp1=mod(k,nz)+1
|
||||
do j=1,ny
|
||||
! jm1 / jp1: j-1 and j+1 wrapped into 1..ny
jm1=mod(j+ny-2,ny)+1
|
||||
jp1=mod(j,ny)+1
|
||||
do i=1,nx
|
||||
! im1 / ip1: i-1 and i+1 wrapped into 1..nx
im1=mod(i+nx-2,nx)+1
|
||||
ip1=mod(i,nx)+1
|
||||
do l=1,nb
|
||||
! clear the output element, then accumulate the contributions over m
y(l,i,j,k)=0.0d0
|
||||
do m=1,nb
|
||||
y(l,i,j,k)=y(l,i,j,k)+
|
||||
1 a(l,m,i,j,k)*x(m,i,j,k)+
|
||||
2 axp(l,m,i,j,k)*x(m,ip1,j,k)+
|
||||
3 ayp(l,m,i,j,k)*x(m,i,jp1,k)+
|
||||
4 azp(l,m,i,j,k)*x(m,i,j,kp1)+
|
||||
5 axm(l,m,i,j,k)*x(m,im1,j,k)+
|
||||
6 aym(l,m,i,j,k)*x(m,i,jm1,k)+
|
||||
7 azm(l,m,i,j,k)*x(m,i,j,km1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
return
|
||||
end
|
||||
|
||||
! loop interchange adds a conditional on m != 1 in the innermost loop
|
||||
! verify that it is hoisted and thus does not affect the vectorization factor
|
||||
|
||||
! { dg-final { scan-tree-dump-times "is interchanged" 1 "linterchange" } }
|
||||
! { dg-final { scan-tree-dump "vectorization factor = 2" "vect" { target x86_64-*-* i?86-*-* } } }
|
|
@ -617,7 +617,8 @@ stmt_cost (gimple *stmt)
|
|||
if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
||||
return 1;
|
||||
|
||||
switch (gimple_assign_rhs_code (stmt))
|
||||
enum tree_code code = gimple_assign_rhs_code (stmt);
|
||||
switch (code)
|
||||
{
|
||||
case MULT_EXPR:
|
||||
case WIDEN_MULT_EXPR:
|
||||
|
@ -645,6 +646,11 @@ stmt_cost (gimple *stmt)
|
|||
/* Shifts and rotates are usually expensive. */
|
||||
return LIM_EXPENSIVE;
|
||||
|
||||
case COND_EXPR:
|
||||
case VEC_COND_EXPR:
|
||||
/* Conditionals are expensive. */
|
||||
return LIM_EXPENSIVE;
|
||||
|
||||
case CONSTRUCTOR:
|
||||
/* Make vector construction cost proportional to the number
|
||||
of elements. */
|
||||
|
@ -658,6 +664,9 @@ stmt_cost (gimple *stmt)
|
|||
return 0;
|
||||
|
||||
default:
|
||||
/* Comparisons are usually expensive. */
|
||||
if (TREE_CODE_CLASS (code) == tcc_comparison)
|
||||
return LIM_EXPENSIVE;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue