tree-optimization/110451 - hoist invariant compare after interchange

The following adjusts the cost model of invariant motion to consider
[VEC_]COND_EXPRs and comparisons producing a data value as expensive.
For 503.bwaves_r this avoids an unnecessarily high vectorization
factor caused by an integer comparison alongside data operations on
double.

	PR tree-optimization/110451
	* tree-ssa-loop-im.cc (stmt_cost): [VEC_]COND_EXPR and
	tcc_comparison are expensive.

	* gfortran.dg/vect/pr110451.f: New testcase.
This commit is contained in:
Richard Biener 2023-06-28 13:36:59 +02:00
parent 3521768e8e
commit 237e83e215
2 changed files with 61 additions and 1 deletions

View file

@ -0,0 +1,51 @@
! { dg-do compile }
! { dg-require-effective-target vect_condition }
! { dg-require-effective-target vect_double }
! { dg-additional-options "-ffast-math -floop-interchange -fdump-tree-linterchange-details -fdump-tree-vect-details" }
! { dg-additional-options "-mprefer-vector-width=128" { target x86_64-*-* i?86-*-* } }
! Matrix-times-vector kernel on a 3-D grid.  For every grid point
! (i,j,k) and every l in 1..nb, y(l,i,j,k) is set to the sum over m of
! seven nb-by-nb coefficient blocks applied to x at the point itself
! and at its +1/-1 neighbours along i, j and k.
!   y            - result; every element (l,i,j,k) is overwritten
!   x            - input field, read at the point and its neighbours
!   a            - block multiplying x at (i,j,k)
!   axp,ayp,azp  - blocks multiplying x at ip1, jp1, kp1 respectively
!   axm,aym,azm  - blocks multiplying x at im1, jm1, km1 respectively
!   nb           - extent of the l and m dimensions
!   nx,ny,nz     - grid extents along i, j and k
subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm,
$ nb,nx,ny,nz)
implicit none
! Note: kit is declared but not referenced anywhere in this routine.
integer nb,nx,ny,nz,i,j,k,m,l,kit,im1,ip1,jm1,jp1,km1,kp1
real*8 y(nb,nx,ny,nz),x(nb,nx,ny,nz)
real*8 a(nb,nb,nx,ny,nz),
1 axp(nb,nb,nx,ny,nz),ayp(nb,nb,nx,ny,nz),azp(nb,nb,nx,ny,nz),
2 axm(nb,nb,nx,ny,nz),aym(nb,nb,nx,ny,nz),azm(nb,nb,nx,ny,nz)
do k=1,nz
! Previous/next k index with wrap-around: k=1 gives km1=nz and
! k=nz gives kp1=1.
km1=mod(k+nz-2,nz)+1
kp1=mod(k,nz)+1
do j=1,ny
! Same wrap-around neighbour computation along j.
jm1=mod(j+ny-2,ny)+1
jp1=mod(j,ny)+1
do i=1,nx
! Same wrap-around neighbour computation along i.
im1=mod(i+nx-2,nx)+1
ip1=mod(i,nx)+1
! As written, l is the outer and m the innermost loop of the nest.
do l=1,nb
! Clear the accumulator before summing the contributions over m.
y(l,i,j,k)=0.0d0
do m=1,nb
! Add the seven block-times-x products for this (l,m) pair.
y(l,i,j,k)=y(l,i,j,k)+
1 a(l,m,i,j,k)*x(m,i,j,k)+
2 axp(l,m,i,j,k)*x(m,ip1,j,k)+
3 ayp(l,m,i,j,k)*x(m,i,jp1,k)+
4 azp(l,m,i,j,k)*x(m,i,j,kp1)+
5 axm(l,m,i,j,k)*x(m,im1,j,k)+
6 aym(l,m,i,j,k)*x(m,i,jm1,k)+
7 azm(l,m,i,j,k)*x(m,i,j,km1)
enddo
enddo
enddo
enddo
enddo
return
end
! Loop interchange adds a conditional on m != 1 in the innermost loop;
! verify that it is hoisted and thus does not affect the vectorization factor.
! { dg-final { scan-tree-dump-times "is interchanged" 1 "linterchange" } }
! { dg-final { scan-tree-dump "vectorization factor = 2" "vect" { target x86_64-*-* i?86-*-* } } }

View file

@ -617,7 +617,8 @@ stmt_cost (gimple *stmt)
if (gimple_code (stmt) != GIMPLE_ASSIGN)
return 1;
switch (gimple_assign_rhs_code (stmt))
enum tree_code code = gimple_assign_rhs_code (stmt);
switch (code)
{
case MULT_EXPR:
case WIDEN_MULT_EXPR:
@ -645,6 +646,11 @@ stmt_cost (gimple *stmt)
/* Shifts and rotates are usually expensive. */
return LIM_EXPENSIVE;
case COND_EXPR:
case VEC_COND_EXPR:
/* Conditionals are expensive. */
return LIM_EXPENSIVE;
case CONSTRUCTOR:
/* Make vector construction cost proportional to the number
of elements. */
@ -658,6 +664,9 @@ stmt_cost (gimple *stmt)
return 0;
default:
/* Comparisons are usually expensive. */
if (TREE_CODE_CLASS (code) == tcc_comparison)
return LIM_EXPENSIVE;
return 1;
}
}