PR libfortran/97063 - Wrong result for vector (step size is negative) * matrix
The MATMUL intrinsic provided a wrong result for rank-1 times rank-2 array
when a negative stride was used for addressing the elements of the rank-1
array, because a check on strides was erroneously placed before the check
on the rank.  Interchange order of checks.

libgfortran/ChangeLog:

    * m4/matmul_internal.m4: Move check for rank-1 times rank-2 before
    checks on strides for rank-2 times rank-2.
    * generated/matmul_c10.c: Regenerated.
    * generated/matmul_c16.c: Likewise.
    * generated/matmul_c4.c: Likewise.
    * generated/matmul_c8.c: Likewise.
    * generated/matmul_i1.c: Likewise.
    * generated/matmul_i16.c: Likewise.
    * generated/matmul_i2.c: Likewise.
    * generated/matmul_i4.c: Likewise.
    * generated/matmul_i8.c: Likewise.
    * generated/matmul_r10.c: Likewise.
    * generated/matmul_r16.c: Likewise.
    * generated/matmul_r4.c: Likewise.
    * generated/matmul_r8.c: Likewise.
    * generated/matmulavx128_c10.c: Likewise.
    * generated/matmulavx128_c16.c: Likewise.
    * generated/matmulavx128_c4.c: Likewise.
    * generated/matmulavx128_c8.c: Likewise.
    * generated/matmulavx128_i1.c: Likewise.
    * generated/matmulavx128_i16.c: Likewise.
    * generated/matmulavx128_i2.c: Likewise.
    * generated/matmulavx128_i4.c: Likewise.
    * generated/matmulavx128_i8.c: Likewise.
    * generated/matmulavx128_r10.c: Likewise.
    * generated/matmulavx128_r16.c: Likewise.
    * generated/matmulavx128_r4.c: Likewise.
    * generated/matmulavx128_r8.c: Likewise.

gcc/testsuite/ChangeLog:

    * gfortran.dg/matmul_20.f90: New test.
This commit is contained in:
parent
019e1c85fe
commit
cd6cd6aed1
28 changed files with 1335 additions and 1288 deletions
47
gcc/testsuite/gfortran.dg/matmul_20.f90
Normal file
47
gcc/testsuite/gfortran.dg/matmul_20.f90
Normal file
|
@ -0,0 +1,47 @@
|
|||
! { dg-do run }
|
||||
! PR97063 - Wrong result for vector (step size is negative) * matrix
|
||||
|
||||
program p
  ! Regression check for PR97063: the product of a rank-1 array section
  ! with a rank-2 array section must be the same whether the sections are
  ! traversed with a positive stride or with the matching negative stride.
  ! Each failing comparison terminates the program with its own stop code.
  implicit none

  integer, parameter :: m = 3
  integer, parameter :: nrow = 2*m       ! extent of the vectors / first matrix dimension
  integer, parameter :: last = nrow - 1  ! upper bound used by the subscript triplets
  integer, parameter :: ncol = 4         ! second matrix dimension

  integer :: row, col
  integer :: lo, hi, step
  integer :: dot1, dot2
  integer :: ivec(nrow), imat(nrow,ncol), ires(ncol), iref(ncol)
  real    :: rvec(nrow), rmat(nrow,ncol), rres(ncol), rref(ncol)

  ! Integer operands: ivec = 0, 1, ..., nrow-1 and
  ! imat(row,col) = 5*(row-1) + (col-1).
  do row = 1, nrow
     ivec(row) = row - 1
  end do
  do col = 1, ncol
     do row = 1, nrow
        imat(row,col) = 5*(row - 1) + (col - 1)
     end do
  end do

  ! Bounds and stride of the subscript triplets (plain variables, not
  ! named constants).
  lo = 1
  hi = last
  step = 2

  ! Integer variant: forward-stride reference, cross-checked against
  ! dot_product for the first two columns, then the reversed sections.
  dot1 = dot_product (ivec(lo:hi:step), imat(lo:hi:step,1))
  dot2 = dot_product (ivec(lo:hi:step), imat(lo:hi:step,2))
  iref = matmul (ivec(lo:hi:step),  imat(lo:hi:step,:))
  ires = matmul (ivec(hi:lo:-step), imat(hi:lo:-step,:))

  if (.not. (iref(1) == dot1 .and. iref(2) == dot2)) stop 1
  if (.not. all (ires == iref)) stop 2

  ! Real variant: same data converted to default real.
  rvec = real (ivec)
  rmat = real (imat)
  rref = matmul (rvec(lo:hi:step),  rmat(lo:hi:step,:))
  rres = matmul (rvec(hi:lo:-step), rmat(hi:lo:-step,:))

  if (.not. all (rref == real (iref))) stop 3
  if (.not. all (rres == rref)) stop 4

  ! Mixed variant: real vector times integer matrix.
  rref = matmul (rvec(lo:hi:step),  imat(lo:hi:step,:))
  rres = matmul (rvec(hi:lo:-step), imat(hi:lo:-step,:))

  if (.not. all (rref == real (iref))) stop 5
  if (.not. all (rres == rref)) stop 6

  ! Mixed variant: integer vector times real matrix.
  rref = matmul (ivec(lo:hi:step),  rmat(lo:hi:step,:))
  rres = matmul (ivec(hi:lo:-step), rmat(hi:lo:-step,:))

  if (.not. all (rref == real (iref))) stop 7
  if (.not. all (rres == rref)) stop 8
end program p
|
|
@ -590,20 +590,6 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
|
|
@ -590,20 +590,6 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -618,6 +604,20 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
@ -1158,20 +1158,6 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -1186,6 +1172,20 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
@ -1726,20 +1726,6 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -1754,6 +1740,20 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
@ -2308,20 +2308,6 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -2336,6 +2322,20 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
@ -2949,20 +2949,6 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -2977,6 +2963,20 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_10 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_16 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_4 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_COMPLEX_8 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_1 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_16 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_2 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_4 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_INTEGER_8 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_10 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_10 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_16 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_16 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_4 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_4 *restrict abase_x;
|
||||
|
|
|
@ -555,20 +555,6 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -583,6 +569,20 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
@ -1124,20 +1124,6 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const GFC_REAL_8 *restrict bbase_y;
|
||||
|
@ -1152,6 +1138,20 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const GFC_REAL_8 *restrict abase_x;
|
||||
|
|
|
@ -506,20 +506,6 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = ('rtype_name`)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else if (GFC_DESCRIPTOR_RANK (a) == 1)
|
||||
{
|
||||
const 'rtype_name` *restrict bbase_y;
|
||||
|
@ -534,6 +520,20 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
dest[y*rxstride] = s;
|
||||
}
|
||||
}
|
||||
else if (axstride < aystride)
|
||||
{
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
dest[x*rxstride + y*rystride] = ('rtype_name`)0;
|
||||
|
||||
for (y = 0; y < ycount; y++)
|
||||
for (n = 0; n < count; n++)
|
||||
for (x = 0; x < xcount; x++)
|
||||
/* dest[x,y] += a[x,n] * b[n,y] */
|
||||
dest[x*rxstride + y*rystride] +=
|
||||
abase[x*axstride + n*aystride] *
|
||||
bbase[n*bxstride + y*bystride];
|
||||
}
|
||||
else
|
||||
{
|
||||
const 'rtype_name` *restrict abase_x;
|
||||
|
|
Loading…
Add table
Reference in a new issue