re PR libfortran/80602 (Reduce stack usage for blocked matmul)
2017-05-08 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/80602 * m4/matmul_internal.m4: 'matmul_name`: Change t1 to a VLA of the required size. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. 2017-05-08 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/80602 * gfortran.dg/matmul_15.f90: New test case. From-SVN: r247753
This commit is contained in:
parent
d16d5eac2a
commit
fd9910392b
18 changed files with 853 additions and 134 deletions
|
@ -1,3 +1,8 @@
|
|||
2017-05-08 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/80602
|
||||
* gfortran.dg/matmul_15.f90: New test case.
|
||||
|
||||
2017-05-08 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* testsuite/gcc.dg/vect/vect-44.c: Add -fno-vect-cost-model.
|
||||
|
|
34
gcc/testsuite/gfortran.dg/matmul_15.f90
Normal file
34
gcc/testsuite/gfortran.dg/matmul_15.f90
Normal file
|
@ -0,0 +1,34 @@
|
|||
! { dg-do run }
|
||||
! { dg-options "-finline-matmul-limit=0" }
|
||||
! Stress-test the matmul blocking code with sizes close to or
|
||||
! equal to powers of two.
|
||||
|
||||
program main
  ! Multiply random matrices with matmul for every combination of the
  ! extents listed in nn, so that matrix dimensions just below, at and
  ! just above each power of two are all exercised.
  implicit none
  ! Extents to test: (n-1, n, n+1) around each power of two up to 256.
  integer, dimension(*), parameter :: nn = &
       & [2,3,4,5, 7,8,9, 15,16,17, 31,32,33, 63,64,65, &
       &  127,128,129, 255,256,257]
  integer, parameter :: s = size(nn)
  real, dimension(:,:), allocatable :: a, b, c
  integer :: i1, i2, i3
  integer :: nx, ny, count
  real :: sm

  ! sm accumulates the sum of every product; it is not checked, the
  ! program only has to run to completion.
  sm = 0.0
  do i1=1, s
     nx = nn(i1)
     do i2=1,s
        ny = nn(i2)
        do i3=1,s
           count = nn(i3)
           ! a is nx x ny, b is ny x count, so c = a*b is nx x count.
           allocate (a(nx,ny), b(ny,count), c(nx,count))
           call random_number(a)
           call random_number(b)
           c = matmul(a,b)
           sm = sm + sum(c)
           deallocate(a,b,c)
        end do
     end do
  end do

end program main
|
|
@ -1,3 +1,22 @@
|
|||
2017-05-08 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/80602
|
||||
* m4/matmul_internal.m4: 'matmul_name`: Change
|
||||
t1 to a VLA of the required size.
|
||||
* generated/matmul_c10.c: Regenerated.
|
||||
* generated/matmul_c16.c: Regenerated.
|
||||
* generated/matmul_c4.c: Regenerated.
|
||||
* generated/matmul_c8.c: Regenerated.
|
||||
* generated/matmul_i1.c: Regenerated.
|
||||
* generated/matmul_i16.c: Regenerated.
|
||||
* generated/matmul_i2.c: Regenerated.
|
||||
* generated/matmul_i4.c: Regenerated.
|
||||
* generated/matmul_i8.c: Regenerated.
|
||||
* generated/matmul_r10.c: Regenerated.
|
||||
* generated/matmul_r16.c: Regenerated.
|
||||
* generated/matmul_r4.c: Regenerated.
|
||||
* generated/matmul_r8.c: Regenerated.
|
||||
|
||||
2017-04-11 Jerry DeLisle <jvdelisle@gcc.gnu.org>
|
||||
|
||||
* close.c: Fix white space in pointer declarations and comment
|
||||
|
|
|
@ -137,8 +137,9 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/depstand.m4 \
|
|||
$(top_srcdir)/../ltversion.m4 $(top_srcdir)/../lt~obsolete.m4 \
|
||||
$(top_srcdir)/acinclude.m4 $(top_srcdir)/../config/acx.m4 \
|
||||
$(top_srcdir)/../config/no-executables.m4 \
|
||||
$(top_srcdir)/../config/math.m4 $(top_srcdir)/../libtool.m4 \
|
||||
$(top_srcdir)/configure.ac
|
||||
$(top_srcdir)/../config/math.m4 \
|
||||
$(top_srcdir)/../config/ax_check_define.m4 \
|
||||
$(top_srcdir)/../libtool.m4 $(top_srcdir)/configure.ac
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_COMPLEX_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_1 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_1 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_1 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_1 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_1 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_2 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_2 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_2 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_2 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_2 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_INTEGER_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_10 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_16 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_4 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -286,8 +286,7 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -311,6 +310,17 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -829,8 +839,7 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -854,6 +863,17 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1372,8 +1392,7 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1397,6 +1416,17 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -1911,8 +1941,7 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -1936,6 +1965,17 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
@ -2508,8 +2548,7 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
GFC_REAL_8 t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -2533,6 +2572,17 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
|
@ -202,8 +202,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
i1, i2, i3, i4, i5, i6;
|
||||
|
||||
/* Local variables */
|
||||
'rtype_name` t1[65536], /* was [256][256] */
|
||||
f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
'rtype_name` f11, f12, f21, f22, f31, f32, f41, f42,
|
||||
f13, f14, f23, f24, f33, f34, f43, f44;
|
||||
index_type i, j, l, ii, jj, ll;
|
||||
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
|
||||
|
@ -227,6 +226,17 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
||||
/* Adjust size of t1 to what is needed. */
|
||||
index_type t1_dim;
|
||||
t1_dim = (a_dim1-1) * 256 + b_dim1;
|
||||
if (t1_dim > 65536)
|
||||
t1_dim = 65536;
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wvla"
|
||||
'rtype_name` t1[t1_dim]; /* was [256][256] */
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
|
|
Loading…
Add table
Reference in a new issue