rs6000: Make some BIFs vectorized on P10
This patch adds support that lets the vectorizer vectorize the scalar versions of some built-in functions on Power10. gcc/ChangeLog: * config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add support for built-in functions MISC_BUILTIN_DIVWE, MISC_BUILTIN_DIVWEU, MISC_BUILTIN_DIVDE, MISC_BUILTIN_DIVDEU, P10_BUILTIN_CFUGED, P10_BUILTIN_CNTLZDM, P10_BUILTIN_CNTTZDM, P10_BUILTIN_PDEPD and P10_BUILTIN_PEXTD on Power10. gcc/testsuite/ChangeLog: * gcc.target/powerpc/dive-vectorize-1.c: New test. * gcc.target/powerpc/dive-vectorize-1.h: New test. * gcc.target/powerpc/dive-vectorize-2.c: New test. * gcc.target/powerpc/dive-vectorize-2.h: New test. * gcc.target/powerpc/dive-vectorize-run-1.c: New test. * gcc.target/powerpc/dive-vectorize-run-2.c: New test. * gcc.target/powerpc/p10-bifs-vectorize-1.c: New test. * gcc.target/powerpc/p10-bifs-vectorize-1.h: New test. * gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test.
This commit is contained in:
parent
2e64eec671
commit
26f5ea5e14
10 changed files with 335 additions and 0 deletions
|
@ -5793,6 +5793,59 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
|
|||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
machine_mode in_vmode = TYPE_MODE (type_in);
|
||||
machine_mode out_vmode = TYPE_MODE (type_out);
|
||||
|
||||
/* Power10 supported vectorized built-in functions. */
|
||||
if (TARGET_POWER10
|
||||
&& in_vmode == out_vmode
|
||||
&& VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
|
||||
{
|
||||
machine_mode exp_mode = DImode;
|
||||
machine_mode exp_vmode = V2DImode;
|
||||
enum rs6000_builtins bif;
|
||||
switch (fn)
|
||||
{
|
||||
case MISC_BUILTIN_DIVWE:
|
||||
case MISC_BUILTIN_DIVWEU:
|
||||
exp_mode = SImode;
|
||||
exp_vmode = V4SImode;
|
||||
if (fn == MISC_BUILTIN_DIVWE)
|
||||
bif = P10V_BUILTIN_DIVES_V4SI;
|
||||
else
|
||||
bif = P10V_BUILTIN_DIVEU_V4SI;
|
||||
break;
|
||||
case MISC_BUILTIN_DIVDE:
|
||||
case MISC_BUILTIN_DIVDEU:
|
||||
if (fn == MISC_BUILTIN_DIVDE)
|
||||
bif = P10V_BUILTIN_DIVES_V2DI;
|
||||
else
|
||||
bif = P10V_BUILTIN_DIVEU_V2DI;
|
||||
break;
|
||||
case P10_BUILTIN_CFUGED:
|
||||
bif = P10V_BUILTIN_VCFUGED;
|
||||
break;
|
||||
case P10_BUILTIN_CNTLZDM:
|
||||
bif = P10V_BUILTIN_VCLZDM;
|
||||
break;
|
||||
case P10_BUILTIN_CNTTZDM:
|
||||
bif = P10V_BUILTIN_VCTZDM;
|
||||
break;
|
||||
case P10_BUILTIN_PDEPD:
|
||||
bif = P10V_BUILTIN_VPDEPD;
|
||||
break;
|
||||
case P10_BUILTIN_PEXTD:
|
||||
bif = P10V_BUILTIN_VPEXTD;
|
||||
break;
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
if (in_mode == exp_mode && in_vmode == exp_vmode)
|
||||
return rs6000_builtin_decls[bif];
|
||||
}
|
||||
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
|
|
11
gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
Normal file
11
gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target power10_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
|
||||
|
||||
/* Test if signed/unsigned int extended divisions get vectorized. */
|
||||
|
||||
#include "dive-vectorize-1.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
|
||||
/* { dg-final { scan-assembler-times {\mvdivesw\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mvdiveuw\M} 1 } } */
|
22
gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
Normal file
22
gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#define N 128
|
||||
|
||||
typedef signed int si;
|
||||
typedef unsigned int ui;
|
||||
|
||||
si si_a[N], si_b[N], si_c[N];
|
||||
ui ui_a[N], ui_b[N], ui_c[N];
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_divwe ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
si_c[i] = __builtin_divwe (si_a[i], si_b[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_divweu ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ui_c[i] = __builtin_divweu (ui_a[i], ui_b[i]);
|
||||
}
|
||||
|
13
gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
Normal file
13
gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do compile } */
|
||||
/* We scan for vdive*d which are only supported on 64-bit env. */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-require-effective-target power10_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
|
||||
|
||||
/* Test if signed/unsigned long long extended divisions get vectorized. */
|
||||
|
||||
#include "dive-vectorize-2.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
|
||||
/* { dg-final { scan-assembler-times {\mvdivesd\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mvdiveud\M} 1 } } */
|
22
gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
Normal file
22
gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#define N 128
|
||||
|
||||
typedef signed long long sLL;
|
||||
typedef unsigned long long uLL;
|
||||
|
||||
sLL sll_a[N], sll_b[N], sll_c[N];
|
||||
uLL ull_a[N], ull_b[N], ull_c[N];
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_divde ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
sll_c[i] = __builtin_divde (sll_a[i], sll_b[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_divdeu ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ull_c[i] = __builtin_divdeu (ull_a[i], ull_b[i]);
|
||||
}
|
||||
|
54
gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
Normal file
54
gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model" } */
|
||||
|
||||
#include "dive-vectorize-1.h"
|
||||
|
||||
/* Check if test cases with signed/unsigned int extended division
|
||||
vectorization run successfully. */
|
||||
|
||||
/* Use optimize (1) so that vectorization is not applied to the check functions.  */
|
||||
|
||||
__attribute__ ((optimize (1))) void
|
||||
check_divwe ()
|
||||
{
|
||||
test_divwe ();
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
si exp = __builtin_divwe (si_a[i], si_b[i]);
|
||||
if (exp != si_c[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((optimize (1))) void
|
||||
check_divweu ()
|
||||
{
|
||||
test_divweu ();
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
ui exp = __builtin_divweu (ui_a[i], ui_b[i]);
|
||||
if (exp != ui_c[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
si_a[i] = 0x10 * (i * 3 + 2);
|
||||
si_b[i] = 0x7890 * (i * 3 + 1);
|
||||
ui_a[i] = 0x234 * (i * 11 + 3) - 0xcd * (i * 5 - 7);
|
||||
ui_b[i] = 0x6078 * (i * 7 + 3) + 0xef * (i * 7 - 11);
|
||||
if (si_b[i] == 0 || ui_b[i] == 0)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
check_divwe ();
|
||||
check_divweu ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
56
gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
Normal file
56
gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
Normal file
|
@ -0,0 +1,56 @@
|
|||
/* { dg-do run } */
|
||||
/* The checked bifs are only supported on 64-bit env. */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model" } */
|
||||
|
||||
#include "dive-vectorize-2.h"
|
||||
|
||||
/* Check if test cases with signed/unsigned long long extended division
|
||||
vectorization run successfully. */
|
||||
|
||||
/* Use optimize (1) so that vectorization is not applied to the check functions.  */
|
||||
|
||||
__attribute__ ((optimize (1))) void
|
||||
check_divde ()
|
||||
{
|
||||
test_divde ();
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
sLL exp = __builtin_divde (sll_a[i], sll_b[i]);
|
||||
if (exp != sll_c[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((optimize (1))) void
|
||||
check_divdeu ()
|
||||
{
|
||||
test_divdeu ();
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
uLL exp = __builtin_divdeu (ull_a[i], ull_b[i]);
|
||||
if (exp != ull_c[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
sll_a[i] = 0x102 * (i * 3 + 2);
|
||||
sll_b[i] = 0x789ab * (i * 3 + 1);
|
||||
ull_a[i] = 0x2345 * (i * 11 + 3) - 0xcd1 * (i * 5 - 7);
|
||||
ull_b[i] = 0x6078e * (i * 7 + 3) + 0xefa * (i * 7 - 11);
|
||||
if (sll_b[i] == 0 || ull_b[i] == 0)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
check_divde ();
|
||||
check_divdeu ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
16
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
Normal file
16
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* What we scan for are only supported on 64-bit env. */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-require-effective-target power10_ok } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
|
||||
|
||||
/* Test if some Power10 built-in functions get vectorized. */
|
||||
|
||||
#include "p10-bifs-vectorize-1.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 5 "vect" } } */
|
||||
/* { dg-final { scan-assembler-times {\mvcfuged\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mvclzdm\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mvctzdm\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mvpdepd\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mvpextd\M} 1 } } */
|
40
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
Normal file
40
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
#define N 32
|
||||
|
||||
typedef unsigned long long uLL;
|
||||
uLL ull_a[N], ull_b[N], ull_c[N];
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_cfuged ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ull_c[i] = __builtin_cfuged (ull_a[i], ull_b[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_cntlzdm ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ull_c[i] = __builtin_cntlzdm (ull_a[i], ull_b[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_cnttzdm ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ull_c[i] = __builtin_cnttzdm (ull_a[i], ull_b[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_pdepd ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ull_c[i] = __builtin_pdepd (ull_a[i], ull_b[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noipa)) void
|
||||
test_pextd ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
ull_c[i] = __builtin_pextd (ull_a[i], ull_b[i]);
|
||||
}
|
||||
|
48
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
Normal file
48
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* { dg-do run } */
|
||||
/* The checked bifs are only supported on 64-bit env. */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model" } */
|
||||
|
||||
#include "p10-bifs-vectorize-1.h"
|
||||
|
||||
/* Check if vectorized built-in functions run as expected.  */
|
||||
|
||||
/* Use optimize (1) so that vectorization is not applied to the check functions.  */
|
||||
|
||||
/* Define check_NAME: run test_NAME, then recompute each element with
   the scalar __builtin_NAME and abort on the first element of ull_c
   that differs.  */
#define CHECK(name) \
  __attribute__ ((optimize (1))) void check_##name () \
  { \
    int idx = 0; \
    test_##name (); \
    while (idx < N) \
      { \
        uLL expected = __builtin_##name (ull_a[idx], ull_b[idx]); \
        if (ull_c[idx] != expected) \
          __builtin_abort (); \
        idx++; \
      } \
  }
|
||||
|
||||
CHECK (cfuged)
|
||||
CHECK (cntlzdm)
|
||||
CHECK (cnttzdm)
|
||||
CHECK (pdepd)
|
||||
CHECK (pextd)
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
ull_a[i] = 0x789a * (i * 11 - 5) - 0xcd1 * (i * 5 - 7);
|
||||
ull_b[i] = 0xfedc * (i * 7 + 3) + 0x467 * (i * 7 - 11);
|
||||
}
|
||||
|
||||
check_cfuged ();
|
||||
check_cntlzdm ();
|
||||
check_cnttzdm ();
|
||||
check_pdepd ();
|
||||
check_pextd ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue