i386: Add V2SFmode FMA insn patterns [PR95046]

gcc/ChangeLog:

	PR target/95046
	* config/i386/mmx.md (fmav2sf4): New insn pattern.
	(fmsv2sf4): Ditto.
	(fnmav2sf4): Ditto.
	(fnmsv2sf4): Ditto.

testsuite/ChangeLog:

	PR target/95046
	* gcc.target/i386/pr95046-3.c: New test.
This commit is contained in:
Uros Bizjak 2020-05-12 18:36:27 +02:00
parent fa31a3cdbf
commit 955b1f9299
4 changed files with 125 additions and 0 deletions

View file

@ -1,3 +1,11 @@
2020-05-12 Uroš Bizjak <ubizjak@gmail.com>
PR target/95046
* config/i386/mmx.md (fmav2sf4): New insn pattern.
(fmsv2sf4): Ditto.
(fnmav2sf4): Ditto.
(fnmsv2sf4): Ditto.
2020-05-12 H.J. Lu <hongjiu.lu@intel.com>
* Makefile.in (CET_HOST_FLAGS): New.

View file

@ -660,6 +660,76 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision FMA multiply/accumulate instructions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SF fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Enabled when (TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE.
;; Constraint alternatives, in order (matching the "isa" attribute list):
;;   1. (fma)  operand 1 tied to operand 0 -> vfmadd132ps
;;   2. (fma)  operand 3 tied to operand 0 -> vfmadd231ps
;;   3. (fma4) no tied operands, four-operand form -> vfmaddps
;; The "%" modifier on operand 1 marks operands 1 and 2 as commutative.
;; NOTE(review): the "mode" attribute is V4SF although the operands are
;; V2SF; presumably because the emitted instructions are the packed-single
;; (ps) forms -- confirm.
(define_insn "fmav2sf4"
[(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
(fma:V2SF
(match_operand:V2SF 1 "register_operand" "%0,v,x")
(match_operand:V2SF 2 "register_operand" "v,v,x")
(match_operand:V2SF 3 "register_operand" "v,0,x")))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
"@
vfmadd132ps\t{%2, %3, %0|%0, %3, %2}
vfmadd231ps\t{%2, %1, %0|%0, %1, %2}
vfmaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma,fma,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "V4SF")])
;; V2SF fused multiply-subtract: operand 0 = operand 1 * operand 2 - operand 3
;; (expressed as an fma whose addend is the negation of operand 3).
;; Enabled when (TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE.
;; Constraint alternatives, in order (matching the "isa" attribute list):
;;   1. (fma)  operand 1 tied to operand 0 -> vfmsub132ps
;;   2. (fma)  operand 3 tied to operand 0 -> vfmsub231ps
;;   3. (fma4) no tied operands, four-operand form -> vfmsubps
;; The "%" modifier on operand 1 marks operands 1 and 2 as commutative.
(define_insn "fmsv2sf4"
[(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
(fma:V2SF
(match_operand:V2SF 1 "register_operand" "%0,v,x")
(match_operand:V2SF 2 "register_operand" "v,v,x")
(neg:V2SF
(match_operand:V2SF 3 "register_operand" "v,0,x"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
"@
vfmsub132ps\t{%2, %3, %0|%0, %3, %2}
vfmsub231ps\t{%2, %1, %0|%0, %1, %2}
vfmsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma,fma,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "V4SF")])
;; V2SF negated fused multiply-add:
;; operand 0 = -(operand 1) * operand 2 + operand 3
;; (expressed as an fma whose first multiplicand is the negation of operand 1).
;; Enabled when (TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE.
;; Constraint alternatives, in order (matching the "isa" attribute list):
;;   1. (fma)  operand 1 tied to operand 0 -> vfnmadd132ps
;;   2. (fma)  operand 3 tied to operand 0 -> vfnmadd231ps
;;   3. (fma4) no tied operands, four-operand form -> vfnmaddps
;; The "%" modifier on operand 1 marks operands 1 and 2 as commutative.
(define_insn "fnmav2sf4"
[(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
(fma:V2SF
(neg:V2SF
(match_operand:V2SF 1 "register_operand" "%0,v,x"))
(match_operand:V2SF 2 "register_operand" "v,v,x")
(match_operand:V2SF 3 "register_operand" "v,0,x")))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
"@
vfnmadd132ps\t{%2, %3, %0|%0, %3, %2}
vfnmadd231ps\t{%2, %1, %0|%0, %1, %2}
vfnmaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma,fma,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "V4SF")])
;; V2SF negated fused multiply-subtract:
;; operand 0 = -(operand 1) * operand 2 - operand 3
;; (expressed as an fma with both operand 1 and operand 3 negated).
;; Enabled when (TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE.
;; Constraint alternatives, in order (matching the "isa" attribute list):
;;   1. (fma)  operand 1 tied to operand 0 -> vfnmsub132ps
;;   2. (fma)  operand 3 tied to operand 0 -> vfnmsub231ps
;;   3. (fma4) no tied operands, four-operand form -> vfnmsubps
;; The "%" modifier on operand 1 marks operands 1 and 2 as commutative.
(define_insn "fnmsv2sf4"
[(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
(fma:V2SF
(neg:V2SF
(match_operand:V2SF 1 "register_operand" "%0,v,x"))
(match_operand:V2SF 2 "register_operand" "v,v,x")
(neg:V2SF
(match_operand:V2SF 3 "register_operand" "v,0,x"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
"@
vfnmsub132ps\t{%2, %3, %0|%0, %3, %2}
vfnmsub231ps\t{%2, %1, %0|%0, %1, %2}
vfnmsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma,fma,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "V4SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations

View file

@ -3,6 +3,11 @@
PR c++/78752
* g++.dg/cpp2a/concepts-pr78752-2.C: New test.
2020-05-12 Uroš Bizjak <ubizjak@gmail.com>
PR target/95046
* gcc.target/i386/pr95046-3.c: New test.
2020-05-12 Uroš Bizjak <ubizjak@gmail.com>
PR target/95046

View file

@ -0,0 +1,42 @@
/* PR target/95046 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -mfma" } */
/* Two-element global arrays used by every test function in this file:
   r receives the result, a and b are the multiplicands, c is the addend.  */
float r[2], a[2], b[2], c[2];
/* Multiply-add over both elements: r[i] = a[i] * b[i] + c[i].
   The dg-final directive that follows this function scans the assembly
   for a vfmadd{132,213,231}ps-style mnemonic.  */
void
test_fma (void)
{
for (int i = 0; i < 2; i++)
r[i] = a[i] * b[i] + c[i];
}
/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */
/* Multiply-subtract over both elements: r[i] = a[i] * b[i] - c[i].
   The dg-final directive that follows this function scans the assembly
   for a vfmsub{132,213,231}ps-style mnemonic.  */
void
test_fms (void)
{
for (int i = 0; i < 2; i++)
r[i] = a[i] * b[i] - c[i];
}
/* { dg-final { scan-assembler "\tvfmsub\[123\]+ps" } } */
/* Negated multiply-add over both elements: r[i] = -(a[i] * b[i]) + c[i].
   The dg-final directive that follows this function scans the assembly
   for a vfnmadd{132,213,231}ps-style mnemonic.  */
void
test_fnma (void)
{
for (int i = 0; i < 2; i++)
r[i] = -(a[i] * b[i]) + c[i];
}
/* { dg-final { scan-assembler "\tvfnmadd\[123\]+ps" } } */
/* Negated multiply-subtract over both elements:
   r[i] = -(a[i] * b[i]) - c[i].
   The dg-final directive that follows this function scans the assembly
   for a vfnmsub{132,213,231}ps-style mnemonic.  */
void
test_fnms (void)
{
for (int i = 0; i < 2; i++)
r[i] = -(a[i] * b[i]) - c[i];
}
/* { dg-final { scan-assembler "\tvfnmsub\[123\]+ps" } } */