amdgcn: Fix addsub bug
The vec_fmsubadd instruction actually had add twice, by mistake. Also improve code-gen for all the complex patterns by using properly undefined values. Mostly this just prevents the compiler from reserving space in the stack frame. gcc/ChangeLog: * config/gcn/gcn-valu.md (cmul<conj_op><mode>3): Use gcn_gen_undef. (cml<addsub_as><mode>4): Likewise. (vec_addsub<mode>3): Likewise. (cadd<rot><mode>3): Likewise. (vec_fmaddsub<mode>4): Likewise. (vec_fmsubadd<mode>4): Likewise, and use sub for the odd lanes.
This commit is contained in:
parent
14e881eb03
commit
b17c57b06d
1 changed files with 15 additions and 8 deletions
|
@ -2323,8 +2323,9 @@
|
|||
rtx even = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (even, get_exec (0x5555555555555555UL));
|
||||
rtx dest = operands[0];
|
||||
emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm, dest, even));
|
||||
// a*c-b*d 0
|
||||
emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm,
|
||||
gcn_gen_undef (<MODE>mode),
|
||||
even)); // a*c-b*d 0
|
||||
|
||||
rtx t2_perm = gen_reg_rtx (<MODE>mode);
|
||||
emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*c a*d
|
||||
|
@ -2368,7 +2369,8 @@
|
|||
rtx even = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (even, get_exec (0x5555555555555555UL));
|
||||
rtx dest = operands[0];
|
||||
emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm, dest, even));
|
||||
emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm,
|
||||
gcn_gen_undef (<MODE>mode), even));
|
||||
|
||||
rtx odd = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
|
||||
|
@ -2392,7 +2394,8 @@
|
|||
rtx dest = operands[0];
|
||||
rtx x = operands[1];
|
||||
rtx y = operands[2];
|
||||
emit_insn (gen_sub<mode>3_exec (dest, x, y, dest, even));
|
||||
emit_insn (gen_sub<mode>3_exec (dest, x, y, gcn_gen_undef (<MODE>mode),
|
||||
even));
|
||||
rtx odd = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
|
||||
emit_insn (gen_add<mode>3_exec (dest, x, y, dest, odd));
|
||||
|
@ -2419,7 +2422,9 @@
|
|||
|
||||
rtx even = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (even, get_exec (0x5555555555555555UL));
|
||||
emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y, dest, even));
|
||||
emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y,
|
||||
gcn_gen_undef (<MODE>mode),
|
||||
even));
|
||||
rtx odd = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
|
||||
emit_insn (gen_<cadd_addsub><mode>3_exec (dest, x, y, dest, odd));
|
||||
|
@ -2439,7 +2444,8 @@
|
|||
rtx even = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (even, get_exec (0x5555555555555555UL));
|
||||
rtx dest = operands[0];
|
||||
emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, even));
|
||||
emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3],
|
||||
gcn_gen_undef (<MODE>mode), even));
|
||||
rtx odd = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
|
||||
emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
|
||||
|
@ -2459,10 +2465,11 @@
|
|||
rtx even = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (even, get_exec (0x5555555555555555UL));
|
||||
rtx dest = operands[0];
|
||||
emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, even));
|
||||
emit_insn (gen_add<mode>3_exec (dest, t1, operands[3],
|
||||
gcn_gen_undef (<MODE>mode), even));
|
||||
rtx odd = gen_rtx_REG (DImode, EXEC_REG);
|
||||
emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
|
||||
emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
|
||||
emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, odd));
|
||||
|
||||
DONE;
|
||||
})
|
||||
|
|
Loading…
Add table
Reference in a new issue