x86_64: Improve code expanded for highpart multiplications.
While working on a middle-end patch to more aggressively use highpart multiplications on targets that support them, I noticed that the RTL expanded by the x86 backend interacts poorly with register allocation, leading to suboptimal code. For the testcase, typedef int __attribute ((mode(TI))) ti_t; long foo(long x) { return ((ti_t)x * 19065) >> 64; } we'd like to avoid: foo: movq %rdi, %rax movl $19065, %edx imulq %rdx movq %rdx, %rax ret and would prefer: foo: movl $19065, %eax imulq %rdi movq %rdx, %rax ret This patch provides a pair of peephole2 transformations to tweak the spills generated by reload, and at the same time replaces the current define_expand with a define_insn pattern using the new [su]mul_highpart RTX codes. 2021-12-20 Roger Sayle <roger@nextmovesoftware.com> Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog * config/i386/i386.md (any_mul_highpart): New code iterator. (sgnprefix, s): Add attribute support for [su]mul_highpart. (<s>mul<mode>3_highpart): Delete expander. (<s>mul<mode>3_highpart, *<s>mulsi3_highpart_zext): New define_insn patterns. (define_peephole2): Tweak the register allocation for the above instructions after reload. gcc/testsuite/ChangeLog * gcc.target/i386/smuldi3_highpart.c: New test case.
This commit is contained in:
parent
1f56dbe2da
commit
c9c466ea33
2 changed files with 103 additions and 20 deletions
|
@ -992,11 +992,16 @@
|
|||
;; Mapping of extend operators
|
||||
(define_code_iterator any_extend [sign_extend zero_extend])
|
||||
|
||||
;; Mapping of highpart multiply operators
|
||||
(define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
|
||||
|
||||
;; Prefix for insn mnemonic.
|
||||
(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
|
||||
(smul_highpart "i") (umul_highpart "")
|
||||
(div "i") (udiv "")])
|
||||
;; Prefix for define_insn
|
||||
(define_code_attr s [(sign_extend "s") (zero_extend "u")])
|
||||
(define_code_attr s [(sign_extend "s") (zero_extend "u")
|
||||
(smul_highpart "s") (umul_highpart "u")])
|
||||
(define_code_attr u [(sign_extend "") (zero_extend "u")
|
||||
(div "") (udiv "u")])
|
||||
(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
|
||||
|
@ -8433,20 +8438,45 @@
|
|||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_expand "<s>mul<mode>3_highpart"
|
||||
[(parallel [(set (match_operand:DWIH 0 "register_operand")
|
||||
(truncate:DWIH
|
||||
(lshiftrt:<DWI>
|
||||
(mult:<DWI>
|
||||
(any_extend:<DWI>
|
||||
(match_operand:DWIH 1 "nonimmediate_operand"))
|
||||
(any_extend:<DWI>
|
||||
(match_operand:DWIH 2 "register_operand")))
|
||||
(match_dup 3))))
|
||||
(clobber (scratch:DWIH))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
;; Highpart multiplication patterns
|
||||
;; Named insn for the signed/unsigned highpart multiply, instantiated for
;; each code in any_mul_highpart and each mode in DWIH.
;;   operand 0: result register, constraint "=d".
;;   operand 1: register input, constraint "%a" ("%" marks it as
;;              commutative with operand 2).
;;   operand 2: register or memory input, constraint "rm".
;;   operand 3: scratch clobber tied to operand 1 by the matching
;;              constraint "=1"; presumably models the instruction
;;              overwriting its implicit input register.
;; The flags register is clobbered as well.  The output template emits the
;; one-operand <sgnprefix>mul on operand 2 only.
;; NOTE(review): the "operands[3] = GEN_INT ..." line below looks like a
;; leftover of the removed define_expand that this diff view shows
;; interleaved with the new pattern -- confirm against the real file.
(define_insn "<s>mul<mode>3_highpart"
|
||||
[(set (match_operand:DWIH 0 "register_operand" "=d")
|
||||
(any_mul_highpart:DWIH
|
||||
(match_operand:DWIH 1 "register_operand" "%a")
|
||||
(match_operand:DWIH 2 "nonimmediate_operand" "rm")))
|
||||
(clobber (match_scratch:DWIH 3 "=1"))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
""
|
||||
"operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
|
||||
"<sgnprefix>mul{<imodesuffix>}\t%2"
|
||||
[(set_attr "type" "imul")
|
||||
(set_attr "length_immediate" "0")
|
||||
(set (attr "athlon_decode")
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; SImode highpart multiply whose result is zero-extended into a DImode
;; destination.  Only enabled when TARGET_64BIT holds.
;;   operand 0: DImode result register, constraint "=d".
;;   operand 1: SImode register input, constraint "%a" (commutative with
;;              operand 2).
;;   operand 2: SImode register or memory input, constraint "rm".
;;   operand 3: SImode scratch clobber tied to operand 1 ("=1").
;; The flags register is clobbered.  The output template emits
;; <sgnprefix>mul{l} on operand 2 only.
(define_insn "*<s>mulsi3_highpart_zext"
|
||||
[(set (match_operand:DI 0 "register_operand" "=d")
|
||||
(zero_extend:DI
|
||||
(any_mul_highpart:SI
|
||||
(match_operand:SI 1 "register_operand" "%a")
|
||||
(match_operand:SI 2 "nonimmediate_operand" "rm"))))
|
||||
(clobber (match_scratch:SI 3 "=1"))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT"
|
||||
"<sgnprefix>mul{l}\t%2"
|
||||
[(set_attr "type" "imul")
|
||||
(set_attr "length_immediate" "0")
|
||||
(set (attr "athlon_decode")
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*<s>muldi3_highpart_1"
|
||||
[(set (match_operand:DI 0 "register_operand" "=d")
|
||||
|
@ -8467,8 +8497,8 @@
|
|||
(set_attr "length_immediate" "0")
|
||||
(set (attr "athlon_decode")
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "DI")])
|
||||
|
@ -8491,8 +8521,8 @@
|
|||
(set_attr "length_immediate" "0")
|
||||
(set (attr "athlon_decode")
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
@ -8515,12 +8545,54 @@
|
|||
(set_attr "length_immediate" "0")
|
||||
(set (attr "athlon_decode")
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "bdver1_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; Highpart multiplication peephole2s to tweak register allocation.
|
||||
;; mov %rdx,imm; mov %rax,%rdi; imulq %rdx -> mov %rax,imm; imulq %rdi
|
||||
;; Matched sequence (three insns):
;;   1. operand 0 (general reg) := operand 1 (immediate)
;;   2. operand 2 (general reg) := operand 3 (general reg)
;;   3. operand 4 := highpart multiply of operand 2 by operand 0,
;;      clobbering operand 2 and the flags register.
;; Condition: operand 0 must be a different register from operands 2 and
;; 3, and must either be the multiply's destination (operand 4) or be
;; reported dead by peep2_reg_dead_p (3, ...), so its immediate value is
;; not needed after the sequence.
;; Replacement (two insns): load the immediate straight into operand 2 and
;; multiply by operand 3, which removes the register-to-register copy.
(define_peephole2
|
||||
[(set (match_operand:SWI48 0 "general_reg_operand")
|
||||
(match_operand:SWI48 1 "immediate_operand"))
|
||||
(set (match_operand:SWI48 2 "general_reg_operand")
|
||||
(match_operand:SWI48 3 "general_reg_operand"))
|
||||
(parallel [(set (match_operand:SWI48 4 "general_reg_operand")
|
||||
(any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
|
||||
(clobber (match_dup 2))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"REGNO (operands[0]) != REGNO (operands[2])
|
||||
&& REGNO (operands[0]) != REGNO (operands[3])
|
||||
&& (REGNO (operands[0]) == REGNO (operands[4])
|
||||
|| peep2_reg_dead_p (3, operands[0]))"
|
||||
[(set (match_dup 2) (match_dup 1))
|
||||
(parallel [(set (match_dup 4)
|
||||
(any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
|
||||
(clobber (match_dup 2))
|
||||
(clobber (reg:CC FLAGS_REG))])])
|
||||
|
||||
;; Peephole for the SImode highpart multiply whose result is zero-extended
;; into a DImode register (operand 4).
;; Matched sequence (three insns):
;;   1. operand 0 (SI general reg) := operand 1 (immediate)
;;   2. operand 2 (SI general reg) := operand 3 (SI general reg)
;;   3. operand 4 := zero_extend of the highpart multiply of operand 2 by
;;      operand 0, clobbering operand 2 and the flags register.
;; Condition: operand 0 must differ from operands 2 and 3 by register
;; number, and must either share its register number with operand 4 or be
;; reported dead by peep2_reg_dead_p (3, ...).
;; Replacement (two insns): load the immediate straight into operand 2 and
;; multiply by operand 3, which removes the register-to-register copy.
(define_peephole2
|
||||
[(set (match_operand:SI 0 "general_reg_operand")
|
||||
(match_operand:SI 1 "immediate_operand"))
|
||||
(set (match_operand:SI 2 "general_reg_operand")
|
||||
(match_operand:SI 3 "general_reg_operand"))
|
||||
(parallel [(set (match_operand:DI 4 "general_reg_operand")
|
||||
(zero_extend:DI
|
||||
(any_mul_highpart:SI (match_dup 2) (match_dup 0))))
|
||||
(clobber (match_dup 2))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"REGNO (operands[0]) != REGNO (operands[2])
|
||||
&& REGNO (operands[0]) != REGNO (operands[3])
|
||||
&& (REGNO (operands[0]) == REGNO (operands[4])
|
||||
|| peep2_reg_dead_p (3, operands[0]))"
|
||||
[(set (match_dup 2) (match_dup 1))
|
||||
(parallel [(set (match_dup 4)
|
||||
(zero_extend:DI
|
||||
(any_mul_highpart:SI (match_dup 2) (match_dup 3))))
|
||||
(clobber (match_dup 2))
|
||||
(clobber (reg:CC FLAGS_REG))])])
|
||||
|
||||
;; The patterns that match these are at the end of this file.
|
||||
|
||||
(define_expand "mulxf3"
|
||||
|
|
11
gcc/testsuite/gcc.target/i386/smuldi3_highpart.c
Normal file
11
gcc/testsuite/gcc.target/i386/smuldi3_highpart.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2" } */
|
||||
typedef int __attribute ((mode(TI))) ti_t;
|
||||
|
||||
/* Convert X to the TImode integer type ti_t, multiply it by the constant
   19065, shift the product right by 72 bits and return the result as a
   long.  The dg-final directives in this file scan the generated assembly
   for a "movl $19065, %eax" instruction and for exactly one "movq".  */
long foo(long x)
|
||||
{
|
||||
return ((ti_t)x * 19065) >> 72;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "movl\[ \\t]+\\\$19065, %eax" } } */
|
||||
/* { dg-final { scan-assembler-times "movq" 1 } } */
|
Loading…
Add table
Reference in a new issue