With -mfpu=neon DI mode shifts are expanded after reload.

With -mfpu=neon, DI mode shifts are expanded after reload.  DI mode registers can
either fully or partially overlap on both ARM and Thumb-2.  However, the shift
expansion code can only deal with the full overlap case, and generates incorrect
code for partial overlaps.  The fix is to add new variants that support either
full overlap or no overlap.

    gcc/
	PR target/78041
	* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
	Remove partial overlap check for shift by 1.
	(<shift>di3_neon): Likewise.
    testsuite/
	* gcc.target/arm/pr78041.c: New test.

From-SVN: r241508
This commit is contained in:
Wilco Dijkstra 2016-10-25 10:25:28 +00:00 committed by Wilco Dijkstra
parent 84c2025396
commit ad6922b03b
4 changed files with 58 additions and 22 deletions

View file

@ -1,3 +1,10 @@
2016-10-25 Wilco Dijkstra <wdijkstr@arm.com>
PR target/78041
* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
Remove partial overlap check for shift by 1.
(<shift>di3_neon): Likewise.
2016-10-25 Thomas Preud'homme <thomas.preudhomme@arm.com>
* config/arm/constraints.md (Q constraint): Document its use for

View file

@ -1143,12 +1143,12 @@
)
(define_insn_and_split "ashldi3_neon"
[(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
(match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
(clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
(clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
(clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
[(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
(match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
(clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
(clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
(clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
(clobber (reg:CC_C CC_REGNUM))]
"TARGET_NEON"
"#"
@ -1180,9 +1180,11 @@
}
else
{
if (operands[2] == CONST1_RTX (SImode)
&& (!reg_overlap_mentioned_p (operands[0], operands[1])
|| REGNO (operands[0]) == REGNO (operands[1])))
/* The shift expanders support either full overlap or no overlap. */
gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
|| REGNO (operands[0]) == REGNO (operands[1]));
if (operands[2] == CONST1_RTX (SImode))
/* This clobbers CC. */
emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
else
@ -1191,8 +1193,8 @@
}
DONE;
}"
[(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,*,*")
[(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,speed,*,*")
(set_attr "type" "multiple")]
)
@ -1241,12 +1243,12 @@
;; ashrdi3_neon
;; lshrdi3_neon
(define_insn_and_split "<shift>di3_neon"
[(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
(match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
(clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
(clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
(clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
[(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
(match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
(clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
(clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
(clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
(clobber (reg:CC CC_REGNUM))]
"TARGET_NEON"
"#"
@ -1282,9 +1284,11 @@
}
else
{
if (operands[2] == CONST1_RTX (SImode)
&& (!reg_overlap_mentioned_p (operands[0], operands[1])
|| REGNO (operands[0]) == REGNO (operands[1])))
/* The shift expanders support either full overlap or no overlap. */
gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
|| REGNO (operands[0]) == REGNO (operands[1]));
if (operands[2] == CONST1_RTX (SImode))
/* This clobbers CC. */
emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
else
@ -1295,8 +1299,8 @@
DONE;
}"
[(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,*,*")
[(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,speed,*,*")
(set_attr "type" "multiple")]
)

View file

@ -1,3 +1,8 @@
2016-10-25 Wilco Dijkstra <wdijkstr@arm.com>
PR target/78041
* gcc.target/arm/pr78041.c: New test.
2016-10-25 Jakub Jelinek <jakub@redhat.com>
* g++.dg/cpp1z/launder1.C: New test.

View file

@ -0,0 +1,20 @@
/* { dg-require-effective-target arm_thumb2_ok } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */
extern void abort (void);
/* Global register variable: the 64-bit value x is bound to a location
   starting at core register r1.
   NOTE(review): presumably this binding is chosen so that the shift's source
   register pair only partially overlaps the register pair holding f's return
   value, which is the case PR target/78041 reports as miscompiled -- confirm
   against the ARM procedure call standard.  */
register long long x asm ("r1");
/* Return x shifted left by 5 bits.  This is a DImode shift by a constant
   other than 1.  Do not restructure: the test depends on the compiler
   seeing exactly this shift of the register variable.  */
long long f (void)
{
return x << 5;
}
/* Test driver: load a value with one bit set in each 32-bit half, shift it
   through f, and abort if the result differs from the expected value.  */
int main ()
{
/* Bit 0 and bit 32 are set, so both halves of the DImode value matter.  */
x = 0x0100000001;
/* 0x0100000001 << 5 == 0x2000000020; any other result means the shift was
   computed incorrectly.  */
if (f () != 0x2000000020)
abort ();
return 0;
}