re PR target/12902 (Invalid assembly generated when using SSE / xmmintrin.h)
PR target/12902 * config/i386/i386.md (sse_movhps, sse_movlps): Remove. (sse_shufps): Change operand 3 to const_int_operand. (sse2_storelps): Fix typo in template. (sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New. * config/i386/i386.c (ix86_expand_vector_move_misalign): Use them. (ix86_expand_builtin): Likewise. From-SVN: r92967
This commit is contained in:
parent
a8182d3719
commit
2cdb314898
4 changed files with 143 additions and 59 deletions
|
@ -1,4 +1,14 @@
|
|||
2004-01-05 Julian Brown <julian@codesourcery.com>
|
||||
2005-01-05 Richard Henderson <rth@redhat.com>
|
||||
|
||||
PR target/12902
|
||||
* config/i386/i386.md (sse_movhps, sse_movlps): Remove.
|
||||
(sse_shufps): Change operand 3 to const_int_operand.
|
||||
(sse2_storelps): Fix typo in template.
|
||||
(sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New.
|
||||
* config/i386/i386.c (ix86_expand_vector_move_misalign): Use them.
|
||||
(ix86_expand_builtin): Likewise.
|
||||
|
||||
2005-01-05 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/arm/arm.c (arm_return_in_memory): Treat complex types
|
||||
as aggregates for AAPCS ABIs.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* Subroutines used for code generation on IA-32.
|
||||
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
||||
2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
|
@ -7645,11 +7645,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
else
|
||||
emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
|
||||
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
m = adjust_address (op1, V4SFmode, 0);
|
||||
emit_insn (gen_sse_movlps (op0, op0, m));
|
||||
m = adjust_address (op1, V4SFmode, 8);
|
||||
emit_insn (gen_sse_movhps (op0, op0, m));
|
||||
m = adjust_address (op1, V2SFmode, 0);
|
||||
emit_insn (gen_sse_loadlps (op0, op0, m));
|
||||
m = adjust_address (op1, V2SFmode, 8);
|
||||
emit_insn (gen_sse_loadhps (op0, op0, m));
|
||||
}
|
||||
}
|
||||
else if (MEM_P (op0))
|
||||
|
@ -7684,11 +7683,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
}
|
||||
else
|
||||
{
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
m = adjust_address (op0, V4SFmode, 0);
|
||||
emit_insn (gen_sse_movlps (m, m, op1));
|
||||
m = adjust_address (op0, V4SFmode, 8);
|
||||
emit_insn (gen_sse_movhps (m, m, op1));
|
||||
m = adjust_address (op0, V2SFmode, 0);
|
||||
emit_insn (gen_sse_storelps (m, op1));
|
||||
m = adjust_address (op0, V2SFmode, 8);
|
||||
emit_insn (gen_sse_storehps (m, op1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -13508,8 +13506,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
|||
case IX86_BUILTIN_LOADLPS:
|
||||
case IX86_BUILTIN_LOADHPD:
|
||||
case IX86_BUILTIN_LOADLPD:
|
||||
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
|
||||
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
|
||||
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
|
||||
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
|
||||
: fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
|
||||
: CODE_FOR_sse2_loadlpd);
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
|
@ -13535,28 +13533,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
|||
|
||||
case IX86_BUILTIN_STOREHPS:
|
||||
case IX86_BUILTIN_STORELPS:
|
||||
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
|
||||
: CODE_FOR_sse_movlps);
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
|
||||
mode0 = insn_data[icode].operand[1].mode;
|
||||
mode1 = insn_data[icode].operand[2].mode;
|
||||
|
||||
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
|
||||
pat = GEN_FCN (icode) (op0, op0, op1);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
return const0_rtx;
|
||||
|
||||
case IX86_BUILTIN_STOREHPD:
|
||||
case IX86_BUILTIN_STORELPD:
|
||||
icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
|
||||
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
|
||||
: fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
|
||||
: fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
|
||||
: CODE_FOR_sse2_storelpd);
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
;; GCC machine description for IA-32 and x86-64.
|
||||
;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||
;; 2001, 2002, 2003, 2004
|
||||
;; 2001, 2002, 2003, 2004, 2005
|
||||
;; Free Software Foundation, Inc.
|
||||
;; Mostly by William Schelter.
|
||||
;; x86_64 support added by Jan Hubicka
|
||||
|
@ -20335,29 +20335,98 @@
|
|||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_insn "sse_movhps"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
|
||||
(vec_merge:V4SF
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
|
||||
(const_int 12)))]
|
||||
"TARGET_SSE
|
||||
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
|
||||
"movhps\t{%2, %0|%0, %2}"
|
||||
;; Store the high V2SF of the source vector to the destination.
|
||||
(define_insn "sse_storehps"
|
||||
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
|
||||
(vec_select:V2SF
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
|
||||
(parallel [(const_int 2) (const_int 3)])))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
movhps\t{%1, %0|%0, %1}
|
||||
movhlps\t{%1, %0|%0, %1}
|
||||
#"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V4SF")])
|
||||
(set_attr "mode" "V2SF")])
|
||||
|
||||
(define_insn "sse_movlps"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
|
||||
(vec_merge:V4SF
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
|
||||
(const_int 3)))]
|
||||
"TARGET_SSE
|
||||
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
|
||||
"movlps\t{%2, %0|%0, %2}"
|
||||
;; After reload, rewrite a select of elements 2 and 3 of a V4SF memory
;; operand into an ordinary V2SF move: operand 1 is re-addressed as V2SF
;; at offset 8 and moved into register operand 0.  The replacement pattern
;; is a dummy; the preparation code emits the move itself and ends with DONE.
(define_split
|
||||
[(set (match_operand:V2SF 0 "register_operand" "")
|
||||
(vec_select:V2SF
|
||||
(match_operand:V4SF 1 "memory_operand" "")
|
||||
(parallel [(const_int 2) (const_int 3)])))]
|
||||
"TARGET_SSE && reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Load the high V2SF of the target vector from the source vector.
|
||||
(define_insn "sse_loadhps"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
|
||||
(vec_concat:V4SF
|
||||
(vec_select:V2SF
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
|
||||
(parallel [(const_int 0) (const_int 1)]))
|
||||
(match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
movhps\t{%2, %0|%0, %2}
|
||||
movlhps\t{%2, %0|%0, %2}
|
||||
#"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V4SF")])
|
||||
(set_attr "mode" "V2SF")])
|
||||
|
||||
;; After reload, split a V4SF memory destination whose low half is kept
;; (match_dup 0, elements 0 and 1) and whose high half comes from V2SF
;; register operand 2: this is just a V2SF store of operand 2 at offset 8
;; from the destination address.
;; This pattern binds only operands 0 and 2; there is no operand 1, so the
;; store must take its source from operands[2].
(define_split
|
||||
[(set (match_operand:V4SF 0 "memory_operand" "")
|
||||
(vec_concat:V4SF
|
||||
(vec_select:V2SF
|
||||
(match_dup 0)
|
||||
(parallel [(const_int 0) (const_int 1)]))
|
||||
(match_operand:V2SF 2 "register_operand" "")))]
|
||||
"TARGET_SSE && reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[2]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Store the low V2SF of the source vector to the destination.
|
||||
;; Expander only: the vec_select pattern below is never emitted as such.
;; The preparation code takes the V2SF lowpart of operand 1, emits a plain
;; move of it into operand 0, and finishes with DONE.
(define_expand "sse_storelps"
|
||||
[(set (match_operand:V2SF 0 "nonimmediate_operand" "")
|
||||
(vec_select:V2SF
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "")
|
||||
(parallel [(const_int 0) (const_int 1)])))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
operands[1] = gen_lowpart (V2SFmode, operands[1]);
|
||||
emit_move_insn (operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Load the low V2SF of the target vector from the source vector.
|
||||
;; The result is operand 2 (V2SF) concatenated with elements 2 and 3 of
;; operand 1.  Three alternatives, as given by the constraints:
;;   0: "x" destination tied to operand 1, operand 2 in memory   -> movlps
;;   1: "x" destination tied to operand 2, operand 1 in "x"      -> shufps
;;   2: "m" destination tied to operand 1, operand 2 in "x"      -> movlps
(define_insn "sse_loadlps"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
|
||||
(vec_concat:V4SF
|
||||
(match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
|
||||
(vec_select:V2SF
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
|
||||
(parallel [(const_int 2) (const_int 3)]))))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
/* One output template per alternative, indexed by which_alternative.  */
static const char * const alt[] = {
|
||||
"movlps\t{%2, %0|%0, %2}",
|
||||
"shufps\t{%2, %1, %0|%0, %1, %2}",
|
||||
"movlps\t{%2, %0|%0, %2}"
|
||||
};
|
||||

||||
/* In alternative 1 operand 2 is tied to operand 0, so its slot is reused
   to carry the shufps immediate that the template prints as %2.
   NOTE(review): 0xe4 is 0b11100100, i.e. the two-bit fields 0,1,2,3 in
   order -- presumably "low half from %0, high half from %1"; confirm
   against the SHUFPS immediate encoding.  */
if (which_alternative == 1)
|
||||
operands[2] = GEN_INT (0xe4);
|
||||

||||
return alt[which_alternative];
|
||||
}
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V2SF")])
|
||||
|
||||
(define_expand "sse_loadss"
|
||||
[(match_operand:V4SF 0 "register_operand" "")
|
||||
|
@ -20405,10 +20474,9 @@
|
|||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
|
||||
(match_operand:SI 3 "immediate_operand" "i")]
|
||||
(match_operand:SI 3 "const_int_operand" "n")]
|
||||
UNSPEC_SHUFFLE))]
|
||||
"TARGET_SSE"
|
||||
;; @@@ check operand order for intel/nonintel syntax
|
||||
"shufps\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
@ -23902,7 +23970,7 @@
|
|||
[(set (match_operand:DF 0 "nonimmediate_operand" "")
|
||||
(vec_select:DF
|
||||
(match_operand:V2DF 1 "nonimmediate_operand" "")
|
||||
(parallel [(const_int 1)])))]
|
||||
(parallel [(const_int 0)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
operands[1] = gen_lowpart (DFmode, operands[1]);
|
||||
|
@ -23910,7 +23978,7 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Load the load double of the target vector from the source scalar.
|
||||
;; Load the low double of the target vector from the source scalar.
|
||||
(define_insn "sse2_loadlpd"
|
||||
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
|
||||
(vec_concat:V2DF
|
||||
|
|
25
gcc/testsuite/gcc.target/i386/sse-1.c
Normal file
25
gcc/testsuite/gcc.target/i386/sse-1.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* PR 12902 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -msse" } */
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/* Overlays four ints, four floats and one __m128 in the same storage.
   Only the V member is accessed by this test.  */
typedef union
|
||||
{
|
||||
int i[4];
|
||||
float f[4];
|
||||
__m128 v;
|
||||
} vector4_t;
|
||||
|
||||
/* Update B->v and C->v in place with the result of _mm_loadl_pi, passing
   the vector's current value together with a pointer into A: the first
   __m64 slot for B and the second (A + 1) for C.  */
void
|
||||
swizzle (const void *a, vector4_t * b, vector4_t * c)
|
||||
{
|
||||
b->v = _mm_loadl_pi (b->v, (__m64 *) a);
|
||||
c->v = _mm_loadl_pi (c->v, ((__m64 *) a) + 1);
|
||||
}
|
||||
|
||||
/* While one legal rendering of each statement would be movaps;movlps;movaps,
|
||||
we can implement this with just movlps;movlps. Since we do now, anything
|
||||
less would be a regression. */
|
||||
/* { dg-final { scan-assembler-not "movaps" } } */
|
||||
/* { dg-final { scan-assembler "movlps" } } */
|
Loading…
Add table
Reference in a new issue