diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 85ddc22a55e..cdd0b84ffc9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2007-05-26 H.J. Lu + + * config/i386/i386-protos.h (ix86_expand_sse4_unpack): New. + + * config/i386/i386.c (ix86_expand_sse4_unpack): New. + + * config/i386/sse.md (vec_unpacku_hi_v16qi): Call + ix86_expand_sse4_unpack if SSE4.1 is enabled. + (vec_unpacks_hi_v16qi): Likewise. + (vec_unpacku_lo_v16qi): Likewise. + (vec_unpacks_lo_v16qi): Likewise. + (vec_unpacku_hi_v8hi): Likewise. + (vec_unpacks_hi_v8hi): Likewise. + (vec_unpacku_lo_v8hi): Likewise. + (vec_unpacks_lo_v8hi): Likewise. + (vec_unpacku_hi_v4si): Likewise. + (vec_unpacks_hi_v4si): Likewise. + (vec_unpacku_lo_v4si): Likewise. + (vec_unpacks_lo_v4si): Likewise. + 2007-05-26 Kazu Hirata * c-typeck.c, config/arm/arm.c, config/darwin.c, diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 2f320391943..60b495582aa 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_sse_unpack (rtx[], bool, bool); +extern void ix86_expand_sse4_unpack (rtx[], bool, bool); extern int ix86_expand_int_addcc (rtx[]); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void x86_initialize_trampoline (rtx, rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7bc5fe04c89..b0db9504e3d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12843,6 +12843,55 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) emit_insn (unpack (dest, operands[1], se)); } +/* This function performs the same task as ix86_expand_sse_unpack, + but with SSE4.1 instructions. 
*/ + +void +ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) +{ /* operands[0] is the destination and operands[1] the input vector. */ + enum machine_mode imode = GET_MODE (operands[1]); /* Mode of the input vector. */ + rtx (*unpack)(rtx, rtx); /* Insn generator picked below; called as unpack (dest, src). */ + rtx src, dest; + + switch (imode) /* unsigned_p picks the zero_extend generator, otherwise the extend one. */ + { + case V16QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv8qiv8hi2; + else + unpack = gen_sse4_1_extendv8qiv8hi2; + break; + case V8HImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv4hiv4si2; + else + unpack = gen_sse4_1_extendv4hiv4si2; + break; + case V4SImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2siv2di2; + else + unpack = gen_sse4_1_extendv2siv2di2; + break; + default: /* Only V16QI, V8HI and V4SI inputs are handled. */ + gcc_unreachable (); + } + + dest = operands[0]; + if (high_p) + { + /* Shift higher 8 bytes to lower 8 bytes: the TImode view of operands[1] is shifted right by 64 into a new pseudo, leaving operands[1] itself unmodified. */ + src = gen_reg_rtx (imode); + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), + gen_lowpart (TImode, operands[1]), + GEN_INT (64))); + } + else + src = operands[1]; /* Not high_p: pass the input register through as is. */ + + emit_insn (unpack (dest, src)); /* Emit the selected extend insn. */ +} + /* Expand conditional increment or decrement using adb/sbb instructions. The default case using setcc followed by the conditional move can be done by generic code. 
*/ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 800807cde17..81ff9258ef9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4722,7 +4722,10 @@ (match_operand:V16QI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, true, true); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, true); + else + ix86_expand_sse_unpack (operands, true, true); DONE; }) @@ -4731,7 +4734,10 @@ (match_operand:V16QI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, false, true); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, true); + else + ix86_expand_sse_unpack (operands, false, true); DONE; }) @@ -4740,7 +4746,10 @@ (match_operand:V16QI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, true, false); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, false); + else + ix86_expand_sse_unpack (operands, true, false); DONE; }) @@ -4749,7 +4758,10 @@ (match_operand:V16QI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, false, false); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, false); + else + ix86_expand_sse_unpack (operands, false, false); DONE; }) @@ -4758,7 +4770,10 @@ (match_operand:V8HI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, true, true); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, true); + else + ix86_expand_sse_unpack (operands, true, true); DONE; }) @@ -4767,7 +4782,10 @@ (match_operand:V8HI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, false, true); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, true); + else + ix86_expand_sse_unpack (operands, false, true); DONE; }) @@ -4776,7 +4794,10 @@ (match_operand:V8HI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, true, false); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, 
false); + else + ix86_expand_sse_unpack (operands, true, false); DONE; }) @@ -4785,7 +4806,10 @@ (match_operand:V8HI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, false, false); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, false); + else + ix86_expand_sse_unpack (operands, false, false); DONE; }) @@ -4794,7 +4818,10 @@ (match_operand:V4SI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, true, true); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, true); + else + ix86_expand_sse_unpack (operands, true, true); DONE; }) @@ -4803,7 +4830,10 @@ (match_operand:V4SI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, false, true); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, true); + else + ix86_expand_sse_unpack (operands, false, true); DONE; }) @@ -4812,7 +4842,10 @@ (match_operand:V4SI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, true, false); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, true, false); + else + ix86_expand_sse_unpack (operands, true, false); DONE; }) @@ -4821,7 +4854,10 @@ (match_operand:V4SI 1 "register_operand" "")] "TARGET_SSE2" { - ix86_expand_sse_unpack (operands, false, false); + if (TARGET_SSE4_1) + ix86_expand_sse4_unpack (operands, false, false); + else + ix86_expand_sse_unpack (operands, false, false); DONE; })