From a30cc07224154c101c1275c148bf60d3acd43015 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 18 Nov 2007 21:55:26 -0800 Subject: [PATCH] BR 1834292: Fix multiple disassembler bugs - Correct the building of the disassembler decision tree. - Handle SSE instructions with F2 prefix (\332) correctly. - Mark instructions which are now used as prefixes with ND. (In a future version when we have better CPU version handling, we should probably build the decision tree at runtime based on the selected CPU feature sets.) - Sanitize the handling of \144-147 and \154-157 in both the assembler and disassembler. They take an opcode byte as argument; don't pretend they don't. --- assemble.c | 14 +++---- disasm.c | 27 ++++++++++-- insns.dat | 118 ++++++++++++++++++++++++++--------------------------- insns.pl | 42 +++++++++++-------- 4 files changed, 114 insertions(+), 87 deletions(-) diff --git a/assemble.c b/assemble.c index 5670a5cf..30921edb 100644 --- a/assemble.c +++ b/assemble.c @@ -33,11 +33,11 @@ * \1ab - a ModRM, calculated on EA in operand a, with the spare * field the register value of operand b. * \140..\143 - an immediate word or signed byte for operand 0..3 - * \144..\147 - or 2 (s-field) into next opcode byte if operand 0..3 - * is a signed byte rather than a word. + * \144..\147 - or 2 (s-field) into opcode byte if operand 0..3 + * is a signed byte rather than a word. Opcode byte follows. * \150..\153 - an immediate dword or signed byte for operand 0..3 - * \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3 - * is a signed byte rather than a dword. + * \154..\157 - or 2 (s-field) into opcode byte if operand 0..3 + * is a signed byte rather than a dword. Opcode byte follows. * \160..\163 - this instruction uses DREX rather than REX, with the * OC0 field set to 0, and the dest field taken from * operand 0..3. 
@@ -907,7 +907,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, case 0145: case 0146: case 0147: - codes += 2; + codes++; length++; break; case 0150: @@ -920,7 +920,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, case 0155: case 0156: case 0157: - codes += 2; + codes++; length++; break; case 0160: @@ -1397,7 +1397,6 @@ static void gencode(int32_t segment, int64_t offset, int bits, case 0146: case 0147: EMIT_REX(); - codes++; bytes[0] = *codes++; if (is_sbyte(ins, c & 3, 16)) bytes[0] |= 2; /* s-bit */ @@ -1427,7 +1426,6 @@ static void gencode(int32_t segment, int64_t offset, int bits, case 0156: case 0157: EMIT_REX(); - codes++; bytes[0] = *codes++; if (is_sbyte(ins, c & 3, 32)) bytes[0] |= 2; /* s-bit */ diff --git a/disasm.c b/disasm.c index 18911f0e..3b604072 100644 --- a/disasm.c +++ b/disasm.c @@ -376,6 +376,7 @@ static int matches(const struct itemplate *t, uint8_t *data, int asize = prefix->asize; int i, c; struct operand *opx; + int s_field_for = -1; /* No 144/154 series code encountered */ for (i = 0; i < MAX_OPERANDS; i++) { ins->oprs[i].segment = ins->oprs[i].disp_size = @@ -595,13 +596,30 @@ static int matches(const struct itemplate *t, uint8_t *data, } case4(0140): - opx->offset = getu16(data); - data += 2; + if (s_field_for == (c & 3)) { + opx->offset = gets8(data); + data++; + } else { + opx->offset = getu16(data); + data += 2; + } + break; + + case4(0144): + case4(0154): + s_field_for = (*data & 0x02) ? 
c & 3 : -1; + if ((*data++ & ~0x02) != *r++) + return false; break; case4(0150): - opx->offset = getu32(data); - data += 4; + if (s_field_for == (c & 3)) { + opx->offset = gets8(data); + data++; + } else { + opx->offset = getu32(data); + data += 4; + } break; case4(0160): @@ -743,6 +761,7 @@ static int matches(const struct itemplate *t, uint8_t *data, case 0332: if (prefix->rep != 0xF2) return false; + drep = 0; break; case 0333: diff --git a/insns.dat b/insns.dat index 58532aa6..1361bf97 100644 --- a/insns.dat +++ b/insns.dat @@ -63,12 +63,12 @@ ADC reg_eax,imm \321\1\x15\41 386,SM ADC reg_rax,sbyte \324\1\x83\202\15 X64,SM,ND ADC reg_rax,imm \324\1\x15\41 X64,SM ADC rm8,imm \1\x80\202\21 8086,SM -ADC rm16,imm \320\145\1\x81\202\141 8086,SM -ADC rm32,imm \321\155\1\x81\202\151 386,SM -ADC rm64,imm \324\155\1\x81\202\151 X64,SM +ADC rm16,imm \320\145\x81\202\141 8086,SM +ADC rm32,imm \321\155\x81\202\151 386,SM +ADC rm64,imm \324\155\x81\202\151 X64,SM ADC mem,imm8 \1\x80\202\21 8086,SM -ADC mem,imm16 \320\145\1\x81\202\141 8086,SM -ADC mem,imm32 \321\155\1\x81\202\151 386,SM +ADC mem,imm16 \320\145\x81\202\141 8086,SM +ADC mem,imm32 \321\155\x81\202\151 386,SM ADD mem,reg8 \170\101 8086,SM ADD reg8,reg8 \170\101 8086 ADD mem,reg16 \320\1\x01\101 8086,SM @@ -96,12 +96,12 @@ ADD reg_eax,imm \321\1\x05\41 386,SM ADD reg_rax,sbyte \324\1\x83\200\15 X64,SM,ND ADD reg_rax,imm \324\1\x05\41 X64,SM ADD rm8,imm \1\x80\200\21 8086,SM -ADD rm16,imm \320\145\1\x81\200\141 8086,SM -ADD rm32,imm \321\155\1\x81\200\151 386,SM -ADD rm64,imm \324\155\1\x81\200\151 X64,SM +ADD rm16,imm \320\145\x81\200\141 8086,SM +ADD rm32,imm \321\155\x81\200\151 386,SM +ADD rm64,imm \324\155\x81\200\151 X64,SM ADD mem,imm8 \1\x80\200\21 8086,SM -ADD mem,imm16 \320\145\1\x81\200\141 8086,SM -ADD mem,imm32 \321\155\1\x81\200\151 386,SM +ADD mem,imm16 \320\145\x81\200\141 8086,SM +ADD mem,imm32 \321\155\x81\200\151 386,SM AND mem,reg8 \1\x20\101 8086,SM AND reg8,reg8 \1\x20\101 8086 AND mem,reg16 
\320\1\x21\101 8086,SM @@ -129,12 +129,12 @@ AND reg_eax,imm \321\1\x25\41 386,SM AND reg_rax,sbyte \324\1\x83\204\15 X64,SM,ND AND reg_rax,imm \324\1\x25\41 X64,SM AND rm8,imm \1\x80\204\21 8086,SM -AND rm16,imm \320\145\1\x81\204\141 8086,SM -AND rm32,imm \321\155\1\x81\204\151 386,SM -AND rm64,imm \324\155\1\x81\204\151 X64,SM +AND rm16,imm \320\145\x81\204\141 8086,SM +AND rm32,imm \321\155\x81\204\151 386,SM +AND rm64,imm \324\155\x81\204\151 X64,SM AND mem,imm8 \1\x80\204\21 8086,SM -AND mem,imm16 \320\145\1\x81\204\141 8086,SM -AND mem,imm32 \321\155\1\x81\204\151 386,SM +AND mem,imm16 \320\145\x81\204\141 8086,SM +AND mem,imm32 \321\155\x81\204\151 386,SM ARPL mem,reg16 \1\x63\101 286,PROT,SM,NOLONG ARPL reg16,reg16 \1\x63\101 286,PROT,NOLONG BB0_RESET void \2\x0F\x3A PENT,CYRIX,ND @@ -256,16 +256,16 @@ CMP reg_eax,imm \321\1\x3D\41 386,SM CMP reg_rax,sbyte \324\1\x83\207\15 X64,SM,ND CMP reg_rax,imm \324\1\x3D\41 X64,SM CMP rm8,imm \1\x80\207\21 8086,SM -CMP rm16,imm \320\145\1\x81\207\141 8086,SM -CMP rm32,imm \321\155\1\x81\207\151 386,SM -CMP rm64,imm \324\155\1\x81\207\151 X64,SM +CMP rm16,imm \320\145\x81\207\141 8086,SM +CMP rm32,imm \321\155\x81\207\151 386,SM +CMP rm64,imm \324\155\x81\207\151 X64,SM CMP mem,imm8 \1\x80\207\21 8086,SM -CMP mem,imm16 \320\145\1\x81\207\141 8086,SM -CMP mem,imm32 \321\155\1\x81\207\151 386,SM +CMP mem,imm16 \320\145\x81\207\141 8086,SM +CMP mem,imm32 \321\155\x81\207\151 386,SM CMPSB void \335\1\xA6 8086 -CMPSD void \335\321\1\xA7 386 -CMPSQ void \335\324\1\xA7 X64 -CMPSW void \335\320\1\xA7 8086 +CMPSD void \335\321\xA7 386 +CMPSQ void \335\324\xA7 X64 +CMPSW void \335\320\xA7 8086 CMPXCHG mem,reg8 \2\x0F\xB0\101 PENT,SM CMPXCHG reg8,reg8 \2\x0F\xB0\101 PENT CMPXCHG mem,reg16 \320\2\x0F\xB1\101 PENT,SM @@ -546,38 +546,38 @@ IMUL reg64,reg64 \324\2\x0F\xAF\110 X64 IMUL reg16,mem,imm8 \320\1\x6B\110\16 186,SM IMUL reg16,mem,sbyte \320\1\x6B\110\16 186,SM,ND IMUL reg16,mem,imm16 \320\1\x69\110\32 186,SM -IMUL 
reg16,mem,imm \320\146\1\x69\110\142 186,SM,ND +IMUL reg16,mem,imm \320\146\x69\110\142 186,SM,ND IMUL reg16,reg16,imm8 \320\1\x6B\110\16 186 IMUL reg16,reg16,sbyte \320\1\x6B\110\16 186,SM,ND IMUL reg16,reg16,imm16 \320\1\x69\110\32 186 -IMUL reg16,reg16,imm \320\146\1\x69\110\142 186,SM,ND +IMUL reg16,reg16,imm \320\146\x69\110\142 186,SM,ND IMUL reg32,mem,imm8 \321\1\x6B\110\16 386,SM IMUL reg32,mem,sbyte \321\1\x6B\110\16 386,SM,ND IMUL reg32,mem,imm32 \321\1\x69\110\42 386,SM -IMUL reg32,mem,imm \321\156\1\x69\110\152 386,SM,ND +IMUL reg32,mem,imm \321\156\x69\110\152 386,SM,ND IMUL reg32,reg32,imm8 \321\1\x6B\110\16 386 IMUL reg32,reg32,sbyte \321\1\x6B\110\16 386,SM,ND IMUL reg32,reg32,imm32 \321\1\x69\110\42 386 -IMUL reg32,reg32,imm \321\156\1\x69\110\152 386,SM,ND +IMUL reg32,reg32,imm \321\156\x69\110\152 386,SM,ND IMUL reg64,mem,imm8 \324\1\x6B\110\16 X64,SM IMUL reg64,mem,sbyte \324\1\x6B\110\16 X64,SM,ND IMUL reg64,mem,imm32 \324\1\x69\110\42 X64,SM -IMUL reg64,mem,imm \324\156\1\x69\110\152 X64,SM,ND +IMUL reg64,mem,imm \324\156\x69\110\152 X64,SM,ND IMUL reg64,reg64,imm8 \324\1\x6B\110\16 X64 IMUL reg64,reg64,sbyte \324\1\x6B\110\16 X64,SM,ND IMUL reg64,reg64,imm32 \324\1\x69\110\42 X64 -IMUL reg64,reg64,imm \324\156\1\x69\110\152 X64,SM,ND +IMUL reg64,reg64,imm \324\156\x69\110\152 X64,SM,ND IMUL reg16,imm8 \320\1\x6B\100\15 186 IMUL reg16,sbyte \320\1\x6B\100\15 186,SM,ND IMUL reg16,imm16 \320\1\x69\100\31 186 -IMUL reg16,imm \320\145\1\x69\100\141 186,SM,ND +IMUL reg16,imm \320\145\x69\100\141 186,SM,ND IMUL reg32,imm8 \321\1\x6B\100\15 386 IMUL reg32,sbyte \321\1\x6B\100\15 386,SM,ND IMUL reg32,imm32 \321\1\x69\100\41 386 -IMUL reg32,imm \321\155\1\x69\100\151 386,SM,ND +IMUL reg32,imm \321\155\x69\100\151 386,SM,ND IMUL reg64,sbyte \324\1\x6B\100\15 X64,SM,ND IMUL reg64,imm32 \324\1\x69\100\41 X64 -IMUL reg64,imm \324\155\1\x69\100\151 X64,SM,ND +IMUL reg64,imm \324\155\x69\100\151 X64,SM,ND IN reg_al,imm \1\xE4\25 8086,SB IN reg_ax,imm 
\320\1\xE5\25 8086,SB IN reg_eax,imm \321\1\xE5\25 386,SB @@ -750,8 +750,8 @@ MOV reg32,reg_dreg \2\x0F\x21\101 386,PRIV MOV reg64,reg_dreg \323\2\x0F\x21\101 X64,PRIV MOV reg_dreg,reg32 \2\x0F\x23\110 386,PRIV MOV reg_dreg,reg64 \323\2\x0F\x23\110 X64,PRIV -MOV reg32,reg_treg \2\x0F\x24\101 386,NOLONG -MOV reg_treg,reg32 \2\x0F\x26\110 386,NOLONG +MOV reg32,reg_treg \2\x0F\x24\101 386,NOLONG,ND +MOV reg_treg,reg32 \2\x0F\x26\110 386,NOLONG,ND MOV mem,reg8 \1\x88\101 8086,SM MOV reg8,reg8 \1\x88\101 8086 MOV mem,reg16 \320\1\x89\101 8086,SM @@ -853,12 +853,12 @@ OR reg_eax,imm \321\1\x0D\41 386,SM OR reg_rax,sbyte \324\1\x83\201\15 X64,SM,ND OR reg_rax,imm \324\1\x0D\41 X64,SM OR rm8,imm \1\x80\201\21 8086,SM -OR rm16,imm \320\145\1\x81\201\141 8086,SM -OR rm32,imm \321\155\1\x81\201\151 386,SM -OR rm64,imm \324\155\1\x81\201\151 X64,SM +OR rm16,imm \320\145\x81\201\141 8086,SM +OR rm32,imm \321\155\x81\201\151 386,SM +OR rm64,imm \324\155\x81\201\151 X64,SM OR mem,imm8 \1\x80\201\21 8086,SM -OR mem,imm16 \320\145\1\x81\201\141 8086,SM -OR mem,imm32 \321\155\1\x81\201\151 386,SM +OR mem,imm16 \320\145\x81\201\141 8086,SM +OR mem,imm32 \321\155\x81\201\151 386,SM OUT imm,reg_al \1\xE6\24 8086,SB OUT imm,reg_ax \320\1\xE7\24 8086,SB OUT imm,reg_eax \321\1\xE7\24 386,SB @@ -984,9 +984,9 @@ PUSH reg_dess \6 8086,NOLONG PUSH reg_fsgs \1\x0F\7 386 PUSH imm8 \1\x6A\14 186 PUSH sbyte \1\x6A\14 186,ND -PUSH imm16 \320\144\1\x68\140 186 -PUSH imm32 \321\154\1\x68\150 386,NOLONG -PUSH imm64 \321\154\1\x68\150 X64 +PUSH imm16 \320\144\x68\140 186 +PUSH imm32 \321\154\x68\150 386,NOLONG +PUSH imm64 \321\154\x68\150 X64 PUSH imm \1\x68\34 186 PUSHA void \322\1\x60 186,NOLONG PUSHAD void \321\1\x60 386,NOLONG @@ -1055,7 +1055,7 @@ ROR rm32,imm \321\1\xC1\201\25 386,SB ROR rm64,unity \324\1\xD1\201 X64 ROR rm64,reg_cl \324\1\xD3\201 X64 ROR rm64,imm \324\1\xC1\201\25 X64,SB -RDM void \2\x0F\x3A P6,CYRIX +RDM void \2\x0F\x3A P6,CYRIX,ND RSDC reg_sreg,mem80 \2\x0F\x79\110 
486,CYRIX,SMM RSLDT mem80 \2\x0F\x7B\200 486,CYRIX,SMM RSM void \2\x0F\xAA PENT,SMM @@ -1113,12 +1113,12 @@ SBB reg_eax,imm \321\1\x1D\41 386,SM SBB reg_rax,sbyte \324\1\x83\203\15 X64,SM,ND SBB reg_rax,imm \324\1\x1D\41 X64,SM SBB rm8,imm \1\x80\203\21 8086,SM -SBB rm16,imm \320\145\1\x81\203\141 8086,SM -SBB rm32,imm \321\155\1\x81\203\151 386,SM -SBB rm64,imm \324\155\1\x81\203\151 X64,SM +SBB rm16,imm \320\145\x81\203\141 8086,SM +SBB rm32,imm \321\155\x81\203\151 386,SM +SBB rm64,imm \324\155\x81\203\151 X64,SM SBB mem,imm8 \1\x80\203\21 8086,SM -SBB mem,imm16 \320\145\1\x81\203\141 8086,SM -SBB mem,imm32 \321\155\1\x81\203\151 386,SM +SBB mem,imm16 \320\145\x81\203\141 8086,SM +SBB mem,imm32 \321\155\x81\203\151 386,SM SCASB void \335\1\xAE 8086 SCASD void \335\321\1\xAF 386 SCASQ void \335\324\1\xAF X64 @@ -1227,14 +1227,14 @@ SUB reg_eax,imm \321\1\x2D\41 386,SM SUB reg_rax,sbyte \324\1\x83\205\15 X64,SM,ND SUB reg_rax,imm \324\1\x2D\41 X64,SM SUB rm8,imm \1\x80\205\21 8086,SM -SUB rm16,imm \320\145\1\x81\205\141 8086,SM -SUB rm32,imm \321\155\1\x81\205\151 386,SM -SUB rm64,imm \324\155\1\x81\205\151 X64,SM +SUB rm16,imm \320\145\x81\205\141 8086,SM +SUB rm32,imm \321\155\x81\205\151 386,SM +SUB rm64,imm \324\155\x81\205\151 X64,SM SUB mem,imm8 \1\x80\205\21 8086,SM -SUB mem,imm16 \320\145\1\x81\205\141 8086,SM -SUB mem,imm32 \321\155\1\x81\205\151 386,SM +SUB mem,imm16 \320\145\x81\205\141 8086,SM +SUB mem,imm32 \321\155\x81\205\151 386,SM SVDC mem80,reg_sreg \2\x0F\x78\101 486,CYRIX,SMM -SVLDT mem80 \2\x0F\x7A\200 486,CYRIX,SMM +SVLDT mem80 \2\x0F\x7A\200 486,CYRIX,SMM,ND SVTS mem80 \2\x0F\x7C\200 486,CYRIX,SMM SWAPGS void \3\x0F\x01\xF8 X64 SYSCALL void \2\x0F\x05 P6,AMD @@ -1356,12 +1356,12 @@ XOR reg_eax,imm \321\1\x35\41 386,SM XOR reg_rax,sbyte \324\1\x83\206\15 X64,SM,ND XOR reg_rax,imm \324\1\x35\41 X64,SM XOR rm8,imm \1\x80\206\21 8086,SM -XOR rm16,imm \320\145\1\x81\206\141 8086,SM -XOR rm32,imm \321\155\1\x81\206\151 386,SM -XOR rm64,imm 
\324\155\1\x81\206\151 X64,SM +XOR rm16,imm \320\145\x81\206\141 8086,SM +XOR rm32,imm \321\155\x81\206\151 386,SM +XOR rm64,imm \324\155\x81\206\151 X64,SM XOR mem,imm8 \1\x80\206\21 8086,SM -XOR mem,imm16 \320\145\1\x81\206\141 8086,SM -XOR mem,imm32 \321\155\1\x81\206\151 386,SM +XOR mem,imm16 \320\145\x81\206\141 8086,SM +XOR mem,imm32 \321\155\x81\206\151 386,SM CMOVcc reg16,mem \320\1\x0F\330\x40\110 P6,SM CMOVcc reg16,reg16 \320\1\x0F\330\x40\110 P6 CMOVcc reg32,mem \321\1\x0F\330\x40\110 P6,SM diff --git a/insns.pl b/insns.pl index c68b0b58..158c2f07 100644 --- a/insns.pl +++ b/insns.pl @@ -56,7 +56,9 @@ while () { } if ($formatted && !$nd) { push @big, $formatted; - foreach $i (startseq($_[2])) { + my @sseq = startseq($_[2]); + print $_[0], ':', join(',', @sseq), "\n"; + foreach $i (@sseq) { if (!defined($dinstables{$i})) { $dinstables{$i} = []; } @@ -263,14 +265,15 @@ sub format { ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd); } -sub hexlist($$$) { - my($prefix, $start, $n) = @_; - my $i; +sub addprefix ($@) { + my ($prefix, @list) = @_; + my $x; my @l = (); - for ($i = 0; $i < $n; $i++) { - push(@l, sprintf("%s%02X", $prefix, $start+$i)); + foreach $x (@list) { + push(@l, sprintf("%s%02X", $prefix, $x)); } + return @l; } @@ -326,9 +329,9 @@ sub startseq($) { } foreach $pfx (@disasm_prefixes) { - if ($fbs =~ /^$pfx(.*)$/) { + if (substr($fbs, 0, length($pfx)) eq $pfx) { $prefix = $pfx; - $fbs = $1; + $fbs = substr($fbs, length($pfx)); last; } } @@ -336,21 +339,28 @@ sub startseq($) { if ($fbs ne '') { return ($prefix.substr($fbs,0,2)); } + + unshift(@codes, $c0); } elsif ($c0 == 04) { - return ("07", "17", "1F"); + return addprefix($prefix, 0x07, 0x17, 0x1F); } elsif ($c0 == 05) { - return ("A1", "A9"); + return addprefix($prefix, 0xA1, 0xA9); } elsif ($c0 == 06) { - return ("06", "0E", "16", "1E"); + return addprefix($prefix, 0x06, 0x0E, 0x16, 0x1E); } elsif ($c0 == 07) { - return ("A0", "A8"); + return addprefix($prefix, 0xA0, 0xA8); } 
elsif ($c0 >= 010 && $c0 <= 013) { - return hexlist($prefix, $c1, 8); + return addprefix($prefix, $c1..($c1+7)); + } elsif (($c0 & ~013) == 0144) { + return addprefix($prefix, $c1, $c1|2); } elsif ($c0 == 0330) { - return hexlist($prefix, $c1, 16); + return addprefix($prefix, $c1..($c1+15)); } elsif ($c0 == 0 || $c0 == 0340) { - return (); + return $prefix; + } else { + # We really need to be able to distinguish "forbidden" + # and "ignorable" codes here } } - return (); + return $prefix; }