BR 1834292: Fix multiple disassembler bugs

- Correct the building of the disassembler decision tree.
- Handle SSE instructions with F2 prefix (\332) correctly.
- Mark instructions which are now used as prefixes with ND.
  (In a future version when we have better CPU version handling,
  we should probably build the decision tree at runtime based on
  the selected CPU feature sets.)
- Sanitize the handling of \144-147 and \154-157 in both the assembler
  and disassembler.  They take an opcode byte as argument; don't
  pretend they don't.
This commit is contained in:
H. Peter Anvin 2007-11-18 21:55:26 -08:00
parent d0378fb1ed
commit a30cc07224
4 changed files with 114 additions and 87 deletions

View file

@ -33,11 +33,11 @@
* \1ab - a ModRM, calculated on EA in operand a, with the spare
* field the register value of operand b.
* \140..\143 - an immediate word or signed byte for operand 0..3
* \144..\147 - or 2 (s-field) into next opcode byte if operand 0..3
* is a signed byte rather than a word.
* \144..\147 - or 2 (s-field) into opcode byte if operand 0..3
* is a signed byte rather than a word. Opcode byte follows.
* \150..\153 - an immediate dword or signed byte for operand 0..3
* \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3
* is a signed byte rather than a dword.
* \154..\157 - or 2 (s-field) into opcode byte if operand 0..3
* is a signed byte rather than a dword. Opcode byte follows.
* \160..\163 - this instruction uses DREX rather than REX, with the
* OC0 field set to 0, and the dest field taken from
* operand 0..3.
@ -907,7 +907,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
case 0145:
case 0146:
case 0147:
codes += 2;
codes++;
length++;
break;
case 0150:
@ -920,7 +920,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
case 0155:
case 0156:
case 0157:
codes += 2;
codes++;
length++;
break;
case 0160:
@ -1397,7 +1397,6 @@ static void gencode(int32_t segment, int64_t offset, int bits,
case 0146:
case 0147:
EMIT_REX();
codes++;
bytes[0] = *codes++;
if (is_sbyte(ins, c & 3, 16))
bytes[0] |= 2; /* s-bit */
@ -1427,7 +1426,6 @@ static void gencode(int32_t segment, int64_t offset, int bits,
case 0156:
case 0157:
EMIT_REX();
codes++;
bytes[0] = *codes++;
if (is_sbyte(ins, c & 3, 32))
bytes[0] |= 2; /* s-bit */

View file

@ -376,6 +376,7 @@ static int matches(const struct itemplate *t, uint8_t *data,
int asize = prefix->asize;
int i, c;
struct operand *opx;
int s_field_for = -1; /* No 144/154 series code encountered */
for (i = 0; i < MAX_OPERANDS; i++) {
ins->oprs[i].segment = ins->oprs[i].disp_size =
@ -595,13 +596,30 @@ static int matches(const struct itemplate *t, uint8_t *data,
}
case4(0140):
opx->offset = getu16(data);
data += 2;
if (s_field_for == (c & 3)) {
opx->offset = gets8(data);
data++;
} else {
opx->offset = getu16(data);
data += 2;
}
break;
case4(0144):
case4(0154):
s_field_for = (*data & 0x02) ? c & 3 : -1;
if ((*data++ & ~0x02) != *r++)
return false;
break;
case4(0150):
opx->offset = getu32(data);
data += 4;
if (s_field_for == (c & 3)) {
opx->offset = gets8(data);
data++;
} else {
opx->offset = getu32(data);
data += 4;
}
break;
case4(0160):
@ -743,6 +761,7 @@ static int matches(const struct itemplate *t, uint8_t *data,
case 0332:
if (prefix->rep != 0xF2)
return false;
drep = 0;
break;
case 0333:

118
insns.dat
View file

@ -63,12 +63,12 @@ ADC reg_eax,imm \321\1\x15\41 386,SM
ADC reg_rax,sbyte \324\1\x83\202\15 X64,SM,ND
ADC reg_rax,imm \324\1\x15\41 X64,SM
ADC rm8,imm \1\x80\202\21 8086,SM
ADC rm16,imm \320\145\1\x81\202\141 8086,SM
ADC rm32,imm \321\155\1\x81\202\151 386,SM
ADC rm64,imm \324\155\1\x81\202\151 X64,SM
ADC rm16,imm \320\145\x81\202\141 8086,SM
ADC rm32,imm \321\155\x81\202\151 386,SM
ADC rm64,imm \324\155\x81\202\151 X64,SM
ADC mem,imm8 \1\x80\202\21 8086,SM
ADC mem,imm16 \320\145\1\x81\202\141 8086,SM
ADC mem,imm32 \321\155\1\x81\202\151 386,SM
ADC mem,imm16 \320\145\x81\202\141 8086,SM
ADC mem,imm32 \321\155\x81\202\151 386,SM
ADD mem,reg8 \170\101 8086,SM
ADD reg8,reg8 \170\101 8086
ADD mem,reg16 \320\1\x01\101 8086,SM
@ -96,12 +96,12 @@ ADD reg_eax,imm \321\1\x05\41 386,SM
ADD reg_rax,sbyte \324\1\x83\200\15 X64,SM,ND
ADD reg_rax,imm \324\1\x05\41 X64,SM
ADD rm8,imm \1\x80\200\21 8086,SM
ADD rm16,imm \320\145\1\x81\200\141 8086,SM
ADD rm32,imm \321\155\1\x81\200\151 386,SM
ADD rm64,imm \324\155\1\x81\200\151 X64,SM
ADD rm16,imm \320\145\x81\200\141 8086,SM
ADD rm32,imm \321\155\x81\200\151 386,SM
ADD rm64,imm \324\155\x81\200\151 X64,SM
ADD mem,imm8 \1\x80\200\21 8086,SM
ADD mem,imm16 \320\145\1\x81\200\141 8086,SM
ADD mem,imm32 \321\155\1\x81\200\151 386,SM
ADD mem,imm16 \320\145\x81\200\141 8086,SM
ADD mem,imm32 \321\155\x81\200\151 386,SM
AND mem,reg8 \1\x20\101 8086,SM
AND reg8,reg8 \1\x20\101 8086
AND mem,reg16 \320\1\x21\101 8086,SM
@ -129,12 +129,12 @@ AND reg_eax,imm \321\1\x25\41 386,SM
AND reg_rax,sbyte \324\1\x83\204\15 X64,SM,ND
AND reg_rax,imm \324\1\x25\41 X64,SM
AND rm8,imm \1\x80\204\21 8086,SM
AND rm16,imm \320\145\1\x81\204\141 8086,SM
AND rm32,imm \321\155\1\x81\204\151 386,SM
AND rm64,imm \324\155\1\x81\204\151 X64,SM
AND rm16,imm \320\145\x81\204\141 8086,SM
AND rm32,imm \321\155\x81\204\151 386,SM
AND rm64,imm \324\155\x81\204\151 X64,SM
AND mem,imm8 \1\x80\204\21 8086,SM
AND mem,imm16 \320\145\1\x81\204\141 8086,SM
AND mem,imm32 \321\155\1\x81\204\151 386,SM
AND mem,imm16 \320\145\x81\204\141 8086,SM
AND mem,imm32 \321\155\x81\204\151 386,SM
ARPL mem,reg16 \1\x63\101 286,PROT,SM,NOLONG
ARPL reg16,reg16 \1\x63\101 286,PROT,NOLONG
BB0_RESET void \2\x0F\x3A PENT,CYRIX,ND
@ -256,16 +256,16 @@ CMP reg_eax,imm \321\1\x3D\41 386,SM
CMP reg_rax,sbyte \324\1\x83\207\15 X64,SM,ND
CMP reg_rax,imm \324\1\x3D\41 X64,SM
CMP rm8,imm \1\x80\207\21 8086,SM
CMP rm16,imm \320\145\1\x81\207\141 8086,SM
CMP rm32,imm \321\155\1\x81\207\151 386,SM
CMP rm64,imm \324\155\1\x81\207\151 X64,SM
CMP rm16,imm \320\145\x81\207\141 8086,SM
CMP rm32,imm \321\155\x81\207\151 386,SM
CMP rm64,imm \324\155\x81\207\151 X64,SM
CMP mem,imm8 \1\x80\207\21 8086,SM
CMP mem,imm16 \320\145\1\x81\207\141 8086,SM
CMP mem,imm32 \321\155\1\x81\207\151 386,SM
CMP mem,imm16 \320\145\x81\207\141 8086,SM
CMP mem,imm32 \321\155\x81\207\151 386,SM
CMPSB void \335\1\xA6 8086
CMPSD void \335\321\1\xA7 386
CMPSQ void \335\324\1\xA7 X64
CMPSW void \335\320\1\xA7 8086
CMPSD void \335\321\xA7 386
CMPSQ void \335\324\xA7 X64
CMPSW void \335\320\xA7 8086
CMPXCHG mem,reg8 \2\x0F\xB0\101 PENT,SM
CMPXCHG reg8,reg8 \2\x0F\xB0\101 PENT
CMPXCHG mem,reg16 \320\2\x0F\xB1\101 PENT,SM
@ -546,38 +546,38 @@ IMUL reg64,reg64 \324\2\x0F\xAF\110 X64
IMUL reg16,mem,imm8 \320\1\x6B\110\16 186,SM
IMUL reg16,mem,sbyte \320\1\x6B\110\16 186,SM,ND
IMUL reg16,mem,imm16 \320\1\x69\110\32 186,SM
IMUL reg16,mem,imm \320\146\1\x69\110\142 186,SM,ND
IMUL reg16,mem,imm \320\146\x69\110\142 186,SM,ND
IMUL reg16,reg16,imm8 \320\1\x6B\110\16 186
IMUL reg16,reg16,sbyte \320\1\x6B\110\16 186,SM,ND
IMUL reg16,reg16,imm16 \320\1\x69\110\32 186
IMUL reg16,reg16,imm \320\146\1\x69\110\142 186,SM,ND
IMUL reg16,reg16,imm \320\146\x69\110\142 186,SM,ND
IMUL reg32,mem,imm8 \321\1\x6B\110\16 386,SM
IMUL reg32,mem,sbyte \321\1\x6B\110\16 386,SM,ND
IMUL reg32,mem,imm32 \321\1\x69\110\42 386,SM
IMUL reg32,mem,imm \321\156\1\x69\110\152 386,SM,ND
IMUL reg32,mem,imm \321\156\x69\110\152 386,SM,ND
IMUL reg32,reg32,imm8 \321\1\x6B\110\16 386
IMUL reg32,reg32,sbyte \321\1\x6B\110\16 386,SM,ND
IMUL reg32,reg32,imm32 \321\1\x69\110\42 386
IMUL reg32,reg32,imm \321\156\1\x69\110\152 386,SM,ND
IMUL reg32,reg32,imm \321\156\x69\110\152 386,SM,ND
IMUL reg64,mem,imm8 \324\1\x6B\110\16 X64,SM
IMUL reg64,mem,sbyte \324\1\x6B\110\16 X64,SM,ND
IMUL reg64,mem,imm32 \324\1\x69\110\42 X64,SM
IMUL reg64,mem,imm \324\156\1\x69\110\152 X64,SM,ND
IMUL reg64,mem,imm \324\156\x69\110\152 X64,SM,ND
IMUL reg64,reg64,imm8 \324\1\x6B\110\16 X64
IMUL reg64,reg64,sbyte \324\1\x6B\110\16 X64,SM,ND
IMUL reg64,reg64,imm32 \324\1\x69\110\42 X64
IMUL reg64,reg64,imm \324\156\1\x69\110\152 X64,SM,ND
IMUL reg64,reg64,imm \324\156\x69\110\152 X64,SM,ND
IMUL reg16,imm8 \320\1\x6B\100\15 186
IMUL reg16,sbyte \320\1\x6B\100\15 186,SM,ND
IMUL reg16,imm16 \320\1\x69\100\31 186
IMUL reg16,imm \320\145\1\x69\100\141 186,SM,ND
IMUL reg16,imm \320\145\x69\100\141 186,SM,ND
IMUL reg32,imm8 \321\1\x6B\100\15 386
IMUL reg32,sbyte \321\1\x6B\100\15 386,SM,ND
IMUL reg32,imm32 \321\1\x69\100\41 386
IMUL reg32,imm \321\155\1\x69\100\151 386,SM,ND
IMUL reg32,imm \321\155\x69\100\151 386,SM,ND
IMUL reg64,sbyte \324\1\x6B\100\15 X64,SM,ND
IMUL reg64,imm32 \324\1\x69\100\41 X64
IMUL reg64,imm \324\155\1\x69\100\151 X64,SM,ND
IMUL reg64,imm \324\155\x69\100\151 X64,SM,ND
IN reg_al,imm \1\xE4\25 8086,SB
IN reg_ax,imm \320\1\xE5\25 8086,SB
IN reg_eax,imm \321\1\xE5\25 386,SB
@ -750,8 +750,8 @@ MOV reg32,reg_dreg \2\x0F\x21\101 386,PRIV
MOV reg64,reg_dreg \323\2\x0F\x21\101 X64,PRIV
MOV reg_dreg,reg32 \2\x0F\x23\110 386,PRIV
MOV reg_dreg,reg64 \323\2\x0F\x23\110 X64,PRIV
MOV reg32,reg_treg \2\x0F\x24\101 386,NOLONG
MOV reg_treg,reg32 \2\x0F\x26\110 386,NOLONG
MOV reg32,reg_treg \2\x0F\x24\101 386,NOLONG,ND
MOV reg_treg,reg32 \2\x0F\x26\110 386,NOLONG,ND
MOV mem,reg8 \1\x88\101 8086,SM
MOV reg8,reg8 \1\x88\101 8086
MOV mem,reg16 \320\1\x89\101 8086,SM
@ -853,12 +853,12 @@ OR reg_eax,imm \321\1\x0D\41 386,SM
OR reg_rax,sbyte \324\1\x83\201\15 X64,SM,ND
OR reg_rax,imm \324\1\x0D\41 X64,SM
OR rm8,imm \1\x80\201\21 8086,SM
OR rm16,imm \320\145\1\x81\201\141 8086,SM
OR rm32,imm \321\155\1\x81\201\151 386,SM
OR rm64,imm \324\155\1\x81\201\151 X64,SM
OR rm16,imm \320\145\x81\201\141 8086,SM
OR rm32,imm \321\155\x81\201\151 386,SM
OR rm64,imm \324\155\x81\201\151 X64,SM
OR mem,imm8 \1\x80\201\21 8086,SM
OR mem,imm16 \320\145\1\x81\201\141 8086,SM
OR mem,imm32 \321\155\1\x81\201\151 386,SM
OR mem,imm16 \320\145\x81\201\141 8086,SM
OR mem,imm32 \321\155\x81\201\151 386,SM
OUT imm,reg_al \1\xE6\24 8086,SB
OUT imm,reg_ax \320\1\xE7\24 8086,SB
OUT imm,reg_eax \321\1\xE7\24 386,SB
@ -984,9 +984,9 @@ PUSH reg_dess \6 8086,NOLONG
PUSH reg_fsgs \1\x0F\7 386
PUSH imm8 \1\x6A\14 186
PUSH sbyte \1\x6A\14 186,ND
PUSH imm16 \320\144\1\x68\140 186
PUSH imm32 \321\154\1\x68\150 386,NOLONG
PUSH imm64 \321\154\1\x68\150 X64
PUSH imm16 \320\144\x68\140 186
PUSH imm32 \321\154\x68\150 386,NOLONG
PUSH imm64 \321\154\x68\150 X64
PUSH imm \1\x68\34 186
PUSHA void \322\1\x60 186,NOLONG
PUSHAD void \321\1\x60 386,NOLONG
@ -1055,7 +1055,7 @@ ROR rm32,imm \321\1\xC1\201\25 386,SB
ROR rm64,unity \324\1\xD1\201 X64
ROR rm64,reg_cl \324\1\xD3\201 X64
ROR rm64,imm \324\1\xC1\201\25 X64,SB
RDM void \2\x0F\x3A P6,CYRIX
RDM void \2\x0F\x3A P6,CYRIX,ND
RSDC reg_sreg,mem80 \2\x0F\x79\110 486,CYRIX,SMM
RSLDT mem80 \2\x0F\x7B\200 486,CYRIX,SMM
RSM void \2\x0F\xAA PENT,SMM
@ -1113,12 +1113,12 @@ SBB reg_eax,imm \321\1\x1D\41 386,SM
SBB reg_rax,sbyte \324\1\x83\203\15 X64,SM,ND
SBB reg_rax,imm \324\1\x1D\41 X64,SM
SBB rm8,imm \1\x80\203\21 8086,SM
SBB rm16,imm \320\145\1\x81\203\141 8086,SM
SBB rm32,imm \321\155\1\x81\203\151 386,SM
SBB rm64,imm \324\155\1\x81\203\151 X64,SM
SBB rm16,imm \320\145\x81\203\141 8086,SM
SBB rm32,imm \321\155\x81\203\151 386,SM
SBB rm64,imm \324\155\x81\203\151 X64,SM
SBB mem,imm8 \1\x80\203\21 8086,SM
SBB mem,imm16 \320\145\1\x81\203\141 8086,SM
SBB mem,imm32 \321\155\1\x81\203\151 386,SM
SBB mem,imm16 \320\145\x81\203\141 8086,SM
SBB mem,imm32 \321\155\x81\203\151 386,SM
SCASB void \335\1\xAE 8086
SCASD void \335\321\1\xAF 386
SCASQ void \335\324\1\xAF X64
@ -1227,14 +1227,14 @@ SUB reg_eax,imm \321\1\x2D\41 386,SM
SUB reg_rax,sbyte \324\1\x83\205\15 X64,SM,ND
SUB reg_rax,imm \324\1\x2D\41 X64,SM
SUB rm8,imm \1\x80\205\21 8086,SM
SUB rm16,imm \320\145\1\x81\205\141 8086,SM
SUB rm32,imm \321\155\1\x81\205\151 386,SM
SUB rm64,imm \324\155\1\x81\205\151 X64,SM
SUB rm16,imm \320\145\x81\205\141 8086,SM
SUB rm32,imm \321\155\x81\205\151 386,SM
SUB rm64,imm \324\155\x81\205\151 X64,SM
SUB mem,imm8 \1\x80\205\21 8086,SM
SUB mem,imm16 \320\145\1\x81\205\141 8086,SM
SUB mem,imm32 \321\155\1\x81\205\151 386,SM
SUB mem,imm16 \320\145\x81\205\141 8086,SM
SUB mem,imm32 \321\155\x81\205\151 386,SM
SVDC mem80,reg_sreg \2\x0F\x78\101 486,CYRIX,SMM
SVLDT mem80 \2\x0F\x7A\200 486,CYRIX,SMM
SVLDT mem80 \2\x0F\x7A\200 486,CYRIX,SMM,ND
SVTS mem80 \2\x0F\x7C\200 486,CYRIX,SMM
SWAPGS void \3\x0F\x01\xF8 X64
SYSCALL void \2\x0F\x05 P6,AMD
@ -1356,12 +1356,12 @@ XOR reg_eax,imm \321\1\x35\41 386,SM
XOR reg_rax,sbyte \324\1\x83\206\15 X64,SM,ND
XOR reg_rax,imm \324\1\x35\41 X64,SM
XOR rm8,imm \1\x80\206\21 8086,SM
XOR rm16,imm \320\145\1\x81\206\141 8086,SM
XOR rm32,imm \321\155\1\x81\206\151 386,SM
XOR rm64,imm \324\155\1\x81\206\151 X64,SM
XOR rm16,imm \320\145\x81\206\141 8086,SM
XOR rm32,imm \321\155\x81\206\151 386,SM
XOR rm64,imm \324\155\x81\206\151 X64,SM
XOR mem,imm8 \1\x80\206\21 8086,SM
XOR mem,imm16 \320\145\1\x81\206\141 8086,SM
XOR mem,imm32 \321\155\1\x81\206\151 386,SM
XOR mem,imm16 \320\145\x81\206\141 8086,SM
XOR mem,imm32 \321\155\x81\206\151 386,SM
CMOVcc reg16,mem \320\1\x0F\330\x40\110 P6,SM
CMOVcc reg16,reg16 \320\1\x0F\330\x40\110 P6
CMOVcc reg32,mem \321\1\x0F\330\x40\110 P6,SM

View file

@ -56,7 +56,9 @@ while (<F>) {
}
if ($formatted && !$nd) {
push @big, $formatted;
foreach $i (startseq($_[2])) {
my @sseq = startseq($_[2]);
print $_[0], ':', join(',', @sseq), "\n";
foreach $i (@sseq) {
if (!defined($dinstables{$i})) {
$dinstables{$i} = [];
}
@ -263,14 +265,15 @@ sub format {
("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
}
sub hexlist($$$) {
my($prefix, $start, $n) = @_;
my $i;
sub addprefix ($@) {
my ($prefix, @list) = @_;
my $x;
my @l = ();
for ($i = 0; $i < $n; $i++) {
push(@l, sprintf("%s%02X", $prefix, $start+$i));
foreach $x (@list) {
push(@l, sprintf("%s%02X", $prefix, $x));
}
return @l;
}
@ -326,9 +329,9 @@ sub startseq($) {
}
foreach $pfx (@disasm_prefixes) {
if ($fbs =~ /^$pfx(.*)$/) {
if (substr($fbs, 0, length($pfx)) eq $pfx) {
$prefix = $pfx;
$fbs = $1;
$fbs = substr($fbs, length($pfx));
last;
}
}
@ -336,21 +339,28 @@ sub startseq($) {
if ($fbs ne '') {
return ($prefix.substr($fbs,0,2));
}
unshift(@codes, $c0);
} elsif ($c0 == 04) {
return ("07", "17", "1F");
return addprefix($prefix, 0x07, 0x17, 0x1F);
} elsif ($c0 == 05) {
return ("A1", "A9");
return addprefix($prefix, 0xA1, 0xA9);
} elsif ($c0 == 06) {
return ("06", "0E", "16", "1E");
return addprefix($prefix, 0x06, 0x0E, 0x16, 0x1E);
} elsif ($c0 == 07) {
return ("A0", "A8");
return addprefix($prefix, 0xA0, 0xA8);
} elsif ($c0 >= 010 && $c0 <= 013) {
return hexlist($prefix, $c1, 8);
return addprefix($prefix, $c1..($c1+7));
} elsif (($c0 & ~013) == 0144) {
return addprefix($prefix, $c1, $c1|2);
} elsif ($c0 == 0330) {
return hexlist($prefix, $c1, 16);
return addprefix($prefix, $c1..($c1+15));
} elsif ($c0 == 0 || $c0 == 0340) {
return ();
return $prefix;
} else {
# We really need to be able to distinguish "forbidden"
# and "ignorable" codes here
}
}
return ();
return $prefix;
}