#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
a) Code starts at offset 0 in the given file. This means you do not have to worry
about the headers that certain linkers add.
b) You only have to implement the given mnemonics.
c) If you hit an unknown opcode, your program should exit gracefully and give
feedback to the user as to the cause of the problem.
d) You must handle jumping/calling forwards and backwards.
e) This must work on the sample that is supplied, but it must also work on other
tests as well.
f) You must use either the recursive descent algorithm or linear sweep algorithm.
Supported Mnemonics
===================
* For all instructions below, do not worry about the ESP register being a destination
register. ESP is sometimes handled differently and you are not expected to handle that.
* All register references will be 32-bit references. For example, you do not need
to handle “mov dl, byte [ ebx ]”, you only need to handle “mov edx, dword [ ebx ]”.
An immediate will be a 32-bit value while the displacement may be 8-bit or 32-bit
in size. The only exception is the ‘retn 16-bit value’ instruction.
* You must implement labels (as seen in Example 2).
* You must implement the following mnemonics:
add, and, call, cmp, dec, idiv, imul, inc, jmp, jz/jnz, lea, mov, movsd, mul, neg,
nop, not, or, pop, push, repne cmpsd, retf, retn, sal, sar, sbb, shr, test, xor
* Your output must be similar to the examples given.
For the ‘repne cmpsd’, recall that the ‘d’ in ‘cmpsd’ refers to the data
size. In this case, it is a DWORD or 32-bit value. Thus, in the Intel Manual
we are looking for ‘repne cmps m32, m32’.
For the 'sal'/'shr'/’sar’ instructions, you only need to support:
Note that the shl/sal are the same opcode (Why is this?)
sal r/m32, 1
shr r/m32, 1
shl r/m32, 1
For the 'jz'/'jnz'/'jmp' you must implement:
jz 32-bit displacement
jz 8-bit displacement
jnz 32-bit displacement
jnz 8-bit displacement
jmp 32-bit displacement
jmp reg
jmp [ reg ]
jmp [ reg + 32-bit displacement]
For the 'retn/retf' (listed as just ‘ret’ in the Intel Instruction Manual)
instruction family, you must implement the following. Note that retn
refers to ‘return near’ and ‘retf’ refers to ‘return far’:
retn
retn 16-bit value
retf
retf 16-bit value
For the 'mov/add/and/not/or/pop/push' and similar, you must
implement (where applicable): Note: displacement could be either 32-bit
or 8-bit value.
mov eax, edx
mov [ eax ], edx
mov [ eax + displacement], edx
mov eax, [ edx ]
mov eax , [ edx + displacement]
mov eax, 0x12345678
mov [ eax ], 0x12345678
mov [ eax + displacement], 0x12345678
mov [ 0x12345678 ], 0x12345678
mov eax, [ 0x12345678 ]
"""
import argparse
import shutil
import sys
import os
import utils
from decoderState import DecoderState
from x86.decoder import X86Decoder
from strategy.linearSweep import LinearSweepDecoder
from strategy.recursiveDescent import RecursiveDescent
# Testers...
from test import fromUnit
#from test import fromOnline # not needed... blerg!
from test import fromExample
# Runs at import time, before any code below logs through utils.logger.
# NOTE(review): presumably this configures utils.logger -- confirm in utils.
utils.setupLogging()
def parseArgs():
    """Parse and validate the command-line arguments.

    At least one action (``--binary``, ``--test-examples`` or ``--test-unit``)
    and exactly one strategy (``--recursive-descent`` or ``--linear-sweep``)
    must be supplied. When that is violated, a bold red error message and the
    usage help are printed and the process exits with status 1.

    Returns:
        argparse.Namespace: attributes ``binary`` (one-element list or
        ``None``, because of ``nargs=1``), ``verbosity`` (number of ``-v``
        flags), and the booleans ``recursive_descent``, ``linear_sweep``,
        ``test_examples`` and ``test_unit``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--binary", nargs=1, help="Disassemble the given binary file.")
    parser.add_argument("-v", "--verbosity", action="count", default=0, help="Show verbosity. Add more -v's to show more detail")
    parser.add_argument("--recursive-descent", action="store_true", help="Use the recursive descent method. ")
    parser.add_argument("--linear-sweep", action="store_true", help="Use the linear sweep method. ")
    parser.add_argument("--test-examples", action="store_true", help="Disassemble the class examples (example1, example2, ex2)")
    parser.add_argument("--test-unit", action="store_true", help="Disassemble unit examples (one instruction at a time)")
    args = parser.parse_args()

    def fail(message):
        """Print *message* in bold red, show the help text, and exit(1)."""
        print(utils.colors.RED + utils.colors.BOLD + message + utils.colors.NORMAL)
        parser.print_help()
        sys.exit(1)

    # At least one thing to disassemble must have been requested.
    if not (args.test_examples or args.test_unit or args.binary):
        fail("Must provide at least one of the following options:\n\t--test-examples \n\t--test-unit \n\t--binary <filepath>.\n")

    # Exactly one decoding strategy must be selected.
    if args.recursive_descent and args.linear_sweep:
        fail("Cannot provide both '--recursive-descent' and '--linear-sweep' options! Pick one.\n")
    if not (args.recursive_descent or args.linear_sweep):
        fail("Must provide either '--recursive-descent' or '--linear-sweep' option!\n")

    return args
def _logBanner(color, title, columns):
    """Log *title* as a three-line inverted banner padded to *columns* characters."""
    style = color + utils.colors.INVERT
    blankLine = style + " " * columns + utils.colors.NORMAL
    utils.logger.info(blankLine)
    # ljust never truncates, so a title wider than the terminal is logged as-is.
    utils.logger.info(style + title.ljust(columns) + utils.colors.NORMAL)
    utils.logger.info(blankLine)


def main(objectFile, StrategyClass, verbose, detail):
    """Disassemble *objectFile* with the given strategy and log the outcome.

    Args:
        objectFile: path of the binary to disassemble.
        StrategyClass: decoding strategy class; it is instantiated with the
            X86Decoder built here and its ``decode`` method is called. It is
            compared against LinearSweepDecoder and RecursiveDescent to pick
            the wording of the "almost completed" message.
        verbose: forwarded to ``decode``.
        detail: forwarded to ``decode``.

    Exits the process with status 1 (after logging a message) when
    *objectFile* does not exist.
    """
    if not os.path.exists(objectFile):
        utils.logger.info(utils.colors.RED + utils.colors.BOLD + ("Could not find the given file: %s" % repr(objectFile)) + utils.colors.NORMAL)
        sys.exit(1)

    # Banner width; (80, 20) is the fallback when the terminal size is unknown.
    columns = shutil.get_terminal_size((80, 20)).columns

    decoderState = DecoderState(objectFile=objectFile)
    decoderSpec = X86Decoder(decoderState)
    decoder = StrategyClass(decoderSpec)
    decoder.decode(verbose=verbose, detail=detail)
    # The final progress report is always requested with detail=True,
    # independent of the `detail` argument.
    decoderState.showDecodeProgress(detail=True)

    # In the elif branches isComplete() has already been seen to be false,
    # so it is not re-tested there.
    if decoderState.isComplete():
        _logBanner(utils.colors.GREEN, "Completed disassembly of %s" % repr(objectFile), columns)
    elif StrategyClass == LinearSweepDecoder and decoderState.isSweepComplete():
        _logBanner(utils.colors.YELLOW, "Almost completed disassembly of %s. A few bytes remain encoded when using Linear Sweep method." % repr(objectFile), columns)
    elif StrategyClass == RecursiveDescent and decoderState.isRecursiveDescentComplete():
        _logBanner(utils.colors.YELLOW, "Almost completed disassembly of %s. A few bytes remain encoded when using Recursive Descent method." % repr(objectFile), columns)
    else:
        utils.logger.info(utils.colors.YELLOW + utils.colors.BOLD + ("Could not finish processing %s" % repr(objectFile)) + utils.colors.NORMAL)
if __name__ == "__main__":
    # Script entry point: parse the CLI, pick a strategy, then run every
    # requested action (unit tests, class examples, a user-supplied binary).
    cliArgs = parseArgs()

    # Recursive descent when requested; linear sweep otherwise.
    strategy = RecursiveDescent if cliArgs.recursive_descent else LinearSweepDecoder

    # -v turns on verbose output, -vv (or more) additionally turns on detail.
    verbose = cliArgs.verbosity >= 1
    detail = cliArgs.verbosity >= 2

    if cliArgs.test_unit:
        fromUnit.test(strategy, verbose=verbose, detail=detail)

    if cliArgs.test_examples:
        fromExample.test(strategy, None, verbose=verbose, detail=detail)

    if cliArgs.binary:
        # --binary is declared with nargs=1, so the path is the only list item.
        main(cliArgs.binary[0], strategy, verbose=verbose, detail=detail)
# TODO: accept arguments for generic file!
#fromOnline.test()
#main()