Last active
May 22, 2018 13:32
-
-
Save tomrittervg/c5e05ae1d1f470c081cfe8af4c12d4e5 to your computer and use it in GitHub Desktop.
Really rough and ugly code to parse a PE File and DWARF Information. Doesn't support a ton of stuff, only supports enough to do the one thing I needed it to do.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Values compared against the "children" field that the parser reads right
# after an abbreviation's tag.
DW_CHILDREN_no, DW_CHILDREN_yes = 0x00, 0x01

# Names of the DW_LANG language codes this tool can label, keyed by code.
DW_LANGs = {
    0x0004: 'DW_LANG_C_plus_plus',
    0x000C: 'DW_LANG_C99',
}
def lookup(val, db):
    """Reverse lookup: return the first key of ``db`` whose value equals ``val``.

    Returns ``False`` when no value in ``db`` matches.
    """
    return next((key for key in db if db[key] == val), False)
def DW_FORM(val):
    """Return the DW_FORM_* name registered in ``DW_FORMs`` for code ``val``.

    Raises:
        Exception: if ``val`` is not a value in ``DW_FORMs``.
    """
    name = lookup(val, DW_FORMs)
    if name:
        return name
    raise Exception("Could not locate the value " + hex(val) + " in the table")
# Attribute form names mapped to their numeric DW_FORM codes.
DW_FORMs = {
    'DW_FORM_addr': 0x01,
    'DW_FORM_block2': 0x03,
    'DW_FORM_block4': 0x04,
    'DW_FORM_data2': 0x05,
    'DW_FORM_data4': 0x06,
    'DW_FORM_data8': 0x07,
    'DW_FORM_string': 0x08,
    'DW_FORM_block': 0x09,
    'DW_FORM_block1': 0x0a,
    'DW_FORM_data1': 0x0b,
    'DW_FORM_flag': 0x0c,
    'DW_FORM_sdata': 0x0d,
    'DW_FORM_strp': 0x0e,
    'DW_FORM_udata': 0x0f,
    'DW_FORM_ref_addr': 0x10,
    'DW_FORM_ref1': 0x11,
    'DW_FORM_ref2': 0x12,
    'DW_FORM_ref4': 0x13,
    'DW_FORM_ref8': 0x14,
    'DW_FORM_ref_udata': 0x15,
    'DW_FORM_indirect': 0x16,
    'DW_FORM_sec_offset': 0x17,
    'DW_FORM_exprloc': 0x18,
    'DW_FORM_flag_present': 0x19,
    'DW_FORM_ref_sig8': 0x20,
}
def DW_TAG(val):
    """Return the DW_TAG_* name registered in ``DW_TAGs`` for code ``val``.

    Codes missing from the table but inside 0x4080..0xffff are reported as
    ``DW_TAG_Unknown_<hex>`` instead of failing.

    Raises:
        Exception: if ``val`` is neither in the table nor in that range.
    """
    name = lookup(val, DW_TAGs)
    if name:
        return name
    if 0x4080 <= val <= 0xffff:
        return 'DW_TAG_Unknown_' + format(val, "x")
    raise Exception("Could not locate the value " + hex(val) + " in the table")
# Debugging-information-entry tag names mapped to their numeric DW_TAG codes.
DW_TAGs = {
    'DW_TAG_array_type': 0x01,
    'DW_TAG_class_type': 0x02,
    'DW_TAG_entry_point': 0x03,
    'DW_TAG_enumeration_type': 0x04,
    'DW_TAG_formal_parameter': 0x05,
    'DW_TAG_imported_declaration': 0x08,
    'DW_TAG_label': 0x0a,
    'DW_TAG_lexical_block': 0x0b,
    'DW_TAG_member': 0x0d,
    'DW_TAG_pointer_type': 0x0f,
    'DW_TAG_reference_type': 0x10,
    'DW_TAG_compile_unit': 0x11,
    'DW_TAG_string_type': 0x12,
    'DW_TAG_structure_type': 0x13,
    'DW_TAG_subroutine_type': 0x15,
    'DW_TAG_typedef': 0x16,
    'DW_TAG_union_type': 0x17,
    'DW_TAG_unspecified_parameters': 0x18,
    'DW_TAG_variant': 0x19,
    'DW_TAG_common_block': 0x1a,
    'DW_TAG_common_inclusion': 0x1b,
    'DW_TAG_inheritance': 0x1c,
    'DW_TAG_inlined_subroutine': 0x1d,
    'DW_TAG_module': 0x1e,
    'DW_TAG_ptr_to_member_type': 0x1f,
    'DW_TAG_set_type': 0x20,
    'DW_TAG_subrange_type': 0x21,
    'DW_TAG_with_stmt': 0x22,
    'DW_TAG_access_declaration': 0x23,
    'DW_TAG_base_type': 0x24,
    'DW_TAG_catch_block': 0x25,
    'DW_TAG_const_type': 0x26,
    'DW_TAG_constant': 0x27,
    'DW_TAG_enumerator': 0x28,
    'DW_TAG_file_type': 0x29,
    'DW_TAG_friend': 0x2a,
    'DW_TAG_namelist': 0x2b,
    'DW_TAG_namelist_item': 0x2c,
    'DW_TAG_packed_type': 0x2d,
    'DW_TAG_subprogram': 0x2e,
    'DW_TAG_template_type_parameter': 0x2f,
    'DW_TAG_template_value_parameter': 0x30,
    'DW_TAG_thrown_type': 0x31,
    'DW_TAG_try_block': 0x32,
    'DW_TAG_variant_part': 0x33,
    'DW_TAG_variable': 0x34,
    'DW_TAG_volatile_type': 0x35,
    'DW_TAG_DWarf_procedure': 0x36,
    'DW_TAG_restrict_type': 0x37,
    'DW_TAG_interface_type': 0x38,
    'DW_TAG_namespace': 0x39,
    'DW_TAG_imported_module': 0x3a,
    'DW_TAG_unspecified_type': 0x3b,
    'DW_TAG_partial_unit': 0x3c,
    'DW_TAG_imported_unit': 0x3d,
    'DW_TAG_condition': 0x3f,
    'DW_TAG_shared_type': 0x40,
    'DW_TAG_type_unit': 0x41,
    'DW_TAG_rvalue_reference_type': 0x42,
    'DW_TAG_template_alias': 0x43,
}
def DW_AT(val):
    """Return the DW_AT_* name registered in ``DW_ATs`` for code ``val``.

    Codes missing from the table but inside 0x2000..0x3fff are reported as
    ``DW_AT_Unknown_<hex>`` instead of failing.

    Raises:
        Exception: if ``val`` is neither in the table nor in that range.
    """
    name = lookup(val, DW_ATs)
    if name:
        return name
    if 0x2000 <= val <= 0x3fff:
        return 'DW_AT_Unknown_' + format(val, "x")
    raise Exception("Could not locate the value " + hex(val) + " in the table")
# Attribute names mapped to their numeric DW_AT codes.
DW_ATs = {
    'DW_AT_sibling': 0x01,
    'DW_AT_location': 0x02,
    'DW_AT_name': 0x03,
    'DW_AT_ordering': 0x09,
    'DW_AT_byte_size': 0x0b,
    'DW_AT_bit_offset': 0x0c,
    'DW_AT_bit_size': 0x0d,
    'DW_AT_stmt_list': 0x10,
    'DW_AT_low_pc': 0x11,
    'DW_AT_high_pc': 0x12,
    'DW_AT_language': 0x13,
    'DW_AT_discr': 0x15,
    'DW_AT_discr_value': 0x16,
    'DW_AT_visibility': 0x17,
    'DW_AT_import': 0x18,
    'DW_AT_string_length': 0x19,
    'DW_AT_common_reference': 0x1a,
    'DW_AT_comp_dir': 0x1b,
    'DW_AT_const_value': 0x1c,
    'DW_AT_containing_type': 0x1d,
    'DW_AT_default_value': 0x1e,
    'DW_AT_inline': 0x20,
    'DW_AT_is_optional': 0x21,
    'DW_AT_lower_bound': 0x22,
    'DW_AT_producer': 0x25,
    'DW_AT_prototyped': 0x27,
    'DW_AT_return_addr': 0x2a,
    'DW_AT_start_scope': 0x2c,
    'DW_AT_bit_stride': 0x2e,
    'DW_AT_upper_bound': 0x2f,
    'DW_AT_abstract_origin': 0x31,
    'DW_AT_accessibility': 0x32,
    'DW_AT_address_class': 0x33,
    'DW_AT_artificial': 0x34,
    'DW_AT_base_types': 0x35,
    'DW_AT_calling_convention': 0x36,
    'DW_AT_count': 0x37,
    'DW_AT_data_member_location': 0x38,
    'DW_AT_decl_column': 0x39,
    'DW_AT_decl_file': 0x3a,
    'DW_AT_decl_line': 0x3b,
    'DW_AT_declaration': 0x3c,
    'DW_AT_discr_list': 0x3d,
    'DW_AT_encoding': 0x3e,
    'DW_AT_external': 0x3f,
    'DW_AT_frame_base': 0x40,
    'DW_AT_friend': 0x41,
    'DW_AT_identifier_case': 0x42,
    'DW_AT_macro_info': 0x43,
    'DW_AT_namelist_item': 0x44,
    'DW_AT_priority': 0x45,
    'DW_AT_segment': 0x46,
    'DW_AT_specification': 0x47,
    'DW_AT_static_link': 0x48,
    'DW_AT_type': 0x49,
    'DW_AT_use_location': 0x4a,
    'DW_AT_variable_parameter': 0x4b,
    'DW_AT_virtuality': 0x4c,
    'DW_AT_vtable_elem_location': 0x4d,
    'DW_AT_allocated': 0x4e,
    'DW_AT_associated': 0x4f,
    'DW_AT_data_location': 0x50,
    'DW_AT_byte_stride': 0x51,
    'DW_AT_entry_pc': 0x52,
    'DW_AT_use_UTF8': 0x53,
    'DW_AT_extension': 0x54,
    'DW_AT_ranges': 0x55,
    'DW_AT_trampoline': 0x56,
    'DW_AT_call_column': 0x57,
    'DW_AT_call_file': 0x58,
    'DW_AT_call_line': 0x59,
    'DW_AT_description': 0x5a,
    'DW_AT_binary_scale': 0x5b,
    'DW_AT_decimal_scale': 0x5c,
    'DW_AT_small': 0x5d,
    'DW_AT_decimal_sign': 0x5e,
    'DW_AT_digit_count': 0x5f,
    'DW_AT_picture_string': 0x60,
    'DW_AT_mutable': 0x61,
    'DW_AT_threads_scaled': 0x62,
    'DW_AT_explicit': 0x63,
    'DW_AT_object_pointer': 0x64,
    'DW_AT_endianity': 0x65,
    'DW_AT_elemental': 0x66,
    'DW_AT_pure': 0x67,
    'DW_AT_recursive': 0x68,
    'DW_AT_signature': 0x69,
    'DW_AT_main_subprogram': 0x6a,
    'DW_AT_data_bit_offset': 0x6b,
    'DW_AT_const_expr': 0x6c,
    'DW_AT_enum_class': 0x6d,
    'DW_AT_linkage_name': 0x6e,
    'DW_AT_string_length_bit_size': 0x6f,
    'DW_AT_string_length_byte_size': 0x70,
    'DW_AT_rank': 0x71,
    'DW_AT_str_offsets_base': 0x72,
    'DW_AT_addr_base': 0x73,
    'DW_AT_rnglists_base': 0x74,
    'DW_AT_dwo_name': 0x76,
    'DW_AT_reference': 0x77,
    'DW_AT_rvalue_reference': 0x78,
    'DW_AT_macros': 0x79,
    'DW_AT_call_all_calls': 0x7a,
    'DW_AT_call_all_source_calls': 0x7b,
    'DW_AT_call_all_tail_calls': 0x7c,
    'DW_AT_call_return_pc': 0x7d,
    'DW_AT_call_value': 0x7e,
    'DW_AT_call_origin': 0x7f,
    'DW_AT_call_parameter': 0x80,
    'DW_AT_call_pc': 0x81,
    'DW_AT_call_tail_call': 0x82,
    'DW_AT_call_target': 0x83,
    'DW_AT_call_target_clobbered': 0x84,
    'DW_AT_call_data_location': 0x85,
    'DW_AT_call_data_value': 0x86,
    'DW_AT_noreturn': 0x87,
    'DW_AT_alignment': 0x88,
    'DW_AT_export_symbols': 0x89,
    'DW_AT_deleted': 0x8a,
    'DW_AT_defaulted': 0x8b,
    'DW_AT_loclists_base': 0x8c,
}
# Base-type encoding names, keyed by their numeric DW_ATE code.
DW_ATEs = {
    0x01: 'DW_ATE_address',
    0x02: 'DW_ATE_boolean',
    0x03: 'DW_ATE_complex_float',
    0x04: 'DW_ATE_float',
    0x05: 'DW_ATE_signed',
    0x06: 'DW_ATE_signed_char',
    0x07: 'DW_ATE_unsigned',
    0x08: 'DW_ATE_unsigned_char',
    0x09: 'DW_ATE_imaginary_float',
    0x0a: 'DW_ATE_packed_decimal',
    0x0b: 'DW_ATE_numeric_string',
    0x0c: 'DW_ATE_edited',
    0x0d: 'DW_ATE_signed_fixed',
    0x0e: 'DW_ATE_unsigned_fixed',
    0x0f: 'DW_ATE_decimal_float',
    0x10: 'DW_ATE_UTF',
    0x11: 'DW_ATE_UCS',
    0x12: 'DW_ATE_ASCII',
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import sys | |
import struct | |
import binascii | |
import argparse | |
from collections import OrderedDict | |
from dwarf_constants import * | |
class DebugAbbrev:
    """Cursor over the raw bytes of a .debug_abbrev section.

    Keeps a read position (``index``) into ``data`` and a map (``offsets``)
    from abbreviation codes seen so far to the position in ``data`` where
    each of them starts.
    """

    def __init__(self, f, originalOffset, data):
        """Store the section bytes and start reading at position 0.

        Args:
            f: the file the section was read from (used only for error
                messages).
            originalOffset: file offset at which the section starts.
            data: indexable sequence of byte values holding the section.
        """
        self.fileRef = f
        self.originalOffset = originalOffset
        self.data = data
        self.index = 0
        self.offset_index = 0
        self.offsets = {1: 0}

    def translateToFileOffset(self, entryIndx):
        """Return the file offset of the abbreviation with code ``entryIndx``.

        Raises:
            Exception: if that code has not been recorded in ``offsets``.
        """
        if entryIndx not in self.offsets:
            raise Exception("Got a translation request for an offset (" + str(entryIndx) + ") I haven't seen and stored.")
        return self.originalOffset + self.offsets[entryIndx]

    def resetToOffset(self, indx):
        """Forget all recorded codes and move the read position to ``indx``."""
        self.offsets = {1: 0}
        self.index = indx

    def checkOffset(self, entryIndx):
        """Position the cursor just past the code of abbreviation ``entryIndx``.

        The entry at the current position is always recorded first, so codes
        are learned as the caller walks through the table.

        Raises:
            Exception: if ``entryIndx`` has not been recorded.
        """
        # Read the next entry and store its offset no matter what
        saveIndex = self.index
        nextIndx = self.read_uleb128()
        self.offsets[nextIndx] = saveIndex

        if entryIndx not in self.offsets:
            # Report the position of the file this section came from; the
            # previous code relied on a module-level ``f`` being in scope.
            raise Exception("Got a request for an offset (" + str(entryIndx) + ") I haven't seen and stored." +
                            " File Offset " + str(self.fileRef.tell()) + " .debug_abbrev max offset " +
                            str(max(self.offsets.keys())) + " file offset ")

        self.index = self.offsets[entryIndx]
        # Skip over the abbreviation code itself so the tag is read next.
        self.read_uleb128()

    def read_uleb128(self):
        """Decode one unsigned LEB128 value at the cursor and advance past it."""
        advance, value = read_uleb128(self.data[self.index:])
        self.index += advance
        return value
def readBytes(f, numBytes, convert=True):
    """Read ``numBytes`` bytes from ``f``.

    Args:
        f: binary file-like object with a ``read`` method.
        numBytes: number of bytes to read.
        convert: when true, decode the bytes as one little-endian unsigned
            integer (only 1, 2, 4 and 8 bytes are supported); when false,
            return the raw bytes as a ``bytearray``.

    Returns:
        An int when ``convert`` is true, otherwise a ``bytearray``.

    Raises:
        ValueError: if ``convert`` is true and ``numBytes`` is not 1, 2, 4
            or 8.
        struct.error: if fewer than ``numBytes`` bytes could be read.
    """
    bString = f.read(numBytes)
    if not convert:
        return bytearray(bString)

    formats = {1: "<B", 2: "<H", 4: "<I", 8: "<Q"}
    if numBytes not in formats:
        # Raising a bare string is not a valid exception; use a real one.
        raise ValueError("Unknown length for conversion")
    return struct.unpack(formats[numBytes], bString)[0]
def read_uleb128(f):
    """Decode one unsigned LEB128 number of at most five bytes.

    Args:
        f: either a binary file-like object (anything with a ``read``
            method), from which bytes are consumed, or an indexable sequence
            of byte values (e.g. a ``bytearray``), decoded from index 0.

    Returns:
        A tuple ``(bytes_used, value)``.

    Prints a message and exits the process with status 1 when the fifth
    byte still has its continuation bit set and carries bits in its upper
    nibble.
    """
    # Duck-type the source: an exact ``type(f) == file`` check rejects every
    # other file-like object (and does not exist outside Python 2).
    from_stream = hasattr(f, "read")
    value = 0
    for i in range(5):
        if from_stream:
            byte_value = struct.unpack("<B", f.read(1))[0]
        else:
            byte_value = f[i]
        tmp = byte_value & 0x7f
        value = tmp << (i * 7) | value
        if (byte_value & 0x80) != 0x80:
            break
    if i == 4 and (tmp & 0xf0) != 0:
        print("parse error on uleb128 number")
        sys.exit(1)
    return (i + 1, value)
def read_leb128_(f):
    """Decode one signed LEB128 number of at most five bytes (32-bit result).

    Args:
        f: either a binary file-like object (anything with a ``read``
            method), from which bytes are consumed, or an indexable sequence
            of byte values, decoded from index 0.

    Returns:
        A tuple ``(bytes_used, value)`` where ``value`` is reinterpreted as
        a signed 32-bit integer.

    Prints a message and exits the process with status 1 when the fifth
    byte still has its continuation bit set and carries bits in its upper
    nibble.
    """
    # Sign-extension masks and sign-bit positions, indexed by the position
    # of the final byte.
    mask = [0xffffff80, 0xffffc000, 0xffe00000, 0xf0000000, 0]
    bitmask = [0x40, 0x40, 0x40, 0x40, 0x8]
    # Duck-type the source instead of requiring the Python 2 ``file`` type.
    from_stream = hasattr(f, "read")
    value = 0
    for i in range(5):
        if from_stream:
            byte_value = struct.unpack("<B", f.read(1))[0]
        else:
            byte_value = f[i]
        # (A per-byte debug print used to live here; it flooded stdout.)
        tmp = byte_value & 0x7f
        value = tmp << (i * 7) | value
        if (byte_value & 0x80) != 0x80:
            if bitmask[i] & tmp:
                value |= mask[i]
            break
    if i == 4 and (tmp & 0xf0) != 0:
        where = f.tell() - 4 if from_stream else "(not a file)"
        print("parse error on sleb128 number at file offset  %s" % (where,))
        sys.exit(1)
    # Reinterpret the unsigned 32-bit pattern as a signed integer.
    buffer = struct.pack("I", value)
    value, = struct.unpack("i", buffer)
    return (i + 1, value)
def read_leb128(f):
    """Decode one signed LEB128 number of any length.

    Args:
        f: either a binary file-like object (anything with a ``read``
            method), from which bytes are consumed, or an indexable sequence
            of byte values, decoded from index 0.

    Returns:
        A tuple ``(bytes_used, value)``.
    """
    # Duck-type the source: an exact ``type(f) == file`` check rejects every
    # other file-like object (and does not exist outside Python 2).
    from_stream = hasattr(f, "read")
    ret_val = 0
    bytes_used = 0
    while True:
        if from_stream:
            byte_value = struct.unpack("<B", f.read(1))[0]
        else:
            byte_value = f[bytes_used]
        ret_val |= (byte_value & 0x7F) << (7 * bytes_used)
        bytes_used += 1
        if (byte_value & 0x80) == 0:
            break
    # Bit 6 of the final byte is the sign bit: extend it upwards.
    if byte_value & 0x40:
        ret_val |= (-1 << (7 * bytes_used))
    return (bytes_used, ret_val)
def readCString(f):
    """Read a NUL-terminated string from ``f`` and return it without the NUL.

    Args:
        f: binary file-like object with a ``read`` method.

    Returns:
        The bytes before the terminator as a native ``str`` (on Python 3
        each byte is mapped to the code point of the same value).

    Raises:
        EOFError: if the file ends before a NUL byte is found. Without this
            check an empty read never equals the terminator and the loop
            would never finish.
    """
    chars = bytearray()
    while True:
        b = f.read(1)
        if not b:
            raise EOFError("Reached end of file while reading a NUL-terminated string")
        if b == b"\x00":
            break
        chars += b
    raw = bytes(chars)
    # ``str is bytes`` only on Python 2, where the raw bytes are the result.
    return raw if str is bytes else raw.decode("latin-1")
def expect(f, expected, message=""):
    """Consume ``len(expected)`` bytes from ``f`` and require them to match.

    On a mismatch, prints where the read started together with the expected
    and actual bytes in hex, prints ``message`` on its own line, and exits
    the process with status 1.
    """
    wanted = len(expected)
    got = bytearray(f.read(wanted))
    if expected != got:
        report = [
            "Parsing",
            message + ": from file position",
            f.tell() - wanted,
            "I expected to get",
            binascii.hexlify(expected),
            "but got",
            binascii.hexlify(got),
        ]
        print(" ".join([str(part) for part in report]))
        print(message)
        sys.exit(1)
def discard(f, size, type=""):
    """Advance ``f`` by reading ``size`` bytes and throwing them away.

    ``type`` is accepted only as a label for the caller's benefit; it is not
    used.
    """
    bytearray(f.read(size))
def camel(varName):
    """Turn a space-separated label into a lowerCamelCase identifier.

    Each word is title-cased, the spaces are dropped, and the first character
    is lowered, e.g. "Size Of Image" -> "sizeOfImage".
    """
    squeezed = "".join(varName.title().split(" "))
    return squeezed[0].lower() + squeezed[1:]
def uncamel(varName):
    """Turn a camelCase identifier back into a spaced, capitalised label.

    The first character is upper-cased and a space is inserted before every
    later upper-case character, e.g. "sizeOfImage" -> "Size Of Image".
    """
    pieces = []
    for pos, ch in enumerate(varName):
        if pos == 0:
            pieces.append(ch.upper())
            continue
        if ch.isupper():
            pieces.append(" ")
        pieces.append(ch)
    return "".join(pieces)
def readNoPrint(strName, f, numBytes, convert=True):
    """Read a field from ``f`` and publish it as a module-level global.

    The global's name is ``camel(strName)``; its value is whatever
    ``readBytes(f, numBytes, convert)`` returns. Returns that global's name.
    """
    key = camel(strName)
    globals().update({key: readBytes(f, numBytes, convert)})
    return key
def readPrint(strName, f, numBytes, convert=True):
    """Read a field via ``readNoPrint`` and print its label and value."""
    key = readNoPrint(strName, f, numBytes, convert)
    print("%s %s" % (strName, globals()[key]))
def printAll(d):
    """Print a separator line, then every entry of ``d`` as "<Label> <value>".

    Labels are the keys passed through ``uncamel``.
    """
    print("---------------")
    for key in d:
        print("%s %s" % (uncamel(key), d[key]))
# ================================================ | |
def dumpStringTable(dbIsh):
    """Print every NUL-terminated string found after the first four bytes.

    ``dbIsh`` is a sequence of byte values. Characters left over after the
    last terminator are reported on a final "Leftovers" line.
    """
    pending = []
    # The first four bytes are skipped, not treated as string data.
    for code in dbIsh[4:]:
        ch = chr(code)
        if ch == "\x00":
            print("".join(pending))
            pending = []
        else:
            pending.append(ch)
    if pending:
        print("Leftovers in String Table: %s" % (pending,))
def sectionNameStr(name):
    """Return ``name`` with NUL padding stripped and a description appended.

    Known section names come back as "<name> (<description>)"; any other
    name is returned stripped but otherwise unchanged.
    """
    descriptions = (
        (".text", "Executable Code"),
        (".data", "global initialized data".title()),
        (".rdata", "global read-only data".title()),
        (".edata", "export tables".title()),
        (".idata", "import tables".title()),
        (".pdata", "exception handling information".title()),
        (".xdata", "exception information, free format".title()),
        (".reloc", "information for relocation of library files".title()),
        (".rsrc", "resources of the executable".title()),
        (".drective", "linker options".title()),
        (".bss", "uninitialized data, free format".title()),
        (".debug_aranges", "Lookup table for mapping addresses to compilation units"),
        (".debug_frame", "Call frame information"),
        (".debug_info", "Core DWARF information section"),
        (".debug_line", "Line number information"),
        (".debug_loc", "Location lists used in the DW_AT_location attributes"),
        (".debug_macinfo", "Macro information"),
        (".debug_pubnames", "Lookup table for global objects and functions"),
        (".debug_pubtypes", "Lookup table for global types"),
        (".debug_ranges", "Address ranges used in the DW_AT_ranges attributes"),
        (".debug_str", "String table used in .debug_info"),
        (".debug_types", "Type descriptions "),
    )
    name = name.strip("\x00")
    s = ""
    # Compare with == rather than hashing so unhashable byte sequences work.
    for known, text in descriptions:
        if name == known:
            s = text
            break
    if s:
        return name + " (" + s + ")"
    return name
def resourceTypeStr(type):
    """Return a title-cased label for a numeric resource type.

    Codes without an entry in the table below yield "Unknown".
    """
    labels = (
        (1, "cursor"),
        (2, "bitmap"),
        (3, "icon"),
        (4, "menu"),
        (5, "dialog box"),
        (6, "string table entry"),
        (7, "font directory"),
        (8, "font"),
        (9, "accelerator table"),
        (10, "application defined resource (raw data)"),
        (11, "message table entry"),
        (12, "group cursor"),
        (14, "group icon"),
        (16, "version information"),
        (17, "dlginclude"),
        (19, "plug and play resource"),
        (20, "VXD"),
        (21, "animated cursor"),
        (22, "animated icon"),
        (23, "HTML"),
        (24, "side-by-side assembly manifest"),
    )
    for code, label in labels:
        if type == code:
            return label.title()
    return "Unknown"
def _unsupported_form(form_name):
    """Build a reader that fails loudly for a form this parser cannot decode.

    Returning an exception object as if it were the attribute's value (the
    previous behaviour) hid the problem; raising makes it visible.
    """
    def reader(f):
        raise NotImplementedError(form_name + " not implemented")
    return reader


# Readers for attribute values, keyed by DW_FORM name. Each takes the open
# file positioned at the value and returns the decoded value.
# 'addressSize' and 'dwarfFormat' are module-level globals set by the main
# script while it parses a compile-unit header; the address reader falls back
# to 4 bytes when no header has been parsed yet.
DW_FORM_FUNCs = {}
DW_FORM_FUNCs['DW_FORM_addr'] = lambda f : readBytes(f, globals().get('addressSize', 4))
DW_FORM_FUNCs['DW_FORM_block2'] = lambda f : readBytes(f, readBytes(f, 2), False)
DW_FORM_FUNCs['DW_FORM_block4'] = lambda f : readBytes(f, readBytes(f, 4), False)
DW_FORM_FUNCs['DW_FORM_data2'] = lambda f : readBytes(f, 2)
DW_FORM_FUNCs['DW_FORM_data4'] = lambda f : readBytes(f, 4)
DW_FORM_FUNCs['DW_FORM_data8'] = lambda f : readBytes(f, 8)
DW_FORM_FUNCs['DW_FORM_string'] = lambda f : readCString(f)
DW_FORM_FUNCs['DW_FORM_block'] = lambda f : readBytes(f, read_uleb128(f)[1], False)
DW_FORM_FUNCs['DW_FORM_block1'] = lambda f : readBytes(f, readBytes(f, 1), False)
DW_FORM_FUNCs['DW_FORM_data1'] = lambda f : readBytes(f, 1)
DW_FORM_FUNCs['DW_FORM_flag'] = lambda f : readBytes(f, 1)
DW_FORM_FUNCs['DW_FORM_sdata'] = lambda f : read_leb128(f)[1]
DW_FORM_FUNCs['DW_FORM_strp'] = lambda f : readBytes(f, 4) if dwarfFormat == 32 else readBytes(f, 8)
DW_FORM_FUNCs['DW_FORM_udata'] = lambda f : read_uleb128(f)[1]
DW_FORM_FUNCs['DW_FORM_ref_addr'] = _unsupported_form("DW_FORM_ref_addr")
DW_FORM_FUNCs['DW_FORM_ref1'] = lambda f : readBytes(f, 1)
DW_FORM_FUNCs['DW_FORM_ref2'] = lambda f : readBytes(f, 2)
DW_FORM_FUNCs['DW_FORM_ref4'] = lambda f : readBytes(f, 4)
DW_FORM_FUNCs['DW_FORM_ref8'] = lambda f : readBytes(f, 8)
DW_FORM_FUNCs['DW_FORM_ref_udata'] = lambda f : read_uleb128(f)[1]
DW_FORM_FUNCs['DW_FORM_indirect'] = _unsupported_form("DW_FORM_indirect")
DW_FORM_FUNCs['DW_FORM_sec_offset'] = lambda f : readBytes(f, 4) if dwarfFormat == 32 else readBytes(f, 8)
DW_FORM_FUNCs['DW_FORM_exprloc'] = lambda f : readBytes(f, read_uleb128(f)[1], False)
DW_FORM_FUNCs['DW_FORM_flag_present'] = lambda f : 1
DW_FORM_FUNCs['DW_FORM_ref_sig8'] = _unsupported_form("DW_FORM_ref_sig8")
# Pretty-printers for decoded attribute values, keyed by DW_AT name.
# Attributes without an entry are printed as-is by the caller.
DW_ATTRIBUTE_FUNCs = {
    # A KeyError for an unlisted language/encoding code is left to the caller.
    'DW_AT_language': lambda s : DW_LANGs[s],
    'DW_AT_low_pc': lambda s : hex(s),
    'DW_AT_high_pc': lambda s : hex(s),
    'DW_AT_encoding': lambda s : DW_ATEs[s],
    # Byte blocks are hex-dumped; everything else is treated as a number.
    # Testing for bytes (rather than for the exact ``int`` type) keeps
    # Python 2 ``long`` offsets from being handed to hexlify.
    'DW_AT_location': lambda s : binascii.hexlify(s) if isinstance(s, (bytes, bytearray)) else hex(s),
}
# ================================================ | |
# Script entry point: for every PE file named on the command line, print its
# headers and section table, then walk the .debug_info section using the
# abbreviations from .debug_abbrev.
#
# NOTE: readPrint/readNoPrint publish each field they read as a module-level
# global named after the camel-cased label (e.g. "Number of Sections" becomes
# numberOfSections). That is where names such as machine, symbolTablePointer,
# compileUnitLength, version and addressSize below come from.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('pefiles', metavar='F', type=argparse.FileType('rb', 0), nargs='+',
                        help='pe files to parse')
    parser.add_argument('-d', '--dwarf', required=False, action="store_true", help='parse dwarf sections')
    parser.add_argument('-s', '--dwarf-skip', required=False, action="store_true", help='skip through dwarf compilation units')
    parser.add_argument('--dwarf-force', required=False, action="store_true", help='Force-parses the first few units in an errored CU')
    parser.add_argument('--dwarf-ciu', required=False, type=int, action="append", help='parse this specific dwarf compilation unit')
    args = parser.parse_args()

    # --dwarf-ciu is implemented on top of skip mode: every unit is skipped
    # except the ones whose file position was requested.
    if args.dwarf_ciu:
        if args.dwarf_force or args.dwarf_skip:
            print "--dwarf-ciu cannot be used with other dwarf arguments"
            sys.exit(1)
        args.dwarf_skip = True

    for f in args.pefiles:
        print "==============================================="
        print "Parsing", f.name

        # Determine the file size by seeking to the end and back.
        fileSize = 0
        f.seek(0, os.SEEK_END)
        fileSize = f.tell()
        f.seek(0, os.SEEK_SET)

        print "--------------------------------"
        print "DOS Header"
        expect(f, 'MZ')
        discard(f, 0x40 - 6)
        # Only files whose PE header offset field equals 0x80 are accepted.
        expect(f, bytearray([0x80, 0x00, 0x00, 0x00]), "PE Header Offset")
        # Skip over the "This program cannot be run..."
        discard(f, 0x80 - f.tell())

        print "--------------------------------"
        print "PE Header"
        expect(f, "PE\x00\x00")
        readPrint("Machine", f, 2)
        if machine == 0x14c:
            print "\tIMAGE_FILE_MACHINE_I386"
        elif machine == 0x8664:
            print "\tIMAGE_FILE_MACHINE_AMD64"
        else:
            print "\tUnknown Machine Type"
        readPrint("Number of Sections", f, 2)
        discard(f, 4, "Time Date Stamp")
        readPrint("Symbol Table Pointer", f, 4)
        readPrint("Number Of Symbols", f, 4)
        readPrint("Size Of Optional Header", f, 2)
        readPrint("Characteristics", f, 2)

        # Go Parse the Symbol Table
        print "--------------------------------"
        print "Taking a detour and parsing the string table...."
        origLocation = f.tell()
        f.seek(symbolTablePointer, os.SEEK_SET)
        print symbolTablePointer, "+ (", numberOfSymbols, "* 18)"
        # Skip the symbol records (18 bytes each); what follows is read as
        # the string table.
        discard(f, numberOfSymbols * 18)
        readPrint("String Table Size", f, 4)
        # Rewind over the size field so the table copy includes those 4
        # bytes; the "/<n>" section-name lookups below index into this copy.
        f.seek(f.tell() - 4, os.SEEK_SET)
        stringDatabaseIsh = readBytes(f, stringTableSize, False)
        f.seek(origLocation, os.SEEK_SET)
        #dumpStringTable(stringDatabaseIsh)

        print "--------------------------------"
        print "Optional Header"
        readPrint("PE Format", f, 2)
        # peFormat is rebound from the magic number to a readable name.
        if peFormat == 0x10b:
            peFormat = "PE32"
            variableAddressSize = 4
        elif peFormat == 0x20b:
            peFormat = "PE32+"
            variableAddressSize = 8
        else:
            raise Exception("Unknown PE Format")
        print "PE Format", peFormat
        discard(f, 1, "Major Linker Version")
        discard(f, 1, "Minor Linker Version")
        discard(f, 4, "Size of Code")
        discard(f, 4, "Size of Init Data")
        discard(f, 4, "Size of UnInit Data")
        discard(f, 4, "Address of Entry Point")
        discard(f, 4, "Base of Code")
        # This field is only skipped for PE32 files.
        if peFormat == "PE32":
            discard(f, 4, "Base of Data")
        discard(f, variableAddressSize, "Image Base")
        discard(f, 4, "Section Alignment")
        discard(f, 4, "File Alignment")
        discard(f, 2, "Major OS Version")
        discard(f, 2, "Minor OS Version")
        discard(f, 2, "Major Image Version")
        discard(f, 2, "Minor Image Version")
        discard(f, 2, "Major SubSystem Version")
        discard(f, 2, "Minor SubSystem Version")
        discard(f, 4, "Win32 Version Value")
        readPrint("Size Of Image", f, 4)
        readPrint("Size Of Headers", f, 4)
        discard(f, 4, "checksum")
        discard(f, 2, "Subsystem")
        discard(f, 2, "DLL Characteristics")
        discard(f, variableAddressSize, "Size of Stack Reserve")
        discard(f, variableAddressSize, "Size of Stack Commit")
        discard(f, variableAddressSize, "Size of Heap Reserve")
        discard(f, variableAddressSize, "Size of Heap Commit")
        discard(f, 4, "Loader Flags")
        discard(f, 4, "Number of RVA and Sizes")

        print "--------------------------------"
        print "Data Directories"
        # A fixed list of 16 directories is read, 8 bytes each; the
        # "Number of RVA and Sizes" field discarded above is not consulted.
        directoryNames = ["Export", "Import", "Resource", "Exception", "Security", "BaseRelocationTable",
                          "DebugDirectory", "CopyrightOrArchitectureSpecificData", "GobalPtr", "TLSDirectory",
                          "LoadConfigurationDiectory", "BoundImportDirectory", "ImportAddressTable",
                          "DelayLoadImportDescriptors", "COMRuntimedescriptor", "Reserved"]
        print len(directoryNames), len(directoryNames) * 8
        dataDirectories = {}
        for d in directoryNames:
            dataDirectories[d] = OrderedDict([
                ('name', d),
                ('virtualAddress', readBytes(f, 4)),
                ('size', readBytes(f, 4))
            ])
            # Only directories with a non-zero size are printed.
            if dataDirectories[d]['size']:
                printAll(dataDirectories[d])

        print "--------------------------------"
        print "Sections"
        sections = []
        for i in range(numberOfSections):
            readNoPrint("Section Name", f, 8, False)
            readNoPrint("Virtual Size", f, 4)
            readNoPrint("Virtual Address", f, 4)
            readNoPrint("Size Of Raw Data", f, 4)
            readNoPrint("Pointer To Raw Data", f, 4)
            readNoPrint("Pointer To Relocations", f, 4)
            readNoPrint("Pointer To Line Numbers", f, 4)
            readNoPrint("Number Of Relocations", f, 2)
            readNoPrint("Number Of Line Numbers", f, 2)
            readNoPrint("Characteristics", f, 4)

            realSectionName = str(sectionName).strip("\x00")
            # A name of the form "/<decimal>" is an index into the string
            # table copy; the real name runs from there to the next NUL.
            if realSectionName and realSectionName[0] == "/":
                indx = int(realSectionName[1:])
                s = stringDatabaseIsh[indx:]
                s_end = s.find("\x00")
                realSectionName = s[:s_end]

            section = OrderedDict([
                ('sectionName', realSectionName),
                ('sectionNameOriginal', str(sectionName)),
                ('sectionNameDetailed', sectionNameStr(realSectionName)),
                ('virtualSize', virtualSize),
                ('virtualAddress', virtualAddress),
                ('sizeOfRawData', sizeOfRawData),
                ('pointerToRawData', pointerToRawData),
                ('pointerToRelocations', pointerToRelocations),
                ('pointerToLineNumbers', pointerToLineNumbers),
                ('numberOfRelocations', numberOfRelocations),
                ('numberOfLineNumbers', numberOfLineNumbers),
                ('characteristics', characteristics)
            ])
            printAll(section)
            sections.append(section)

            # Refuse sections whose raw data would lie outside the file.
            if pointerToRawData > fileSize:
                print "Error: Pointer To Raw Data for this section is greater than the filesize!"
                sys.exit(1)
            elif pointerToRawData + sizeOfRawData > fileSize:
                print "Error: Size of Raw Data for this section is greater than the filesize!"
                sys.exit(1)

        print "--------------------------------"
        print "Individual Sections"
        # First pass: load .debug_abbrev, which the .debug_info pass needs.
        for s in sections:
            if s['sectionName'] == ".debug_abbrev":
                print "--------------------------------"
                print ".debug_abbrev"
                f.seek(s['pointerToRawData'], os.SEEK_SET)
                debug_abbrev = DebugAbbrev(f, s['pointerToRawData'], readBytes(f, s['sizeOfRawData'], False))
                debug_abbrev_size = s['sizeOfRawData']

        # Second pass: walk the compile units stored in .debug_info.
        for s in sections:
            if s['sectionName'] == ".debug_info":
                print "--------------------------------"
                print ".debug_info"
                f.seek(s['pointerToRawData'], os.SEEK_SET)
                while f.tell() < s['pointerToRawData'] + s['sizeOfRawData']:
                    compileUnitBegin = f.tell()
                    dwarfFormat = 32
                    readNoPrint("Compile Unit Length", f, 4)
                    # A 32-bit length of 0xFFFFFFFF switches to the 64-bit
                    # format, where an 8-byte length follows.
                    if compileUnitLength == 0xFFFFFFFF:
                        dwarfFormat = 64
                        readNoPrint("Compile Unit Length", f, 8)

                    # A zero length is taken to mean the rest of the section
                    # is padding, which must then be all zero bytes.
                    if compileUnitLength == 0:
                        print "I am", (s['pointerToRawData'] + s['sizeOfRawData']) - f.tell(), "bytes away from the end of the section."
                        print "Reading zero's until I reach it."
                        while f.tell() < s['pointerToRawData'] + s['sizeOfRawData']:
                            b = readBytes(f, 1)
                            if b != 0:
                                raise Exception("I was reading padding but then I found a non-zero byte at file offset " + str(f.tell()-1))
                        continue

                    readNoPrint("Version", f, 2)
                    if version not in (2, 4):
                        raise Exception("I can't parse DWARF Version " + str(version))
                    if dwarfFormat == 32:
                        readNoPrint("Debug Abbrev Offset", f, 4)
                    else:
                        readNoPrint("Debug Abbrev Offset", f, 8)
                    debug_abbrev.resetToOffset(debugAbbrevOffset)
                    readNoPrint("Address Size", f, 1)

                    # NOTE(review): 4 is added for the length field even when
                    # the 64-bit format was detected above - confirm.
                    nextUnitAt = compileUnitBegin + compileUnitLength + 4
                    # A unit whose abbreviation offset lies beyond the
                    # .debug_abbrev data is flagged and handled by guessing.
                    thisUnitErrors = debugAbbrevOffset > debug_abbrev_size

                    print "0x" + format(compileUnitBegin - s['pointerToRawData'], "08x") + ": Compile Unit: length =",
                    print "0x" + format(compileUnitLength, "08x"), "version = 0x" + format(version, "04x"),
                    print "abbr_offset = 0x" + format(debugAbbrevOffset, "04x"), "addr_size = 0x" + format(addressSize, "02x"),
                    print "(next unit at 0x" + format(nextUnitAt - s['pointerToRawData'], "08x") + ")", "file position =", compileUnitBegin,
                    print "__ERROR__" if thisUnitErrors else ""

                    # Seeking to nextUnitAt makes the entry loop below a
                    # no-op, i.e. the unit's contents are skipped.
                    if args.dwarf_skip:
                        if not args.dwarf_ciu or compileUnitBegin not in args.dwarf_ciu:
                            f.seek(nextUnitAt, os.SEEK_SET)
                    elif args.dwarf_force and not thisUnitErrors:
                        f.seek(nextUnitAt, os.SEEK_SET)

                    # NOTE(review): indent grows by the literal below but is
                    # trimmed two characters at a time - confirm the intended
                    # width of that literal.
                    indent = ""
                    while f.tell() < nextUnitAt:
                        if not thisUnitErrors:
                            _, tagOffset = read_uleb128(f)
                            # Code 0 is handled as "step back one nesting
                            # level"; nothing else is read for it.
                            if tagOffset == 0:
                                if len(indent) >= 2:
                                    indent = indent[:-2]
                                continue

                            debug_abbrev.checkOffset(tagOffset)
                            top_tag = DW_TAG(debug_abbrev.read_uleb128())
                            children = debug_abbrev.read_uleb128() == DW_CHILDREN_yes
                            if args.dwarf or args.dwarf_ciu:
                                print indent, top_tag, "[" + str(tagOffset) + "]", "*" if children else ""
                            indent += " "

                            # Read (name, form) pairs from the abbreviation
                            # until the (0, 0) terminator, decoding one value
                            # from the file for each pair.
                            while True:
                                attribute_name = debug_abbrev.read_uleb128()
                                #print indent, "Attribute Name", hex(attribute_name)
                                attribute_form = debug_abbrev.read_uleb128()
                                #print indent, "Attribute Form", hex(attribute_form)
                                if attribute_name == 0 and attribute_form == 0:
                                    if not children:
                                        indent = indent[:-2]
                                    break
                                try:
                                    value = DW_FORM_FUNCs[DW_FORM(attribute_form)](f)
                                    if args.dwarf or args.dwarf_ciu:
                                        print indent, (DW_AT(attribute_name) + " [" + hex(attribute_name) + "]").ljust(30),
                                        print (DW_FORM(attribute_form) + " [" + hex(attribute_form) + "]").ljust(30),
                                        print DW_ATTRIBUTE_FUNCs.get(DW_AT(attribute_name), lambda s:s)(value)
                                except:
                                    # Any failure is reported with context
                                    # and the attribute loop carries on.
                                    print "Exception raised."
                                    print "\t File position:", f.tell()
                                    print "\t Top Tag", top_tag, "[" + str(tagOffset) + "]", "*" if children else ""
                                    print "\t Debug Abbrev Offset", debug_abbrev.translateToFileOffset(tagOffset)
                                    print "\t Attribute Name", hex(attribute_name)
                                    print "\t Attribute Form", hex(attribute_form)
                                    print "\t Attribute Name", DW_AT(attribute_name)
                                    print "\t Attribute Form", DW_FORM(attribute_form)
                            if args.dwarf:
                                print ""
                        else: # thisUnitErrors
                            _, tagOffset = read_uleb128(f)
                            if tagOffset == 0:
                                if len(indent) >= 2:
                                    indent = indent[:-2]
                                continue

                            # Guesses
                            # Try to parse based on common first tags, using DW_AT_language as an
                            # indicator of whether this was successful or not
                            def langBased(f):
                                # Assume a compile-unit entry made of four
                                # fixed attributes; any decoding error means
                                # the guess was wrong.
                                output = ""
                                indent = ""
                                top_tag = 'DW_TAG_compile_unit'
                                children = True
                                output += indent + " " + top_tag + " [" + str(tagOffset) + "] " + ("*\n" if children else "\n")
                                indent += " "
                                children = [
                                    (0x25, 0x08), #('DW_AT_producer', 'DW_FORM_string'),
                                    (0x13, 0x0b), #('DW_AT_language', 'DW_FORM_data1'),
                                    (0x03, 0x08), #('DW_AT_name', 'DW_FORM_string'),
                                    (0x1b, 0x08), #('DW_AT_comp_dir', 'DW_FORM_string')
                                ]
                                for c in children:
                                    attribute_name = c[0]
                                    attribute_form = c[1]
                                    try:
                                        value = DW_FORM_FUNCs[DW_FORM(attribute_form)](f)
                                        output += indent + " Recovered: " + (DW_AT(attribute_name) + " [" + hex(attribute_name) + "]").ljust(30)
                                        output += (DW_FORM(attribute_form) + " [" + hex(attribute_form) + "]").ljust(30)
                                        output += DW_ATTRIBUTE_FUNCs.get(DW_AT(attribute_name), lambda s:s)(value) + "\n"
                                    except:
                                        return False
                                print output
                                return True

                            def stringScan(f):
                                # Fallback: report runs of printable bytes
                                # that end in a NUL. Reads at least byteLimit
                                # bytes and keeps going while a run is open.
                                byteLimit = 250
                                bytesRead = 0
                                output = ""
                                this_str = ""
                                while bytesRead < byteLimit or not this_str:
                                    b = readBytes(f, 1)
                                    if 0x20 <= b and b <= 0x7E:
                                        this_str += chr(b)
                                    else:
                                        if this_str:
                                            # If a string ends in a null byte we believe it is a string
                                            if b == 0 and len(this_str) > 1:
                                                output += " Recovered: " + this_str + "\n"
                                                this_str = ""
                                            # If it didn't end in a null byte, we assume it was a fluke
                                            else:
                                                this_str = ""
                                    bytesRead += 1
                                print output
                                return True

                            # Use the first guess that reports success.
                            guessTypes = [langBased, stringScan]
                            for g in guessTypes:
                                if not g(f):
                                    continue
                                else:
                                    break
                            # Whatever was recovered, move on to the next unit.
                            f.seek(nextUnitAt, os.SEEK_SET)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment