-
-
Save jquirke/132d18b7bba9e3f96368bbc43230b755 to your computer and use it in GitHub Desktop.
Python script to process dSYM information from dwarfdump output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import sys | |
import tempfile | |
import subprocess | |
import re | |
import uuid | |
import pprint | |
#import lldb | |
#========================================================================================= | |
class Hopper(dict): | |
# Collects the type records that the Add* methods below build from DWARF DIEs.
# NOTE(review): this copy of the file has lost its indentation, so it cannot be told from
# here whether the FORMAT_* constants belong to the nested Type class or to Hopper itself.
#
# BaseTypes is the table AddBaseType consults: a DIE first matches an entry on
# name + size + encoding; failing that, the first entry marked "preferred" with the same
# size + encoding is used as the target of a typedef named after the DIE.
BaseTypes = [ | |
# Base types that exist in Hopper: | |
{"uuid":"054086d7b17b4685971643925db72c00", "name":"void" , "size":None , "preferred":False, "encoding":None }, | |
{"uuid":"054086d7b17b4685971643925db72c01", "name":"int8_t" , "size":1 , "preferred":True , "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c02", "name":"uint8_t" , "size":1 , "preferred":True , "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c03", "name":"int16_t" , "size":2 , "preferred":True , "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c04", "name":"uint16_t" , "size":2 , "preferred":True , "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c05", "name":"int32_t" , "size":4 , "preferred":True , "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c06", "name":"uint32_t" , "size":4 , "preferred":True , "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c07", "name":"int64_t" , "size":8 , "preferred":True , "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c08", "name":"uint64_t" , "size":8 , "preferred":True , "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c09", "name":"float" , "size":4 , "preferred":True , "encoding":"DW_ATE_float" }, | |
{"uuid":"054086d7b17b4685971643925db72c0a", "name":"double" , "size":8 , "preferred":True , "encoding":"DW_ATE_float" }, | |
{"uuid":"054086d7b17b4685971643925db72c0b", "name":"int" , "size":None , "preferred":False, "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c0c", "name":"unsigned int" , "size":None , "preferred":False, "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c0d", "name":"long" , "size":8 , "preferred":False, "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c0e", "name":"unsigned long" , "size":8 , "preferred":False, "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c0f", "name":"long long" , "size":8 , "preferred":False, "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c10", "name":"unsigned long long" , "size":8 , "preferred":False, "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c11", "name":"char" , "size":1 , "preferred":True , "encoding":"DW_ATE_signed_char" }, | |
{"uuid":"054086d7b17b4685971643925db72c12", "name":"short" , "size":2 , "preferred":False, "encoding":"DW_ATE_signed" }, | |
{"uuid":"054086d7b17b4685971643925db72c13", "name":"unsigned char" , "size":1 , "preferred":True , "encoding":"DW_ATE_unsigned_char"}, | |
{"uuid":"054086d7b17b4685971643925db72c14", "name":"unsigned short" , "size":2 , "preferred":False, "encoding":"DW_ATE_unsigned" }, | |
{"uuid":"054086d7b17b4685971643925db72c15", "name":"bool" , "size":1 , "preferred":True , "encoding":"DW_ATE_boolean" }, | |
# Base types that don't exist in Hopper with substitutes that exist in Hopper: | |
# (these two deliberately reuse the uuids of uint16_t and uint32_t listed above)
{"uuid":"054086d7b17b4685971643925db72c04", "name":"char16_t" , "size":2 , "preferred":True , "encoding":"DW_ATE_UTF" }, | |
{"uuid":"054086d7b17b4685971643925db72c06", "name":"char32_t" , "size":4 , "preferred":True , "encoding":"DW_ATE_UTF" }, | |
# Base types that don't exist in Hopper: | |
{"uuid":"054086d7b17b4685971643925db72e00", "name":"long double" , "size":16 , "preferred":True , "encoding":"DW_ATE_float" }, | |
] | |
# One Type instance is created per registered type by AddType, which stores its data as
# attributes (type, DIE, name, ...). The TYPE_* values are the kind codes passed to AddType;
# the trailing comments describe the serialized layout of each kind.
class Type(dict): | |
# Type : 16 byte type uuid, 4 byte len + name, 2 byte type | |
TYPE_pointer = 0x0011 # type uuid | |
TYPE_struct = 0x0012 # 4 byte numfields * { 4 byte len + name, type uuid, byte format, 4 byte len + comment } null | |
TYPE_union = 0x0013 # 4 byte numUnions * { 4 byte len + name, type uuid, byte format, 4 byte null } | |
TYPE_array = 0x0014 # 4 byte count, type uuid | |
TYPE_typedef = 0x0015 # 4 byte len + name, type uuid | |
TYPE_function = 0x001b # flag1 (ff), return type uuid, 2 byte numParams * {4 byte len + name, type uuid, byte format? }, ff=variadic, extra1 (6 null bytes), ff=no return, extra2 (0700=user input, or 0100=header import) | |
TYPE_enumeration = 0x001c # extra1 (00=user enum, 04=built-in enums), 4 byte numEnums * { 4 byte len + name, 8 byte signed value } | |
# Display-format codes; not referenced by any code visible in this part of the file.
# Presumably the "byte format" values mentioned in the layouts above - TODO confirm.
# The last three (0x20/0x40/0x80) look like flag bits to be OR-ed in - TODO confirm.
FORMAT_DEFAULT = 0 | |
FORMAT_HEXADECIMAL = 1 | |
FORMAT_DECIMAL = 2 | |
FORMAT_OCTAL = 3 | |
FORMAT_CHARACTER = 4 | |
FORMAT_STACKVARIABLE = 5 | |
FORMAT_OFFSET = 6 | |
FORMAT_ADDRESS = 7 | |
FORMAT_FLOAT = 8 | |
FORMAT_BINARY = 9 | |
FORMAT_STRUCTURED = 10 | |
FORMAT_ENUM = 11 | |
FORMAT_ADDRESS_DIFF=12 | |
FORMAT_NEGATE = 0x20 | |
FORMAT_LEADINGZEROES = 0x40 | |
FORMAT_SIGNED = 0x80 | |
# Types: every record created by AddType, in creation order.
# UUIDs: uuid -> DIE map filled by NewUUID and used there to detect collisions.
# Both are assigned in the class body (not per instance), so they are shared state.
Types = [] | |
UUIDs = {} | |
def NewUUID(self, DIE):
    """Give *DIE* a deterministic UUID and record it in ``self.UUIDs``.

    The UUID is a name-based (version 5) UUID in ``uuid.NAMESPACE_URL``.
    Its name is the compile unit's ``AT_name`` (prefixed with ``AT_comp_dir``
    unless the name already contains that directory) followed by the DIE
    address.  Nothing is returned; an already-assigned uuid or a collision
    is only reported with ``print``.
    """
    if hasattr(DIE, 'uuid'):
        print("Error: uuid already created :0x%08x:" % DIE.address)
        return

    unit = DIE.compile_unit
    comp_dir = unit.AT_comp_dir
    source_name = unit.AT_name
    if comp_dir not in source_name:
        # Relative unit name: qualify it with the compilation directory.
        source_name = comp_dir + source_name
    DIE.uuid = uuid.uuid5(uuid.NAMESPACE_URL, source_name + (":0x%08x" % DIE.address))

    if DIE.uuid in self.UUIDs:
        print("Error: uuid collision :0x%08x:" % DIE.address)
        return
    self.UUIDs[DIE.uuid] = DIE
def AddType(self, name, typetype, DIE):
    """Create a ``Type`` record for *DIE*, register it and return it.

    The record gets ``name``, ``type`` (the TYPE_* kind code) and ``DIE``
    attributes, and the DIE gets a back-reference in ``DIE.type``.  A uuid
    is assigned to the DIE through ``NewUUID`` and the record is appended
    to ``self.Types``.
    """
    record = self.Type()
    record.name = name
    record.type = typetype
    record.DIE = DIE
    DIE.type = record  # back-reference from the DIE to its record
    self.NewUUID(DIE)
    self.Types.append(record)
    return record
def AddFunctionPointer(self, DIE, name, attype):
    """Register *DIE* as a function type described by the DIE *attype*.

    The new record receives ``returntype`` (``attype``'s type, or None when
    it has none), ``variadic`` and ``params`` - a list of
    ``{"name", "attype"}`` dicts, one per typed formal parameter.  An
    unnamed artificial parameter is called ``"this"``; any other unnamed
    parameter gets the name None.  Returns None.
    """
    # Find artificial
    # ^0x\w+: +TAG_formal_parameter.*\n( +AT_.*\n)* +AT_artificial.*\n( +AT_.*\n)*
    record = self.AddType(name, self.Type.TYPE_function, DIE)
    record.returntype = attype.GetType() if attype.HasType() else None
    record.variadic = False
    record.params = []
    for child in attype.children:
        if child.tag == "TAG_unspecified_parameters":
            record.variadic = True
            continue
        if child.tag != "TAG_formal_parameter" or not child.HasType():
            # Either an unexpected child tag or a parameter without a type.
            print("Error: unknown parameter type :0x%08x:" % child.address)
            continue
        if hasattr(child, "AT_name"):
            param_name = child.AT_name
        elif hasattr(child, "AT_artificial"):
            param_name = "this"
        else:
            param_name = None
        record.params.append({"name": param_name, "attype": child.GetType()})
def AddPointerToMember(self, DIE, name, attype):
    """Register a pointer-to-member DIE as a struct with two pointer fields.

    Two synthetic ``TAG_pointer_type`` DIEs are created at ``DIE.address + 1``
    and ``DIE.address + 2`` and entered in ``DIE.dSYM.DIELookup``: the first
    is registered as a function pointer described by *attype*, the second as
    a pointer to the containing type.  The struct record's ``fields`` list
    references both.  Returns None.

    Fix: the two registrations are now made through ``self``; the original
    called ``AddFunctionPointer`` / ``AddPointer`` as bare names, which
    raised NameError.
    """
    # Find TAG_ptr_to_member_type
    # ^0x\w+: +TAG_ptr_to_member_type.*\n( +AT_.*\n)*
    record = self.AddType(name, self.Type.TYPE_struct, DIE)

    # Synthetic DIE for the function-pointer half.
    DIEf = DIEDict()
    DIEf.dSYM = DIE.dSYM
    DIEf.address = DIE.address + 1
    DIEf.tag = "TAG_pointer_type"
    # NOTE(review): nothing visible here sets ``attype`` on a parsed DIE
    # (parsed attributes are named AT_*) - confirm that DIE.attype exists.
    DIEf.attype = DIE.attype
    DIEf.compile_unit = DIE.compile_unit
    DIEf.dSYM.DIELookup[DIEf.address] = DIEf

    # Synthetic DIE for the pointer to the containing class.
    DIEc = DIEDict()
    DIEc.dSYM = DIE.dSYM
    DIEc.address = DIE.address + 2
    DIEc.tag = "TAG_pointer_type"
    DIEc.attype = DIE.GetContainerType()
    DIEc.compile_unit = DIE.compile_unit
    DIEc.dSYM.DIELookup[DIEc.address] = DIEc

    self.AddFunctionPointer(DIEf, None, attype)
    self.AddPointer(DIEc, None, DIEc.attype)
    record.fields = [{"name": None, "attype": DIEf}, {"name": None, "attype": DIEc}]
def AddPointer(self, DIE, name, attype):
    """Register *DIE* as a pointer type whose target is *attype*.

    The target is stored on the new record as ``attype``.  Returns None.
    """
    pointer_record = self.AddType(name, self.Type.TYPE_pointer, DIE)
    pointer_record.attype = attype
def AddStruct(self, DIE, name, child):
    """Register *DIE* as a struct type with an (initially empty) field list.

    *child* is accepted but not used by this method.  Returns None.
    """
    # Find multiple inheritance
    # 0x\w+: +TAG_inheritance.*\n( +AT_.*\n)*\n0x(\w+): +TAG_inheritance.*
    # Find bit fields
    # 0x\w+: +TAG_.*\n( +AT_.*\n)* +AT_\w*bit_offset.*\n( +AT_.*\n)*
    struct_record = self.AddType(name, self.Type.TYPE_struct, DIE)
    struct_record.fields = list()
def AddClass(self, DIE, name, child):
    """Register *DIE* (a class) as a struct type with an empty field list.

    Classes use the same ``TYPE_struct`` kind as structs.  *child* is
    accepted but not used by this method.  Returns None.
    """
    class_record = self.AddType(name, self.Type.TYPE_struct, DIE)
    class_record.fields = list()
def AddTypedef(self, DIE, name, attype, atuuid):
    """Register *DIE* as a typedef called *name*.

    attype -- DIE of the aliased type, or None when the alias targets a
              built-in base type.
    atuuid -- uuid of the aliased base type, used when *attype* is None.

    Both values are stored on the new record.  Returns None.

    Fix: ``atuuid`` is now stored from the parameter; the original assigned
    the ``uuid`` *module* to the record instead.
    """
    # attype is None for base type, use uuid instead
    record = self.AddType(name, self.Type.TYPE_typedef, DIE)
    record.attype = attype
    record.atuuid = atuuid
def AddBaseType(self, DIE):
    """Map a base-type DIE onto an entry of ``self.BaseTypes``.

    An entry with the same name, size and encoding is an exact match: the
    DIE takes that entry's uuid and is flagged ``baseHopperType``.
    Otherwise the first "preferred" entry with the same size and encoding
    becomes the target of a typedef named after the DIE.  If neither
    exists an error is printed.  Returns None.
    """
    size = DIE.AT_byte_size
    encoding = DIE.AT_encoding

    # Pass 1: exact match on name, size and encoding.
    for entry in self.BaseTypes:
        if entry['name'] == DIE.AT_name and entry['size'] == size and entry['encoding'] == encoding:
            DIE.uuid = entry['uuid']
            DIE.baseHopperType = True
            return

    # Pass 2: substitute a preferred type of identical size and encoding.
    for entry in self.BaseTypes:
        if entry['preferred'] == True and entry['size'] == size and entry['encoding'] == encoding:
            self.AddTypedef(DIE, DIE.AT_name, None, entry['uuid'])
            return

    print("Error: cannot find a base type :0x%08x:" % DIE.address)
def AddArray(self, DIE, name, attype):
    """Register an array DIE, one array record per dimension.

    The ``TAG_subrange_type`` children of *DIE* are walked from the last
    (innermost dimension) to the first.  The innermost record's element
    type is *attype*; each outer record's element type is the subrange DIE
    that carries the next inner dimension.  The outermost dimension is
    registered on *DIE* itself under *name*; inner dimensions are
    registered on their subrange child with no name.  Returns None.

    Fixes relative to the original:
      * ``TYPE_aray`` typo (AttributeError) -> ``TYPE_array``;
      * the walk now reaches index 0, so the named outer record is created;
      * the element count is read from the subrange child (the DIE that was
        tested for ``AT_count``), not from the array DIE;
      * the non-subrange error prints the child's address (``self`` has no
        ``address``).
    """
    # Find multiple sub ranges examples:
    # ^0x\w+:([ ]+)TAG_subrange_type.*\n( +AT_.*\n)*\n0x\w+:\1TAG
    curType = attype
    for i in range(len(DIE.children) - 1, -1, -1):
        child = DIE.children[i]
        if child.tag == "TAG_subrange_type":
            if hasattr(child, "AT_count"):
                if i == 0:
                    record = self.AddType(name, self.Type.TYPE_array, DIE)
                else:
                    record = self.AddType(None, self.Type.TYPE_array, child)
                record.attype = curType
                record.count = child.AT_count
            else:
                print("Error getting count :0x%08x:" % child.address)
        else:
            print("Error getting count :0x%08x:" % child.address)
        # The next (outer) dimension is an array of this one.
        curType = child
def AddEnumeration(self, DIE, name):
    """Register an enumeration DIE together with its enumerators.

    The record's ``size`` is the DIE's ``AT_byte_size`` and its
    ``enumerations`` list holds one ``{"name", "value"}`` dict per
    ``TAG_enumerator`` child.  Any other child is reported with ``print``.
    Returns None.

    Fix: enumerators are appended to ``record.enumerations``; the original
    appended to an undefined bare name ``enumerations`` (NameError).
    """
    # Find multiple enumerations examples:
    # ^0x\w+:([ ]+)TAG_enumerator.*\n( +AT_.*\n)*\n0x\w+:\1TAG
    record = self.AddType(name, self.Type.TYPE_enumeration, DIE)
    record.size = DIE.AT_byte_size
    record.enumerations = []
    for child in DIE.children:
        if child.tag == "TAG_enumerator":
            record.enumerations.append({"name": child.AT_name, "value": child.AT_const_value})
        else:
            print("Error getting enumeration :0x%08x:" % child.address)
def DumpHex(self):
    """Placeholder: does nothing yet and returns None.

    The comments below are the author's notes on what remains to be done.
    """
    # create types for unknown base types such as "long double"
    # go through all Types and delete duplicates
    # go through all pointers, if pointer to hopper base type then replace pointer uuid with base type uuid
    # pointer with no type void *
    return None
#========================================================================================= | |
class lldb(dict):
    """Local holder of the type-class codes returned by ``DIEDict.GetClass``.

    Presumably a stand-in for the real ``lldb`` module, whose import is
    commented out at the top of the file - TODO confirm.
    """

    eTypeClassStruct = -3
    eTypeClassUnion = -2
    eTypeClassClass = -1
#========================================================================================= | |
class DIEDict(dict): | |
def GetOffsetInBytes(self):
    """Return this member's byte offset, or None when the DIE carries none.

    ``AT_data_member_location`` is used when present; if it was not parsed
    into an integer an error is printed and 0 is returned.  Otherwise
    ``AT_data_bit_offset`` is converted to whole bytes (bits // 8).

    Change: the integer test uses ``isinstance`` instead of comparing the
    class name with the string "int".
    """
    if hasattr(self, "AT_data_member_location"):
        location = self.AT_data_member_location
        if not isinstance(location, int):
            print("Error in tag :%08x: %s (AT_data_member_location) containing value (%s)" % (self.address, self.tag, location))
            return 0
        return location
    if hasattr(self, "AT_data_bit_offset"):
        return self.AT_data_bit_offset >> 3
    return None
def GetOffsetInBits(self):
    """Return the member's bit offset.

    ``AT_bit_offset`` is returned unchanged when present; otherwise the low
    three bits of ``AT_data_bit_offset`` (the offset within its byte); 0
    when the DIE has neither attribute.
    """
    if hasattr(self, "AT_bit_offset"):
        return self.AT_bit_offset
    if not hasattr(self, "AT_data_bit_offset"):
        return 0
    return self.AT_data_bit_offset & 7
def GetClass(self):
    """Return the ``lldb.eTypeClass*`` code matching this DIE's tag.

    Only class, union and structure tags are recognised; every other tag
    yields None.
    """
    tag = self.tag
    if tag == "TAG_structure_type":
        return lldb.eTypeClassStruct
    if tag == "TAG_union_type":
        return lldb.eTypeClassUnion
    if tag == "TAG_class_type":
        return lldb.eTypeClassClass
    return None
def GetNumberOfDirectBaseClasses(self):
    """Return how many entries ``DirectBaseClasses`` holds (0 if it is absent)."""
    return len(getattr(self, "DirectBaseClasses", ()))
def GetDirectBaseClassAtIndex(self, i):
    """Return entry *i* of ``DirectBaseClasses``."""
    direct_bases = self.DirectBaseClasses
    return direct_bases[i]
def GetNumberOfVirtualBaseClasses(self):
    """Return how many entries ``VirtualBaseClasses`` holds (0 if it is absent)."""
    return len(getattr(self, "VirtualBaseClasses", ()))
def GetVirtualBaseClassAtIndex(self, i):
    """Return entry *i* of ``VirtualBaseClasses``."""
    virtual_bases = self.VirtualBaseClasses
    return virtual_bases[i]
def GetNumberOfFields(self):
    """Return how many entries ``Fields`` holds (0 if it is absent)."""
    return len(getattr(self, "Fields", ()))
def GetFieldAtIndex(self, i):
    """Return entry *i* of ``Fields``."""
    fields = self.Fields
    return fields[i]
def HasType(self):
    """Return True when this DIE carries an ``AT_type`` attribute."""
    return hasattr(self, "AT_type")
def GetType(self):
    """Return the DIE that ``AT_type`` refers to.

    The reference is resolved through ``self.dSYM.DIELookup``.  When the DIE
    has no type an error is printed and None is returned.
    """
    if not self.HasType():
        print("Error getting type :0x%08x:" % self.address)
        return None
    return self.dSYM.DIELookup[self.AT_type]
def GetBaseType(self):
    """Return this DIE's type with any chain of typedefs stripped.

    While the referenced type is a ``TAG_typedef`` the lookup continues
    through that typedef.  When the DIE has no type an error is printed and
    None is returned.
    """
    if not self.HasType():
        print("Error getting base type :0x%08x:" % self.address)
        return None
    target = self.GetType()
    if target.tag != "TAG_typedef":
        return target
    return target.GetBaseType()
def GetContainerType(self):
    """Return the DIE that ``AT_containing_type`` refers to.

    The reference is resolved through ``self.dSYM.DIELookup``.  When the
    attribute is missing an error is printed and None is returned.
    """
    if not hasattr(self, "AT_containing_type"):
        print("Error getting conter type :0x%08x:" % self.address)
        return None
    return self.dSYM.DIELookup[self.AT_containing_type]
def GetNameForType(self, forType): | |
# Build a printable, C-like name for this DIE.
#   forType: when false and the DIE has AT_name, that name is returned unchanged. When true
#            (or when there is no AT_name) the name is synthesized from the tag as below.
#            GetName() calls this with False.
# The synthesized forms recurse through GetType().GetName() of the referenced DIE.
# NOTE(review): the indentation of this copy is lost, so it cannot be told from here whether
# the final `else:` (the error print) pairs with the TAG_member union test or with the whole
# tag chain, nor whether every path reaches the final `return ""` - confirm against the
# original file before restructuring this method.
if not forType and hasattr(self, "AT_name"): | |
return self.AT_name | |
# Unnamed aggregates and enums get a fixed placeholder.
elif self.tag == "TAG_structure_type": | |
return "(anonymous struct)" | |
elif self.tag == "TAG_union_type": | |
return "(anonymous union)" | |
elif self.tag == "TAG_class_type": | |
return "(anonymous class)" | |
elif self.tag == "TAG_enumeration_type": | |
return "(anonymous enum)" | |
# const / volatile prefix the target's name; no target means void.
elif self.tag == "TAG_const_type": | |
if self.HasType(): | |
return "const " + self.GetType().GetName() | |
else: | |
return "const void" | |
elif self.tag == "TAG_volatile_type": | |
if self.HasType(): | |
return "volatile " + self.GetType().GetName() | |
else: | |
return "volatile void" | |
# Pointers append "*" directly after an existing "*", otherwise " *".
elif self.tag == "TAG_pointer_type": | |
if self.HasType(): | |
result = self.GetType().GetName() | |
if result[-1:] == "*": | |
return result + "*" | |
else: | |
return result + " *" | |
else: | |
return "void *" | |
# References are written with a leading "&" before the target's name.
elif self.tag == "TAG_reference_type": | |
if self.HasType(): | |
return "&" + self.GetType().GetName() | |
else: | |
return "& void" | |
# Pointer-to-member is named like a plain pointer; a missing target is reported and named void.
elif self.tag == "TAG_ptr_to_member_type": | |
if self.HasType(): | |
result = self.GetType().GetName() | |
else: | |
print("Error getting type :0x%08x:" % self.address) | |
result = "void" | |
if result[-1:] == "*": | |
return result + "*" | |
else: | |
return result + " *" | |
# Arrays: element name followed by one "[count]" per subrange ("[]" when the count is None).
elif self.tag == "TAG_array_type": | |
counts = self.GetCounts() | |
if self.HasType(): | |
arrtype = self.GetType().GetName() | |
else: | |
print("Error getting type :0x%08x:" % self.address) | |
arrtype = "void" | |
countstr = "" | |
for count in counts: | |
if count == None: | |
countstr += "[]" | |
else: | |
countstr += "[%d]" % count | |
return arrtype + countstr | |
# Function types: "<return type> ()(<parameters>)". Artificial parameters are emitted inside
# /* */ comments and TAG_unspecified_parameters becomes "...". `i` counts children so that
# no separator is added after the last one.
elif self.tag == "TAG_subroutine_type" or self.tag == "TAG_subprogram": | |
if self.HasType(): | |
returntype = self.GetType().GetName() | |
else: | |
returntype = "void" | |
i = 0 | |
result = returntype + " ()(" | |
for child in self.children: | |
i += 1 | |
if child.HasType(): | |
if hasattr(child, "AT_artificial"): | |
if i < len(self.children): | |
result += "/* " + child.GetType().GetName() + ", */ " | |
else: | |
result += "/* " + child.GetType().GetName() + " */" | |
else: | |
result += child.GetType().GetName() | |
if i < len(self.children): | |
result += ", " | |
elif child.tag == "TAG_unspecified_parameters": | |
result += "..." | |
else: | |
# NOTE(review): the address printed here is this function DIE's, not the offending child's.
print("Error: unknown parameter type :0x%08x:" % self.address) | |
result += ")" | |
return result | |
# A member without AT_name whose type is a union is an unnamed union member: empty name.
elif self.tag == "TAG_member": | |
if self.GetType().GetClass() == lldb.eTypeClassUnion: | |
return "" # unnamed union member | |
else: | |
print("Error getting name for :0x%08x: %s" % (self.address, self.tag)) | |
return "" | |
def GetName(self):
    """Return this DIE's name, i.e. ``GetNameForType`` with ``forType`` false."""
    return self.GetNameForType(forType=False)
def GetByteSizeForAlign(self, forAlign, class_depth=0):
    """Return the byte size of this DIE, or its alignment when *forAlign*.

    forAlign    -- when true, class/struct/union DIEs return the largest
                   alignment among their direct base classes, fields and
                   (at depth 0 only) ``AllVirtualBaseClasses``; arrays and
                   DIEs that merely reference a type return that type's
                   alignment.  Every other case returns the same value as
                   for a size query.
    class_depth -- nesting level; base classes are queried one level deeper
                   and virtual bases are only considered at depth 0.

    Size rules, in order: ``AT_byte_size`` (reported as 0 for a one-byte
    class/struct without fields); const/volatile/typedef forward to their
    target; pointers use the compile unit's ``addr_size`` (twice that for
    pointer-to-member); arrays multiply the element size by each dimension
    (an unknown count is treated as 1); any other typed DIE uses its type;
    bit fields are derived from ``AT_bit_size``/``AT_data_bit_offset``.  If
    nothing applies an error is printed and 0 is returned.

    Fixes: the bit-field size computed ``offset & (7 + bit_size)`` because
    ``+`` binds tighter than ``&``; it is now ``((offset & 7) + bit_size)``.
    The final error message no longer raises AttributeError when the DIE
    has no ``AT_name``.
    """
    if forAlign and self.tag in ("TAG_class_type", "TAG_structure_type", "TAG_union_type"):
        max_align = 1
        for i in range(self.GetNumberOfDirectBaseClasses()):
            base = self.GetDirectBaseClassAtIndex(i)
            max_align = max(max_align, base.GetBaseType().GetAlign(class_depth + 1))
        for i in range(self.GetNumberOfFields()):
            field = self.GetFieldAtIndex(i)
            max_align = max(max_align, field.GetBaseType().GetAlign())
        if class_depth == 0 and hasattr(self, "AllVirtualBaseClasses"):
            for virtualbaseclassinfo in self.AllVirtualBaseClasses:
                max_align = max(max_align, virtualbaseclassinfo.member.GetBaseType().GetAlign())
        return max_align

    if hasattr(self, "AT_byte_size"):
        if (self.AT_byte_size == 1
                and self.tag in ("TAG_class_type", "TAG_structure_type")
                and not hasattr(self, "Fields")):
            # classes have size 1 when they don't contain any fields
            return 0
        return self.AT_byte_size

    if self.tag in ("TAG_const_type", "TAG_volatile_type", "TAG_typedef"):
        return self.GetType().GetByteSizeForAlign(forAlign)
    if self.tag == "TAG_pointer_type":
        return self.compile_unit.addr_size
    if self.tag == "TAG_ptr_to_member_type":
        return self.compile_unit.addr_size * 2

    if self.tag == "TAG_array_type":
        if forAlign:
            return self.GetType().GetAlign()
        size = self.GetType().GetByteSize()
        for count in self.GetCounts():
            size *= 1 if count is None else count
        return size

    if self.HasType():
        if forAlign:
            return self.GetType().GetAlign()
        return self.GetType().GetByteSize()

    if hasattr(self, "AT_bit_size") and hasattr(self, "AT_data_bit_offset"):
        # NOTE(review): this truncates to whole bytes (a 3-bit field at bit 0
        # yields 0); confirm whether rounding up was intended.
        return ((self.AT_data_bit_offset & 7) + self.AT_bit_size) >> 3

    print("Error getting byte size for :0x%08x: %s %s class_depth:%d forAlign:%d" % (self.address, self.tag, getattr(self, "AT_name", None), class_depth, forAlign))
    return 0
def GetByteSize(self):
    """Return the byte size of this DIE (``GetByteSizeForAlign`` with ``forAlign`` false)."""
    return self.GetByteSizeForAlign(forAlign=False)
def GetCompactSize(self, class_depth=0):
    """Return the size of this type without trailing padding.

    For class/struct DIEs this is the end offset of the last field - for a
    bit field, the byte in which its last bit lies; otherwise offset plus
    byte size.  Without fields it is the offset of the last direct base
    class plus that base type's own compact size; with neither it is 0.
    Virtual base classes are not included.  Any other DIE returns
    ``GetByteSize()``.

    class_depth -- nesting level, incremented when descending into a base.

    Fixes: the bit-field test now inspects the last *member* (it looked at
    the class DIE itself, so the branch that then reads the member's bit
    attributes was guarded by the wrong object), and the base-class branch
    asks the resolved base *type* for its compact size instead of the
    inheritance DIE.
    """
    # doesn't include virtual classes
    if self.tag not in ("TAG_class_type", "TAG_structure_type"):
        return self.GetByteSize()

    numFields = self.GetNumberOfFields()
    if numFields > 0:
        member = self.GetFieldAtIndex(numFields - 1)
        if hasattr(member, "AT_bit_size") and hasattr(member, "AT_data_bit_offset"):
            # Round the end bit position up to a whole byte.
            return (member.AT_data_bit_offset + member.AT_bit_size + 7) >> 3
        return member.GetOffsetInBytes() + member.GetByteSize()

    numClasses = self.GetNumberOfDirectBaseClasses()
    if numClasses > 0:
        member = self.GetDirectBaseClassAtIndex(numClasses - 1)
        m_type = member.GetBaseType()
        return member.GetOffsetInBytes() + m_type.GetCompactSize(class_depth + 1)

    return 0
def GetAlign(self, class_detph=0):
    """Return the alignment (8, 4, 2 or 1) derived from the alignment size.

    The value from ``GetByteSizeForAlign(True, class_detph)`` is reduced to
    the largest of 8, 4 and 2 that does not exceed it; anything below 2
    gives 1.
    """
    m_size = self.GetByteSizeForAlign(True, class_detph)
    for boundary in (8, 4, 2):
        if m_size >= boundary:
            return boundary
    return 1
def GetCounts(self):
    """Return the element count of each array dimension, in child order.

    One entry is produced per ``TAG_subrange_type`` child: its ``AT_count``,
    or None when the subrange has none.  Any other child is reported with
    ``print`` (using this DIE's address) and contributes no entry.
    """
    counts = []
    for child in self.children:
        if child.tag != "TAG_subrange_type":
            print("Error getting count :0x%08x:" % self.address)
            continue
        counts.append(getattr(child, "AT_count", None))
    return counts
def GetBitSize(self):
    """Return the size in bits: ``AT_bit_size`` if present, else 8 * byte size."""
    if not hasattr(self, "AT_bit_size"):
        return self.GetByteSize() * 8
    return self.AT_bit_size
#========================================================================================= | |
class VTableInfo(dict):
    """Plain ``dict`` subclass, so instances can carry arbitrary attributes."""
class VTableItem(dict):
    """Plain ``dict`` subclass, so instances can carry arbitrary attributes."""
class DerivationItem(dict):
    """Plain ``dict`` subclass; one step of a derivation path.

    ``DSYM_Reader.MakeVTables`` sets ``DIE``, ``index``, ``isvirtual``,
    ``VTables`` and ``derivationPathText`` on its instances.
    """
class VirtualBaseClassInfo(dict):
    """Plain ``dict`` subclass, so instances can carry arbitrary attributes.

    ``DIEDict.GetByteSizeForAlign`` reads a ``member`` attribute from the
    entries of ``AllVirtualBaseClasses``, presumably instances of this
    class - TODO confirm.
    """
#========================================================================================= | |
class DSYM_Reader:
    """Parser for the text that ``dwarfdump`` prints for a dSYM.

    The class-level patterns below each match one whole input line,
    including its trailing newline.

    Change: every pattern is now a raw string.  The originals were ordinary
    string literals containing regex escapes such as ``\\w``, ``\\(`` and
    ``\\d``, which Python reports as invalid escape sequences; the compiled
    patterns are unchanged.
    """

    # "0x...: Compile Unit: ... addr_size = 0x.. ..." -> (address, addr_size)
    compileUnitRE = re.compile(r'(0x[0-9a-f]+): Compile Unit: .* addr_size = (0x[0-9a-f]+) .*\n')
    # "0x...:   TAG_xxx ..." -> (address, indentation, tag)
    tagRE = re.compile(r'(0x[0-9a-f]+):( +)(?:Unknown )?(?:DW_)?(TAG(?:_\w+| constant: 0x[0-9a-f]+)).*\n')
    # "0x...:   NULL" ends a list of sibling DIEs
    nullRE = re.compile(r'(0x[0-9a-f]+): +(NULL)\n')
    # an empty line ends the attribute list of a DIE
    blankRE = re.compile(r'\n')
    # continuation lines of the multi-line attributes below
    AT_locationRE = re.compile(r' +(.*?) *(\))?\n')
    AT_rangesRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(\)?))| *(End \))\n')
    AT_byte_sizeRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(?::?[^)\n]*)(\)?))| *(End \))\n')
    # Attribute-line patterns, tried in this order; the first match wins, so
    # the specific forms come before the generic NAME( value ) fallbacks.
    AT_REList = [
        re.compile(r' +(?:DW_)?(AT_location)\t?\( *(0x[0-9a-f]+)(\n)'),  # loop until ')\n' is found
        re.compile(r' +(?:DW_)?(AT_ranges)\t?\( *(0x[0-9a-f]+)(\n)'),  # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_byte_size)\t?\( *(0x[0-9a-f]+):? *(\n)'),  # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_type)\t?\( *\{(0x[0-9a-f]+)\} \( .*? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *<(0x[0-9a-f]+)> ([0-9a-f]{2}) ([0-9a-f]{2})(?: ([0-9a-f]{2})?)? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *(?:DW_)?(OP_constu) (0x[0-9a-f]+) *\)\n'),
        re.compile(r' +(?:DW_)?(AT_data_member_location)\t?\( *(?:DW_)?(OP_plus_uconst) (0x[0-9a-f]+) *\)\n'),  # found this in 10.10.5_14F2511 kernel
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(0x[0-9a-f]+)\}".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\[(.*)\] *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(.*)\} *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *"(.*)" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(0x\w+) ".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(.*) *\)\n'),
    ]
    # value classifiers used on the captured attribute text
    neghexRE = re.compile(r'^0x[8-9a-f][0-9a-f]{15} *$')  # 64-bit hex with the top bit set
    hexRE = re.compile(r'^(0x[0-9a-f]+):? *$')
    decRE = re.compile(r'^[-+]?\d+ *$')
def ReadDIEList(self, f, parent, dSYM): | |
# Read DIEs from f (through ReadNextDIE) and attach them below `parent`, until ReadNextDIE
# returns None or a DIE less indented than this list is met.
#   f      : iterator over the dump's text lines, shared with the recursive calls
#   parent : DIE whose `children` list receives the DIEs of this list
#   dSYM   : passed through to ReadNextDIE
# Returns None. Structural problems are only reported with print.
# NOTE(review): the indentation of this copy is lost; the comments below describe the
# statements themselves and do not settle how they nest.
indent = None | |
unexpectedlist = False | |
# The list's indent is that of parent's single existing child (the case when this call was
# made by the recursion at the end), otherwise that of the first DIE read below. A parent
# that already has several children is unexpected and is reported.
if len(parent.children) == 1: | |
indent = parent.children[0].indent | |
elif len(parent.children) != 0: | |
print("Error: unexpected list:0x%08x %s" % (parent.address, parent.tag)) | |
for child in parent.children: | |
print(" :0x%08x %s" % (child.address, child.tag)) | |
unexpectedlist = True | |
while True: | |
DIE = self.ReadNextDIE(f, dSYM) | |
if DIE == None: | |
break | |
if unexpectedlist: | |
print("Error: first item of unexpected list %s :0x%08x" % (DIE.tag, DIE.address)) | |
unexpectedlist = False | |
if indent == None: | |
indent = DIE.indent | |
if DIE.indent > indent: | |
# indent increased, this record is the first child of the last added record | |
#print("{ %d" % DIE.indent) | |
DIE.parent = parent.children[-1] | |
DIE.parent.children.append(DIE) | |
elif DIE.indent < indent: | |
print("Error: indentation") | |
break | |
else: | |
DIE.parent = parent | |
parent.children.append(DIE) | |
# Besides `children`, index the DIE on its parent by role. The lists/dict are created on
# first use: base classes (virtual when AT_virtuality is present, otherwise direct), friends,
# data members (any DIE with AT_data_member_location or AT_data_bit_offset) and vtable
# entries keyed by AT_vtable_elem_location.
if DIE.tag == "TAG_inheritance": | |
if hasattr(DIE, "AT_virtuality"): | |
if not hasattr(DIE.parent, "VirtualBaseClasses"): | |
DIE.parent.VirtualBaseClasses = [] | |
DIE.parent.VirtualBaseClasses.append(DIE) | |
#print("Adding virtual base class :0x%08x:" % DIE.address) | |
else: | |
if not hasattr(DIE.parent, "DirectBaseClasses"): | |
DIE.parent.DirectBaseClasses = [] | |
DIE.parent.DirectBaseClasses.append(DIE) | |
elif DIE.tag == "TAG_friend": | |
if not hasattr(DIE.parent, "Friends"): | |
DIE.parent.Friends = [] | |
DIE.parent.Friends.append(DIE) | |
elif hasattr(DIE, "AT_data_member_location") or hasattr(DIE, "AT_data_bit_offset"): | |
if not hasattr(DIE.parent, "Fields"): | |
DIE.parent.Fields = [] | |
DIE.parent.Fields.append(DIE) | |
elif hasattr(DIE, "AT_vtable_elem_location"): | |
if not hasattr(DIE.parent, "VTableEntries"): | |
DIE.parent.VTableEntries = {} | |
# A slot that is already taken is reported; the printed offset is slot * addr_size.
if DIE.AT_vtable_elem_location in DIE.parent.VTableEntries: | |
nameold = DIE.parent.VTableEntries[DIE.AT_vtable_elem_location].GetName() | |
namenew = DIE.GetName() | |
print("Error: duplicate VTableEntries 0x%x %s %s" % (DIE.AT_vtable_elem_location * DIE.compile_unit.addr_size, nameold, namenew)) | |
# workaround problem for Mammal and WingedAnimal examples | |
# i.e. a destructor (name starting with "~") colliding in slot 0 with a non-destructor is
# stored in slot 1 instead, provided slot 1 is still free.
if DIE.AT_vtable_elem_location == 0 and namenew[:1] == "~" and not nameold[:1] == "~" and not 1 in DIE.parent.VTableEntries: | |
DIE.parent.VTableEntries[1] = DIE | |
else: | |
DIE.parent.VTableEntries[DIE.AT_vtable_elem_location] = DIE | |
# A deeper DIE opened a nested list: read the rest of it with its own parent.
if DIE.indent > indent: | |
self.ReadDIEList(f, DIE.parent, dSYM) | |
#print("} %d" % DIE.indent) | |
def ReadNextDIE(self, f, dSYM): | |
# Consume lines from f until one DIE (tag line plus its attribute lines) has been read.
#   f    : iterator over the dump's text lines; it is advanced past everything consumed
#   dSYM : receives the DIE in dSYM.DIELookup[address]; compile units are also appended to
#          dSYM.CompileUnits and become dSYM.currentCompileUnit
# Returns the new DIEDict, or None when a NULL line or the end of input comes first.
# NOTE(review): the indentation of this copy is lost; the comments below describe the
# statements themselves and do not settle how they nest.
DIE = None | |
for line in f: | |
#print (line) | |
if self.nullRE.match(line): | |
break | |
# "Compile Unit" header line: remember its addr_size for the TAG_compile_unit DIE stored at
# the end of this method.
# NOTE(review): addr_size is a local, so it is only bound if such a header line was seen
# earlier in this same call.
c = self.compileUnitRE.match(line) | |
if c: | |
addr_size = int(c.group(2), 16) | |
continue | |
# A tag line starts the DIE: address, indentation width and tag name come from the groups.
t = self.tagRE.match(line) | |
if t: | |
DIE = DIEDict() | |
DIE.dSYM = dSYM | |
DIE.address = int(t.group(1), 16) | |
DIE.indent = len(t.group(2)) | |
#print("indent: %d" % DIE.indent) | |
DIE.tag = t.group(3) | |
DIE.children = [] | |
#print("Added DIE :%08x:" % DIE.address) | |
# Attribute lines follow until a blank line. Each is tried against AT_REList in order and
# the first matching pattern handles it (see the break marked "AT_ created").
for line in f: | |
if self.blankRE.match(line): | |
break | |
for atRE in self.AT_REList: | |
m = atRE.match(line) | |
if m: | |
# Two-group patterns are plain NAME( value ): the value is stored under the attribute name
# as an int when it looks like hex or decimal, otherwise as the captured string.
if atRE.groups == 2: | |
# A 16-digit hex AT_bit_offset with its top bit set is stored as the negative number it
# encodes in 64-bit two's complement.
if m.group(1) == "AT_bit_offset" and self.neghexRE.match(m.group(2)): | |
DIE.AT_bit_offset = -int(2**64 - int(m.group(2),16)) | |
else: | |
m2 = self.hexRE.match(m.group(2)) | |
if m2: | |
setattr(DIE, m.group(1), int(m2.group(1), 16)) | |
elif self.decRE.match(m.group(2)): | |
setattr(DIE, m.group(1), int(m.group(2), 10)) | |
else: | |
setattr(DIE, m.group(1), m.group(2)) | |
# the rest of these have more than 2 capture groups (sometimes the third capture group | |
# is the linefeed just so we can do the following special processing) | |
elif m.group(1) == "AT_data_member_location": | |
if m.group(2) == "OP_plus_uconst": | |
thenum = int(m.group(3),16) | |
DIE.AT_data_member_location = thenum | |
# Vtable slot: either "OP_constu 0x.." directly, or a byte list where group 2 is the byte
# count, group 3 the first byte (expected to be "10") and the remaining groups are folded
# 7 bits per byte, last byte first.
elif m.group(1) == "AT_vtable_elem_location": | |
#print ("AT_vtable_elem_location «%s•%s•%s»" % (m.group(1), m.group(2), m.group(3))) | |
if m.group(2) == "OP_constu": | |
thenum = int(m.group(3),16) | |
else: | |
numbytes = int(m.group(2),16) | |
thenum = 0 | |
for i in range(numbytes + 2, 3, -1): | |
part = int(m.group(i),16) | |
# Only the last byte is expected to have its high bit clear; anything else is reported.
if (i == numbytes + 2) == (part & 128 != 0): | |
print("Error: unexpected high bit of elem location byte (%s) :%08x:" % (m.group(3), DIE.address)) | |
thenum = thenum * 128 + (part & 127) | |
if m.group(3) != "10": | |
print("Error: unexpected elem location type (%s) :%08x:" % (m.group(3), DIE.address)) | |
DIE.AT_vtable_elem_location = thenum | |
# Multi-line AT_location: the number on the first line is stored as AT_location and the
# text of the following lines, up to the one closed by ")", as AT_location_list.
elif m.group(1) == "AT_location": | |
setattr(DIE, m.group(1), int(m.group(2), 16)) | |
lines = [] | |
for line in f: | |
m = self.AT_locationRE.match(line) | |
if m: | |
lines.append(m.group(1)) | |
#print ("AT_location «%s•%s»" % (m.group(1), m.group(2))) | |
if m.group(2) == ")": | |
break # AT_location_list finished with error | |
else: | |
print("Error in tag :%08x: (AT_location) with line %s" % (DIE.address, line)) | |
break # AT_location_list finished with error | |
DIE.AT_location_list = lines | |
# Multi-line AT_ranges: following lines give [low, high) pairs until a closing ")" or an
# "End )" line; the pairs are kept as hex strings in AT_ranges_list.
elif m.group(1) == "AT_ranges": | |
DIE.AT_ranges = int(m.group(2), 16) | |
lines = [] | |
for line in f: | |
m = self.AT_rangesRE.match(line) | |
if m: | |
#print ("«%s•%s•%s•%s»" % (m.group(1), m.group(2), m.group(3), m.group(4))) | |
if m.group(4) == 'End )': | |
break # AT_ranges_list finished | |
lines.append([m.group(1), m.group(2)]) | |
if m.group(3) == ')': | |
break # AT_ranges_list finished | |
else: | |
print("Error in tag :%08x: (AT_ranges_list) with line %s" % (DIE.address, line)) | |
break # AT_ranges_list finished with error | |
DIE.AT_ranges_list = lines | |
# Multi-line AT_byte_size: handled like AT_ranges, with the pairs kept in AT_byte_size_list.
elif m.group(1) == "AT_byte_size": | |
DIE.AT_byte_size = int(m.group(2), 16) | |
lines = [] | |
for line in f: | |
m = self.AT_byte_sizeRE.match(line) | |
if m: | |
#print ("«%s•%s•%s•%s»" % (m.group(1), m.group(2), m.group(3), m.group(4))) | |
if m.group(4) == 'End )': | |
break # AT_byte_size_list finished | |
lines.append([m.group(1), m.group(2)]) | |
if m.group(3) == ')': | |
break # AT_byte_size_list finished | |
else: | |
print("Error in tag :%08x: (AT_byte_size_list) with line %s" % (DIE.address, line)) | |
break # AT_byte_size_list finished with error | |
DIE.AT_byte_size_list = lines | |
else: | |
print("Error in tag :%08x: with line %s" % (DIE.address, line)) | |
break # AT_ created | |
# Register the finished DIE. A compile unit DIE records the addr_size read from its header
# line; every other DIE is linked to the compile unit currently being read.
dSYM.DIELookup[DIE.address] = DIE | |
if DIE.tag == "TAG_compile_unit": | |
dSYM.CompileUnits.append(DIE) | |
dSYM.currentCompileUnit = DIE | |
DIE.addr_size = addr_size | |
else: | |
DIE.compile_unit = dSYM.currentCompileUnit | |
break # DIE created | |
return DIE | |
def CheckVTables(self, msg, parent):
    """Print every virtual base class found anywhere below *parent*.

    The DIE tree is walked depth-first.  For each descendant one line is
    printed per virtual base class, tagged with *msg*.  Returns None.
    """
    # for every class or structure, make a list of vtables (more than one vtable exists for multiple inheritance)
    for child in parent.children:
        for i in range(child.GetNumberOfVirtualBaseClasses()):
            member = child.GetVirtualBaseClassAtIndex(i)
            print("%d :0x%08x: Got virtual base class :0x%08x: %s" % (i, child.address, member.address, msg))
            # Result unused; the call is kept because the original makes it.
            member.GetBaseType()
        self.CheckVTables(msg, child)
def dumpderivationpath(self, derivationPath):
    """Format a derivation path as ``index:[virtual ]Name`` items joined by commas.

    Each item supplies ``index``, ``isvirtual`` and ``DIE`` (whose
    ``GetName()`` gives the class name).  An empty path gives "".
    """
    parts = []
    for derivationItem in derivationPath:
        qualifier = "virtual " if derivationItem.isvirtual else ""
        parts.append("%d:%s%s" % (derivationItem.index, qualifier, derivationItem.DIE.GetName()))
    return ",".join(parts)
def MakeVTables(self, derivationPath, begin_offset=0):
    """Build the vtable descriptions reachable through *derivationPath*.

    derivationPath -- list of DerivationItem; element 0 is the class the
                      vtables are being built for (all results are stored on
                      its DIE), element -1 is the class examined by this call.
    begin_offset   -- byte offset of the last path element within the first.

    The last class is searched for an artificial field whose name contains
    "vptr".  If none exists, every direct base class is visited recursively.
    Virtual base classes are then visited (each one is assigned an offset the
    first time it is seen and re-uses that offset afterwards).  Finally, when
    a vptr was found, a VTableInfo is created (or re-used, keyed by the vptr
    offset) and the VTableEntries of every class on the path are merged into
    it, walking the path from the base-most class to the most derived one.

    Progress and problems are reported with print(); nothing is returned.
    """
    prefix = "%*s" % (4 * len(derivationPath), "")
    thefirst = derivationPath[0].DIE
    thelast = derivationPath[-1].DIE

    # Does this class contain a vPtr?
    vPtrOffset = None
    for i in range(thelast.GetNumberOfFields()):
        member = thelast.GetFieldAtIndex(i)
        thename = member.GetName()
        if thename is not None and "vptr" in thename and hasattr(member, "AT_artificial"):
            vPtrOffset = member.GetOffsetInBytes() + begin_offset
            print("%sfound vptr (%s) classoffset:0x%x vptroffset:0x%x" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, vPtrOffset))
            break

    if vPtrOffset is None:
        # No vPtr exists, follow base classes
        for i in range(thelast.GetNumberOfDirectBaseClasses()):
            member = thelast.GetDirectBaseClassAtIndex(i)
            m_offset = member.GetOffsetInBytes() + begin_offset
            m_type = member.GetBaseType()
            derivationItem = DerivationItem()
            derivationItem.DIE = m_type
            derivationItem.index = i
            derivationItem.isvirtual = False
            derivationItem.VTables = thefirst.VTables
            # Direct bases extend the path text with their 1-based position.
            derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 1)
            derivationPath.append(derivationItem)
            print("%s[ derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset))
            self.MakeVTables(derivationPath, m_offset)
            print("%s] derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset))
            derivationPath.pop()

    for i in range(thelast.GetNumberOfVirtualBaseClasses()):
        member = thelast.GetVirtualBaseClassAtIndex(i)
        print("%s%d Checking virtual base class :0x%08x: numv:%d" % (prefix, i, member.address, len(thefirst.AllVirtualBaseClasses)))
        m_type = member.GetBaseType()
        if m_type.address in thefirst.IncludedVirtualBaseClasses:
            # Already placed through another path: re-use the recorded offset.
            virtualbaseclassinfo = thefirst.IncludedVirtualBaseClasses[m_type.address]
            m_offset = virtualbaseclassinfo.offset
            derivationItem = DerivationItem()
            derivationItem.DIE = m_type
            derivationItem.index = 0
            derivationItem.isvirtual = True
            derivationItem.VTables = thefirst.VTablesVirtual
            derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 31)
            derivationPath.append(derivationItem)
            print("%s[ virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
            self.MakeVTables(derivationPath, m_offset)
            print("%s] virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
            derivationPath.pop()
        else:
            # First time this virtual base is seen: place it at the next
            # suitably aligned offset after everything placed so far.
            m_offset = thefirst.VirtualBaseClassOffset
            m_align = m_type.GetAlign()
            m_offset = ((m_offset + m_align-1) & -m_align)
            # NOTE(review): the running offset grows by the byte size only, so
            # any alignment padding applied above is not carried forward --
            # confirm whether this should be m_offset + GetByteSize().
            thefirst.VirtualBaseClassOffset += m_type.GetByteSize()
            derivationItem = DerivationItem()
            derivationItem.DIE = m_type
            derivationItem.index = 0
            derivationItem.isvirtual = True
            derivationItem.VTables = thefirst.VTablesVirtual
            derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 61)
            virtualbaseclassinfo = VirtualBaseClassInfo()
            virtualbaseclassinfo.member = member
            virtualbaseclassinfo.offset = m_offset
            thefirst.AllVirtualBaseClasses.append(virtualbaseclassinfo)
            thefirst.IncludedVirtualBaseClasses[m_type.address] = virtualbaseclassinfo
            derivationPath.append(derivationItem)
            print("%s[ virtual 1st derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
            self.MakeVTables(derivationPath, m_offset)
            print("%s] virtual 1st derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
            derivationPath.pop()

    if vPtrOffset is not None:
        # A vPtr exists, make a vtable for it.
        if vPtrOffset in thefirst.VTablesByOffset:
            vtableinfo = thefirst.VTablesByOffset[vPtrOffset]
            max_index = vtableinfo.max
        else:
            vtableinfo = VTableInfo()
            vtableinfo.vPtrOffset = vPtrOffset
            vtableinfo.mergedVTableEntries = {}
            vtableinfo.derivationPathText = None
            derivationPath[-1].VTables.append(vtableinfo)
            thefirst.VTablesByOffset[vPtrOffset] = vtableinfo
            max_index = -1
        mergedVTableEntries = vtableinfo.mergedVTableEntries
        gotmultiinherit = False
        gotvirtual = False
        for derivationItem in reversed(derivationPath):
            baseClass = derivationItem.DIE
            if hasattr(baseClass, "VTableEntries"):
                print("%sProcessing %s path:%s previouspath:%s" % (prefix, baseClass.GetName(), derivationItem.derivationPathText, vtableinfo.derivationPathText))
                if vtableinfo.derivationPathText is None or len(derivationItem.derivationPathText) < len(vtableinfo.derivationPathText):
                    if gotvirtual or gotmultiinherit:
                        # Trying to build vtable of multiple inheritance is hard.
                        # This is probably wrong - maybe check virtuality, and parameters, but then I might as well try to code a C++ compiler.
                        # We'll just check the name.
                        # .values()/.items() (not the Python 2-only iter* forms) keep this working on Python 3.
                        for w in baseClass.VTableEntries.values():
                            namesuper = w.GetName()
                            print("%s Looking for vtableitem %s" % (prefix, namesuper))
                            for j, v1 in mergedVTableEntries.items():
                                v = v1.DIE
                                namebase = v.GetName()
                                if (namesuper == namebase or (namesuper[:1] == "~" and namebase[:1] == "~")):
                                    # a "non-virtual thunk" to w is what this probably is:
                                    v1.DIE = w
                                    if gotvirtual:
                                        v1.ThunkType = "virtual thunk to "
                                    else:
                                        v1.ThunkType = "non-virtual thunk to "
                                    print("%s Changed vtableitem 0x%x %s%s" % (prefix, v1.DIE.compile_unit.addr_size * j, v1.ThunkType, namesuper))
                                    break
                    else:
                        for j, v in baseClass.VTableEntries.items():
                            namesuper = v.GetName()
                            print("%s Adding vtableitem 0x%x %s" % (prefix, v.compile_unit.addr_size * j, namesuper))
                            if j in mergedVTableEntries:
                                namebase = mergedVTableEntries[j].DIE.GetName()
                                if namesuper != namebase and not (namesuper[:1] == "~" and namebase[:1] == "~"):
                                    if namesuper[:1] == "~" and j == 0 and 1 in mergedVTableEntries and mergedVTableEntries[1].DIE.GetName()[:1] == "~":
                                        # Destructor recorded at slot 0 while slot 1 also
                                        # holds a destructor: override slot 1 instead.
                                        vtableitem = VTableItem()
                                        vtableitem.DIE = v
                                        mergedVTableEntries[1] = vtableitem
                                        print("Error: performed workaround for vtable entry function name :%08x:%s differing from super :%08x:%s" % (v.address, namebase, mergedVTableEntries[j].DIE.address, namesuper))
                                    else:
                                        print("Error: vtable entry function name :%08x:%s differs from super :%08x:%s" % (v.address, namebase, mergedVTableEntries[j].DIE.address, namesuper))
                                else:
                                    # Same name (or both destructors): the more derived entry wins.
                                    vtableitem = VTableItem()
                                    vtableitem.DIE = v
                                    mergedVTableEntries[j] = vtableitem
                            else:
                                if j > max_index:
                                    max_index = j
                                vtableitem = VTableItem()
                                vtableitem.DIE = v
                                mergedVTableEntries[j] = vtableitem
                else:
                    print("%sSkipping" % prefix)
            if derivationItem.index > 0 and not gotmultiinherit:
                # index is > 0 for non primary base class of multiple inheritance class. These requires a different method to build vtable.
                print("%sgotmultiinherit max:0x%x" % (prefix, max_index * baseClass.compile_unit.addr_size))
                gotmultiinherit = True
            if derivationItem.isvirtual and not gotvirtual:
                print("%sgotvirtual max:0x%x" % (prefix, max_index * baseClass.compile_unit.addr_size))
                gotvirtual = True
        vtableinfo.max = max_index
        if vtableinfo.derivationPathText is None:
            vtableinfo.derivationPathText = derivationPath[-1].derivationPathText
        print("%sadded vtable (%s) classoffset:0x%x vptroffset:0x%x max:%d numvtables:%d numvirtualvtables:%d" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, vPtrOffset, max_index, len(thefirst.VTables), len(thefirst.VTablesVirtual)))
def MakeAllVTables(self, parent):
    """Build vtable information for every class/struct DIE beneath *parent*.

    For each child tagged TAG_class_type or TAG_structure_type the
    bookkeeping attributes used by MakeVTables are (re)initialised on the
    DIE and MakeVTables is started with a one-element derivation path.
    More than one vtable can result for a class with multiple inheritance.
    All children are then descended into, whatever their tag.
    """
    record_tags = ("TAG_class_type", "TAG_structure_type")
    for node in parent.children:
        if node.tag in record_tags:
            node.VTables = []
            node.VTablesByOffset = {}
            node.VTablesVirtual = []
            node.AllVirtualBaseClasses = []
            node.IncludedVirtualBaseClasses = {}
            # Virtual bases are laid out after the class's own compact size.
            node.VirtualBaseClassOffset = node.GetCompactSize()

            root_item = DerivationItem()
            root_item.DIE = node
            root_item.index = 0
            root_item.isvirtual = False
            root_item.VTables = node.VTables
            root_item.derivationPathText = "%02d" % 1
            path = [root_item]

            print("[ starting (parent:0x%08x: child:0x%08x: %s)" % (parent.address, node.address, self.dumpderivationpath(path)))
            self.MakeVTables(path)
            print("]")
        self.MakeAllVTables(node)
def Process_dSYM(self, filename):
    """Parse DWARF information for *filename* and return the root DIEDict.

    A name matching ``.*\\.txt`` is read directly as saved dwarfdump output;
    anything else is passed to the ``dwarfdump`` command and its output is
    captured in a temporary file.  After parsing, vtable information is
    built for every class/struct found.

    The returned object carries ``DIELookup``, ``CompileUnits``, ``filename``
    and ``children``.
    """
    is_text_dump = bool(re.match(r".*\.txt", filename))
    if is_text_dump:
        f = open(filename, "r")
    else:
        # Text mode: the parser matches str regexes against each line, which
        # fails on the bytes a default (binary) temp file yields on Python 3.
        f = tempfile.NamedTemporaryFile(mode="w+")
    try:
        if not is_text_dump:
            subprocess.call(["dwarfdump", filename], stdout=f)
            f.seek(0)
        dSYM = DIEDict()
        dSYM.currentCompileUnit = None
        dSYM.DIELookup = {}
        dSYM.CompileUnits = []
        dSYM.filename = filename
        dSYM.children = []
        self.ReadDIEList(f, dSYM, dSYM)
        # Only needed while reading; drop it from the finished tree.
        del dSYM.currentCompileUnit
    finally:
        # Close (and, for the temp file, delete) even if parsing raised.
        f.close()
    self.MakeAllVTables(dSYM)
    return dSYM
#========================================================================================= | |
def MakeOffsetStr(offset):
    """Return *offset* as ``0x``-prefixed lowercase hex, right-aligned to 6 columns."""
    hex_text = "0x%x" % offset
    return hex_text.rjust(6)
# from /Library/Developer/KDKs/KDK_10.11.5_15F34.kdk/System/Library/Kernels/kernel.dSYM/Contents/Resources/Python/lldbmacros/structanalyze.py | |
def _structKindName(typeClass):
    """Return the keyword ("class", "union", "struct") for an lldb type class."""
    if typeClass == lldb.eTypeClassClass:
        return "class"
    if typeClass == lldb.eTypeClassUnion:
        return "union"
    if typeClass == lldb.eTypeClassStruct:
        return "struct"
    return "_unknown_%x" % (typeClass or 0)


def _showStructPacking(symbol, typename, fieldname, prefix, depth, class_depth, begin_offset=0):
    """Render the memory layout of a class/struct/union as indented text.

    symbol       -- DIE of the aggregate to describe.
    typename     -- name to print; None means use the symbol's own name
                    (or ``_anon_<address>`` when it has none).
    fieldname    -- name of the member holding this aggregate, or None.
    prefix       -- indentation string put in front of every member line.
    depth        -- nesting depth; only passed on (incremented) to recursion.
    class_depth  -- 0 unless this call describes a direct base class; virtual
                    base classes are only listed when it is 0.
    begin_offset -- byte offset of this aggregate in the outermost one.

    Returns ``(text, compact_size, max_align)``: the rendered layout, the
    number of bytes accounted for, and the largest member alignment seen.
    Lines are annotated with "Possible memory overlap/hole" and "Possible bit
    field error" warnings where the computed and recorded offsets disagree.
    """
    classType = symbol.GetClass()
    ctype = _structKindName(classType)
    if typename is None:
        typename = symbol.GetName() or "_anon_%x" % symbol.address
    if fieldname is not None:
        outstr = "[%4d] (%s) %s %s {" % (symbol.GetByteSize(), ctype, typename, fieldname) + "\n"
    else:
        outstr = "[%4d] (%s) %s {" % (symbol.GetByteSize(), ctype, typename) + "\n"

    # Friends are described in two ways in the debug info:
    #   Mac OS X 10.8:
    #     0x009fcff0: DW_TAG_structure_type
    #                   DW_AT_name ("IOStatistics")
    #                   DW_AT_declaration (0x01)
    #     0x009fd4d4: DW_TAG_friend
    #                   DW_AT_type (0x009fcff0 "IOStatistics")
    #                   DW_AT_data_member_location (DW_OP_plus_uconst 0x0)
    #                   DW_AT_accessibility (DW_ACCESS_public)
    #   Mac OS X 10.9:
    #     0x00b67148: DW_TAG_class_type
    #                   DW_AT_name ("IOStatistics")
    #                   DW_AT_declaration (0x01)
    #     0x00b675ca: DW_TAG_friend
    #                   DW_AT_friend (0x00b67148)
    if hasattr(symbol, "Friends"):
        for friend in symbol.Friends:
            friendType = None
            if friend.HasType():
                friendType = friend.GetType()
            elif hasattr(friend, "AT_friend"):
                friendType = friend.dSYM.DIELookup[friend.AT_friend]
            if friendType is not None:
                # Label the friend by its own type class (the fallback label
                # previously reported the enclosing aggregate's class).
                friendKind = _structKindName(friendType.GetClass())
                outstr = outstr + prefix + " friend %s %s\n" % (friendKind, friendType.GetName() or "_anon_%x" % friendType.address)

    _compact_offset = begin_offset
    max_union_member_size = 0
    max_align = 1
    m_align = 1

    # ---- direct base classes ------------------------------------------------
    numClasses = symbol.GetNumberOfDirectBaseClasses()
    for i in range(numClasses):
        member = symbol.GetDirectBaseClassAtIndex(i)
        m_offset = member.GetOffsetInBytes() + begin_offset
        m_type = member.GetType()
        membertypename = m_type.GetName()
        m_type = member.GetBaseType()
        m_size = m_type.GetByteSize()
        warningstr = ""
        debugstr = ""
        if _compact_offset > m_offset:
            warningstr = " *** Possible memory overlap ***"
        elif _compact_offset < m_offset:
            # m_align here is still the alignment of the previous base class.
            align_offset = ((_compact_offset + m_align-1) & -m_align)
            if align_offset != m_offset:
                # ignore memory holes that may be caused by field alignment
                warningstr = " *** Possible memory hole (msize:%d align:%d calc:%d calcaligned:%d actual:%d) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
            _compact_offset = m_offset
        s, compact_size, m_align = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, class_depth + 1, m_offset)
        if m_align > max_align:
            max_align = m_align
        outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
        _compact_offset += compact_size

    # ---- fields ---------------------------------------------------------------
    numFields = symbol.GetNumberOfFields()
    inBitField = False
    totalBits = 0
    bitFieldStartByte = -1
    bitFieldTotalBytes = -1
    used_bits = 0         # mask of bits already claimed in the current bit-field unit
    next_used_bits = 0    # bits that spill into the following unit
    next_totalBits = 0
    thebits = 0           # defined up front so the error report below can always print it
    for i in range(numFields):
        member = symbol.GetFieldAtIndex(i)
        m_offset = member.GetOffsetInBytes() + begin_offset
        m_size_bits = member.GetBitSize()
        m_offset_bits = member.GetOffsetInBits()
        isBitField = hasattr(member, "AT_bit_size")
        m_name = member.GetName()
        m_type = member.GetType()
        membertypename = m_type.GetName()
        m_type = member.GetBaseType()
        membertypeclass = m_type.GetClass()
        m_size = m_type.GetByteSize()

        if inBitField:
            # continuing previously started bit fields?
            if (not isBitField) or classType == lldb.eTypeClassUnion or (m_offset >= (bitFieldStartByte + bitFieldTotalBytes)):
                # no, finish previously started bit field
                if classType != lldb.eTypeClassUnion:
                    _compact_offset += bitFieldTotalBytes
                inBitField = False
                bitFieldStartByte = m_offset
                used_bits = 0
                totalBits = 0

        if isBitField:
            if not inBitField:
                # new set of bit fields started
                inBitField = True
                bitFieldStartByte = m_offset
                bitFieldTotalBytes = m_size
                used_bits = next_used_bits
                totalBits = next_totalBits
                next_used_bits = 0
                next_totalBits = 0
            if next_used_bits:
                print("Error: have carry over bits but not in new bit field next_used_bits:0x%08x at :%08x:" % (next_used_bits, member.address))
                next_used_bits = 0
                next_totalBits = 0
            totalBits += m_size_bits
            m_offset_bits += (m_offset - bitFieldStartByte) * 8
            m_offset = bitFieldStartByte
            # the type of a bitfield does not always mean the total size of all bit fields because you can mix sized types in a sequence of bit fields
            while bitFieldTotalBytes * 8 < m_offset_bits + m_size_bits:
                # Doubling a non-positive size would never terminate; start from one byte.
                bitFieldTotalBytes = bitFieldTotalBytes * 2 if bitFieldTotalBytes > 0 else 1
            if m_offset_bits >= 0:
                bitfield = " : %d // %d..%d" % (m_size_bits, m_offset_bits, m_offset_bits + m_size_bits - 1)
            elif m_size_bits + m_offset_bits > 0:
                bitfield = " : %d // %d..%d,%d..%d" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1, 0, m_size_bits + m_offset_bits - 1)
            else:
                bitfield = " : %d // %d..%d,nothing" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1)
        else:
            bitFieldStartByte = m_offset
            bitFieldTotalBytes = m_size
            used_bits = 0
            totalBits = 0
            bitfield = ""
            if next_used_bits:
                print("Error: have carry over bits but not in bit field next_used_bits:0x%08x at :%08x:" % (next_used_bits, member.address))
                next_used_bits = 0
                next_totalBits = 0

        warningstr = ""
        try:
            if m_offset_bits >= 0:
                thebits = (~(-1 << m_size_bits)) << m_offset_bits
            else:
                next_totalBits = -m_offset_bits
                next_used_bits = (~(-1 << next_totalBits)) << (bitFieldTotalBytes * 8 + m_offset_bits)
                thebits = (~(-1 << (m_size_bits + m_offset_bits))) << 0
        except Exception:
            # negative bit offset means something like bit field overlaps next member... complicated
            print("Error with bits used_bits:0x%08x thebits(previous):0x%08x size:%d offset:%d type:%s at :%08x:" % (used_bits, thebits, m_size_bits, m_offset_bits, m_offset_bits.__class__.__name__, member.address))
            thebits = 0
        if ((thebits & used_bits) != 0) or m_size_bits < 0 or m_size_bits + m_offset_bits > bitFieldTotalBytes * 8:
            warningstr = " *** Possible bit field error ***"
            used_bits = 0
        used_bits |= thebits
        debugstr = ""

        if membertypeclass == lldb.eTypeClassStruct or membertypeclass == lldb.eTypeClassUnion or membertypeclass == lldb.eTypeClassClass:
            # Nested aggregate: render it recursively (fields restart class_depth at 0).
            s, compact_size, m_align = _showStructPacking(m_type, membertypename, m_name, prefix+" ", depth + 1, 0, m_offset)
            outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s
        else:
            outstr += prefix + ("+%s,[%4d] (%s) %s%s" % (MakeOffsetStr(m_offset), m_size, membertypename, m_name, bitfield))
            compact_size = m_size
            m_align = m_type.GetAlign()
        if m_align > max_align:
            max_align = m_align

        if _compact_offset > m_offset:
            warningstr = " *** Possible memory overlap (msize:%d align:%d calc:%d actual:%d) ***" % (m_size, m_align, _compact_offset, m_offset)
        elif _compact_offset < m_offset:
            align_offset = ((_compact_offset + m_align-1) & -m_align)
            if align_offset != m_offset:
                # ignore memory holes that may be caused by field alignment
                warningstr = " *** Possible memory hole (msize:%d align:%d calcoff:0x%x calcaligned:0x%x actualoff:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
            _compact_offset = m_offset

        if classType == lldb.eTypeClassUnion:
            # Union members overlap; only the largest one counts.
            if m_size > max_union_member_size:
                max_union_member_size = m_size
        elif inBitField == False:
            _compact_offset += m_size
        outstr += warningstr + debugstr + "\n"

    if next_used_bits:
        print("Error: have carry over bits after fields next_used_bits:0x%08x at :%08x:" % (next_used_bits, member.address))
        next_used_bits = 0
        next_totalBits = 0
    if classType != lldb.eTypeClassUnion and inBitField:
        # Account for a bit-field unit still open at the end of the aggregate.
        _compact_offset += bitFieldTotalBytes
        inBitField = False

    # ---- virtual base classes (outermost class only) ----------------------------
    if class_depth == 0 and hasattr(symbol, "AllVirtualBaseClasses"):
        for virtualbaseclassinfo in symbol.AllVirtualBaseClasses:
            member = virtualbaseclassinfo.member
            m_type = member.GetType()
            membertypename = m_type.GetName()
            m_type = member.GetBaseType()
            m_size = m_type.GetByteSize()
            m_align = m_type.GetAlign()
            m_offset = ((_compact_offset + m_align-1) & -m_align)
            warningstr = " virtual"
            debugstr = ""
            _compact_offset = m_offset
            s, compact_size, a = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, 0, m_offset)
            outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
            _compact_offset += compact_size

    outstr += prefix + "}"
    if classType == lldb.eTypeClassUnion:
        _compact_offset += max_union_member_size
    return outstr, _compact_offset - begin_offset, max_align
# Splits the type text of a vtable entry (matched against the result of
# GetNameForType(True) in doOneVTable).  Written as a raw string so the regex
# escapes are not parsed as (invalid) string escape sequences.
vtableFunctionRE = re.compile(r"(.*?) \(\)\(/\*( const)? (.*?) \*,? \*/ ?(.*)\)")
# group(1) = function return type
# group(2) = ' const'
# group(3) = class from artificial parameter
# group(4) = parameters
def doOneVTable(symbol, prefix, vtableinfo):
    """Render one vtable of *symbol* as text; return "" when it has no slots.

    The header names the class (plus the vptr offset when it is non-zero).
    Every slot from 0 to ``vtableinfo.max`` gets one line: empty slots show
    only offset and size, known slots show the function, rewritten as
    ``ret [thunk]Class::name(params)[ const]`` when its type text can be
    parsed and the class in it matches the function's container type.
    """
    numEntries = vtableinfo.max + 1
    if numEntries <= 0:
        return ""

    if vtableinfo.vPtrOffset == 0:
        header = "vtable for %s {\n" % (symbol.GetName())
    else:
        header = "vtable for %s 0x%x {\n" % (symbol.GetName(), vtableinfo.vPtrOffset)
    pieces = [prefix + header]

    for slot in range(numEntries):
        m_name = None
        if slot in vtableinfo.mergedVTableEntries:
            item = vtableinfo.mergedVTableEntries[slot]
            func = item.DIE
            m_name = func.GetName()
            type_text = func.GetNameForType(True)
            owner = func.GetContainerType()
            if owner != None:
                owner_name = owner.GetName()
                owner_qualifier = owner_name + "::"
            else:
                owner_name = ""
                owner_qualifier = ""
            thunk = item.ThunkType if hasattr(item, "ThunkType") else ""

        m_size = symbol.compile_unit.addr_size
        where = MakeOffsetStr(slot * m_size)

        if m_name == None:
            # Nothing known about this slot: show only its position and size.
            pieces.append(prefix + ("+%s,[%4d]\n" % (where, m_size)))
            continue

        parsed = vtableFunctionRE.match(type_text)
        if parsed and owner_name == parsed.group(3):
            constpart = "" if parsed.group(2) == None else parsed.group(2)
            typepart = "" if parsed.group(1) == "void" else parsed.group(1)
            pieces.append(prefix + ("+%s,[%4d] %s %s%s::%s(%s)%s\n" % (where, m_size, typepart, thunk, owner_name, m_name, parsed.group(4), constpart)))
            continue

        # Unparsed type text, or a class mismatch: fall back to the raw form.
        pieces.append(prefix + ("+%s,[%4d] (%s) %s%s\n" % (where, m_size, type_text, owner_qualifier, m_name)))
        if parsed:
            print("Error: containertype '%s' doesn't match artifical parameter '%s'" % (owner_name, parsed.group(3)))

    pieces.append(prefix + "}")
    return "".join(pieces)
def _showVTablePacking(symbol, prefix): | |
outstr = "" | |
if hasattr(symbol, "VTables"): | |
for vtableinfo in symbol.VTables: | |
vstr = doOneVTable(symbol, prefix, vtableinfo) | |
if len(vstr) > 0: | |
if len(outstr) > 0: | |
outstr += "\n\n" | |
outstr += "%s" % vstr | |
if hasattr(symbol, "VTablesVirtual"): | |
for vtableinfo in symbol.VTablesVirtual: | |
vstr = doOneVTable(symbol, prefix, vtableinfo) | |
if len(vstr) > 0: | |
if len(outstr) > 0: | |
outstr += "\n\n" | |
outstr += "%s" % vstr | |
return outstr | |
def DumpAllStructs(parent, names):
    """Print layout and vtables of every named aggregate beneath *parent*.

    names -- collection of type names to restrict the dump to, or None for
             everything.

    Only DIEs with a name are considered.  A typedef is followed to the type
    it finally names, but is skipped when that type has the same name (the
    type itself gets dumped when it is reached).  A type is dumped only if
    it has a type class and at least one child.  All children are descended
    into, whether or not they were dumped.
    """
    for child in parent.children:
        if hasattr(child, "AT_name"):
            shown_name = child.GetName()
            target = child
            if target.tag == "TAG_typedef":
                # follow typedefs down to the underlying type
                while target.tag == "TAG_typedef" and target.HasType():
                    target = target.GetType()
                if target.GetName() == shown_name:
                    # don't do typedef if struct has same name, we'll do the struct when we get there
                    target = None

            wanted = (
                target != None
                and target.GetClass() != None
                and len(target.children) > 0
                and (names == None or target.GetName() in names)
            )
            if wanted:
                print("==========================================================================================")
                if hasattr(child, "AT_decl_file"):
                    heading = '0x%08x: "%s"\n' % (child.address, child.AT_decl_file)
                else:
                    heading = "0x%08x:\n" % child.address
                print(heading)
                layout, _size, _align = _showStructPacking(target, shown_name, None, "", 0, 0, 0)
                print(layout)
                vtables = _showVTablePacking(target, "")
                if vtables != "":
                    print(vtables)
        DumpAllStructs(child, names)
def DumpAllTypes(Hopper, parent):
    """Register every type DIE beneath *parent* with the *Hopper* collector.

    Walks the tree depth-first.  Each child's tag selects the Hopper method
    to call; tags that are covered through their parent (or need no entry)
    are skipped, and an unrecognised tag is reported with print().
    Unnamed reference/const/volatile types get a name synthesised from the
    type they refer to.
    """
    # Tags that produce no entry of their own.
    skipped_tags = frozenset([
        "TAG_compile_unit",
        "TAG_variable",
        "TAG_inheritance",                  # handled by TAG_class_type
        "TAG_member",                       # handled by TAG_class_type, TAG_structure_type, TAG_union_type (AT_data_member_location)
        "TAG_subprogram",                   # handled by TAG_class_type (AT_data_member_location)
        "TAG_formal_parameter",
        "TAG_subroutine_type",              # handled by TAG_pointer_type
        "TAG_subrange_type",                # handled by TAG_array_type
        "TAG_unspecified_parameters",       # handled by TAG_subroutine_type
        "TAG_enumerator",                   # handled by TAG_enumeration_type
        "TAG_lexical_block",
        "TAG_inlined_subroutine",
        "TAG_GNU_template_parameter_pack",  # template
        "TAG_imported_declaration",
        "TAG_imported_module",              # points to TAG_namespace
        "TAG_namespace",
        "TAG_template_type_parameter",
        "TAG_template_value_parameter",
        "TAG_unspecified_type",
    ])

    for node in parent.children:
        name = node.AT_name if hasattr(node, "AT_name") else None
        attype = node.GetType() if node.HasType() else None
        is_function_target = attype != None and attype.tag == "TAG_subroutine_type"
        tag = node.tag

        if tag in skipped_tags:
            pass
        elif tag == "TAG_pointer_type":
            if is_function_target:
                Hopper.AddFunctionPointer(node, name, attype)
            else:
                Hopper.AddPointer(node, name, attype)
        elif tag in ("TAG_reference_type", "TAG_rvalue_reference_type"):
            # I don't know what an rvalue_reference_type looks like so I use &_
            marker = "&" if tag == "TAG_reference_type" else "&_"
            if name == None:
                if attype == None:
                    name = marker
                else:
                    name = node.GetType().GetName() + " " + marker
            Hopper.AddPointer(node, name, attype)
        elif tag in ("TAG_const_type", "TAG_volatile_type"):
            qualifier = 'const' if tag == "TAG_const_type" else 'volatile'
            if name == None:
                if attype == None:
                    name = qualifier
                else:
                    name = qualifier + ' ' + node.GetType().GetName()
            Hopper.AddTypedef(node, name, attype, None)
        elif tag == "TAG_class_type":
            # might just be a declaration AT_declaration( true )
            # or it might contain children with AT_data_member_location
            # Direct inheritance TAG_inheritance DirectBaseClasses
            Hopper.AddClass(node, name, node)
        elif tag in ("TAG_structure_type", "TAG_union_type"):
            # might be a declaration - replace with define if it exists in same compileunit
            Hopper.AddStruct(node, name, node)
        elif tag == "TAG_typedef":
            Hopper.AddTypedef(node, name, attype, None)
        elif tag == "TAG_base_type":
            Hopper.AddBaseType(node)
        elif tag == "TAG_ptr_to_member_type":
            if is_function_target:
                Hopper.AddPointerToMember(node, name, attype)
            else:
                print("Error: unexpected tag :%08x:" % node.address)
        elif tag == "TAG_array_type":
            Hopper.AddArray(node, name, attype)
        elif tag == "TAG_enumeration_type":
            Hopper.AddEnumeration(node, name)
        else:
            print("Error: unknown tag '%s':" % node.tag)

        DumpAllTypes(Hopper, node)
# Script entry point: dump the struct layouts of every dSYM (or saved
# dwarfdump text file) named on the command line.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Expected usage: {0} <dsym>'.format(sys.argv[0]))
        sys.exit(1)

    H = Hopper()
    for path in sys.argv[1:]:
        print("==========================================================================================")
        print("The file: %s\n" % path)
        dSYMr = DSYM_Reader()
        dSYM = dSYMr.Process_dSYM(path)
        DumpAllStructs(dSYM, None)

        #••••••• TO DO: Finish DumpAllTypes
        #DumpAllTypes(H, dSYM)

    # Disabled debugging dump of the collected Hopper types, kept as an
    # inert string.
    '''
    pp = pprint.PrettyPrinter(indent=4, depth=10)
    pp.pprint(H.Types)
    pp.pprint(H.UUIDs)
    for k,v in H.UUIDs.items():
        pp.pprint(k)
        for attr, value in v.__dict__.iteritems():
            print attr, value
    '''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment