Skip to content

Instantly share code, notes, and snippets.

@mahmoudimus
Forked from Jinmo/_.md
Last active April 9, 2025 16:18
Show Gist options
  • Save mahmoudimus/d16aa9ed85053fc4b7e46a2d37203b21 to your computer and use it in GitHub Desktop.
Save mahmoudimus/d16aa9ed85053fc4b7e46a2d37203b21 to your computer and use it in GitHub Desktop.
C/C++ header to IDA

Usage

In IDAPython,

execfile('<path>/cxxparser.py')
parse_file('<path>/a.cpp',[r'-I<path>\LuaJIT-2.0.5\src', '-D__NT__', '-D__X64__', '-D__EA64__'])
parse_file('<path>/malloc.c',['-target=x86_64-linux-gnu'])

a.cpp

#include <lj_crecord.h>

struct MyStruct2 {
	int meh;
};

struct MyStruct: public MyStruct2 {
	struct MyStruct2 *a;
	char b;
	char c;
	int d;
};

TODO

  • create an IDE-agnostic database
  • optimize
r"""
execfile('<path>/cxxparser.py')
parse_file('<path>/a.cpp',[r'-I<path>\LuaJIT-2.0.5\src', '-D__NT__', '-D__X64__', '-D__EA64__'])
parse_file('<path>/malloc.c',['-target=x86_64-linux-gnu'])
Originally from: https://gist.github.com/Jinmo/5f131a8bf3335f747e0ae7d6d6b881a4
Modified by Mahmoud Abdelkader @ https://gist.github.com/mahmoudimus/d16aa9ed85053fc4b7e46a2d37203b21
"""
import re
import sys
from functools import reduce
from clang.cindex import (
BaseEnumeration,
CursorKind,
Index,
TranslationUnit,
TypeKind,
conf,
)
# only import idapro if we're not running in ida
if not any(sys.executable.endswith(x) for x in ["ida.exe", "ida64.exe"]):
import idapro
import idaapi
class _ParserConfig:
RAISE_ON_UNKNOWN_TYPE = False
class CallingConv(BaseEnumeration):
    """Describes the calling convention of a function."""

    # The numeric ids below are what ``CallingConv.from_id`` receives from
    # ``conf.lib.clang_getFunctionTypeCallingConv`` in ``resolve_function``.
    # NOTE(review): presumably they mirror libclang's CXCallingConv enum --
    # confirm against the libclang version in use.
    Default = 0
    C = 1
    X86StdCall = 2
    X86FastCall = 3
    X86ThisCall = 4
    X86Pascal = 5
    AAPCS = 6
    AAPCS_VFP = 7
    X86RegCall = 8
    IntelOclBicc = 9
    Win64 = 10
    # Alias: same value as Win64.
    X86_64Win64 = Win64
    X86_64SysV = 11
    X86VectorCall = 12
    Swift = 13
    PreserveMost = 14
    PreserveAll = 15
    AArch64VectorCall = 16
    Invalid = 100
    Unexposed = 200
    # NOTE(review): class-level bookkeeping attributes; presumably expected by
    # ``BaseEnumeration`` in some clang.cindex releases -- confirm they are
    # still needed (and harmless) with the installed bindings.
    _kinds = []
    _name_map = None
# Registry mapping a clang CursorKind to the function that processes it;
# populated through the ``handle`` decorator.
handlers = {}
# Type library handle used for every type lookup and registration below.
idati = idaapi.get_idati()
# Pointer flag/size pair, chosen from the width of BADADDR: a 64-bit
# BADADDR selects 8-byte pointers, anything else 4-byte pointers.
FF_POINTER, POINTER_SIZE = (
    (idaapi.FF_QWORD, 8) if idaapi.BADADDR == 2**64 - 1 else (idaapi.FF_DWORD, 4)
)
def preprocess(dict):
    """Normalise a ``{kind: (flags, declaration)}`` table.

    Args:
        dict: mapping whose values are ``(ida_flags, declaration)`` pairs,
            where ``declaration`` is a C type string or ``None``.

    Returns:
        A new mapping with the same keys. Each value is
        ``(flags & 0xFFFFFFFF, encoded)`` where ``encoded`` is ``None`` for a
        ``None`` declaration, ``b"\\x01"`` for ``"void"``, and otherwise the
        result of ``idaapi.tinfo_t(declaration).get_decltype()``.
    """

    def _encode(declaration):
        # No declaration string: nothing to encode.
        if declaration is None:
            return None
        # "void" is special-cased to a fixed one-byte encoding.
        if declaration == "void":
            return b"\x01"
        return idaapi.tinfo_t(declaration).get_decltype()

    return {
        kind: (flags & 0xFFFFFFFF, _encode(declaration))
        for kind, (flags, declaration) in dict.items()
    }
def _size_to_flags(size):
    """Return the IDA data flag for an integer that is *size* bytes wide.

    Raises:
        KeyError: if *size* is not 1, 2, 4 or 8.
    """
    flags_by_width = dict(
        (
            (1, idaapi.FF_BYTE),
            (2, idaapi.FF_WORD),
            (4, idaapi.FF_DWORD),
            (8, idaapi.FF_QWORD),
        )
    )
    return flags_by_width[size]
# Lookup table: clang TypeKind -> (IDA data flags, encoded declaration).
# Each entry is written as (flags, C declaration string or None) and passed
# through ``preprocess``; entries whose declaration is None (records and
# pointers/references) are not treated as primitives by ``is_primitive``.
builtin_types = preprocess(
    {
        TypeKind.RECORD: (idaapi.FF_STRUCT, None),
        # Enums are represented as a plain 4-byte "int".
        TypeKind.ENUM: (idaapi.FF_DWORD, "int"),
        # Sizes of bool/short/int/long/long long come from the compiler
        # settings stored in the type library (idati.cc).
        TypeKind.BOOL: (_size_to_flags(idati.cc.size_b), "bool"),
        TypeKind.DOUBLE: (idaapi.FF_DOUBLE, "double"),
        # long double is mapped onto plain double here.
        TypeKind.LONGDOUBLE: (idaapi.FF_DOUBLE, "double"),
        TypeKind.FLOAT: (idaapi.FF_FLOAT, "float"),
        # wchar_t and char16_t are both mapped to a 16-bit unsigned short.
        TypeKind.WCHAR: (idaapi.FF_WORD, "unsigned short"),
        TypeKind.CHAR16: (idaapi.FF_WORD, "unsigned short"),
        TypeKind.CHAR32: (idaapi.FF_DWORD, "unsigned int"),
        TypeKind.SHORT: (_size_to_flags(idati.cc.size_s), "short"),
        TypeKind.USHORT: (_size_to_flags(idati.cc.size_s), "unsigned short"),
        TypeKind.INT: (_size_to_flags(idati.cc.size_i), "int"),
        TypeKind.LONG: (_size_to_flags(idati.cc.size_l), "long"),
        TypeKind.LONGLONG: (_size_to_flags(idati.cc.size_ll), "long long"),
        TypeKind.UINT: (_size_to_flags(idati.cc.size_i), "unsigned int"),
        TypeKind.ULONG: (_size_to_flags(idati.cc.size_l), "unsigned long"),
        TypeKind.ULONGLONG: (_size_to_flags(idati.cc.size_ll), "unsigned long long"),
        TypeKind.CHAR_S: (idaapi.FF_BYTE, "signed char"),
        TypeKind.CHAR_U: (idaapi.FF_BYTE, "unsigned char"),
        TypeKind.SCHAR: (idaapi.FF_BYTE, "signed char"),
        TypeKind.UCHAR: (idaapi.FF_BYTE, "unsigned char"),
        TypeKind.INT128: (idaapi.FF_OWORD, "__int128"),
        TypeKind.UINT128: (idaapi.FF_OWORD, "unsigned __int128"),
        TypeKind.VOID: (idaapi.FF_0VOID, "void"),
        # Pointers and references share the same offset-typed pointer flags.
        TypeKind.POINTER: (idaapi.FF_0OFF | FF_POINTER, None),
        TypeKind.LVALUEREFERENCE: (idaapi.FF_0OFF | FF_POINTER, None),
    }
)
# Translation from clang calling conventions to IDA's CM_CC_* constants.
# Conventions missing from this table fall back to cdecl at the lookup site
# in ``resolve_function``.
callingconv_map = {
    CallingConv.C: idaapi.CM_CC_CDECL,
    CallingConv.X86StdCall: idaapi.CM_CC_STDCALL,
    CallingConv.X86FastCall: idaapi.CM_CC_FASTCALL,
    CallingConv.X86ThisCall: idaapi.CM_CC_THISCALL,
    CallingConv.X86Pascal: idaapi.CM_CC_PASCAL,
}
# Cache of already-processed types, keyed by normalised type name. Each value
# is a dict with the keys "bases", "is_typedef" and "resolved".
visited = {}
# Normalised class name -> list of virtual-method cursors for that class.
virtuals_mapping = {}
def handle(kind):
    """Build a decorator that registers a function as the handler for *kind*.

    The decorated function is stored in the module-level ``handlers`` mapping
    under *kind* (replacing any previous entry) and returned unchanged, so
    several ``@handle(...)`` decorators can be stacked on one function.
    """

    def register(callback):
        handlers.update({kind: callback})
        return callback

    return register
@handle(CursorKind.ENUM_DECL)
def handle_enum(item, context):
    """Create an IDA enum from a clang enum declaration cursor.

    Every child of *item* contributes one ``(spelling, enum_value)`` member.
    *context* is accepted for handler-signature uniformity and is not used.
    """
    # Collect all constants first, then create the enum and add them in order.
    constants = [(child.spelling, child.enum_value) for child in item.get_children()]
    enum_id = idaapi.add_enum(idaapi.BADADDR, item.spelling, 0)
    for constant_name, constant_value in constants:
        idaapi.add_enum_member(enum_id, constant_name, constant_value, -1)
class Struct:
    """Builder for a named struct/union type stored in the ``idati`` library.

    Members are accumulated in ``self.udt`` through ``add_member`` and written
    to the type library by ``save``. The constructor saves immediately, so the
    name is registered before any member has been added.
    """

    def __init__(self, name, is_union, flags=0):
        """Create the builder and register an initial type under *name*.

        Args:
            name: name of the type to create.
            is_union: true to build a union, false to build a struct.
            flags: value stored in ``udt.taudt_bits``.
        """
        self.is_union = is_union
        self.ti = idaapi.tinfo_t()
        self.udt = idaapi.udt_type_data_t()
        self.udt.taudt_bits = flags
        self.name = name
        self.save(True)

    def add_member(self, name, offset, flag, size, tif):
        """Append one member to the pending UDT definition.

        Args:
            name: member name; a name ending in ``"_vftable"`` is additionally
                marked as a vftable member.
            offset: stored as ``udt_member_t.offset``.
            flag: accepted for call-site compatibility; not used.
            size: stored as ``udt_member_t.size``.
            tif: ``tinfo_t`` describing the member type.

        NOTE(review): callers in this file pass byte offsets/sizes; confirm
        against the IDA SDK which unit ``udt_member_t.offset``/``size`` expect.
        """
        member = idaapi.udt_member_t()
        member.offset = offset
        member.name = name
        member.size = size
        member.type = tif
        if name.endswith("_vftable"):
            member.set_vftable()
        self.udt.push_back(member)

    def set_align(self, align):
        """Store *align* as the UDT's ``effalign`` value."""
        self.udt.effalign = align

    def save(self, replace=True):
        """Write the current definition to the type library.

        Args:
            replace: when true, ``NTF_REPLACE`` is passed so that an existing
                type with the same name is replaced.

        Returns:
            The ``tinfo_t`` that now holds the saved type.

        Raises:
            Exception: if the name is rejected (``TERR_WRONGNAME``) or if
                ``set_named_type`` reports any other unexpected error code.
        """
        name = self.name
        udt_kind = idaapi.BTF_UNION if self.is_union else idaapi.BTF_STRUCT
        while True:
            self.ti.create_udt(self.udt, udt_kind)
            res = self.ti.set_named_type(
                idati, name, idaapi.NTF_REPLACE if replace else 0
            )
            if res == idaapi.TERR_OK:
                break
            if res == idaapi.TERR_SAVE:  # name conflict
                # A conflict is only expected when we did not ask to replace.
                assert replace == False, "?!"
                # Retry under a new name with an extra leading underscore.
                name = "_" + name
            elif res == idaapi.TERR_WRONGNAME:
                raise Exception("not allowed name: %r" % name)
            else:
                # Retrying with identical arguments cannot succeed; without
                # this branch any other error code would spin forever.
                raise Exception(
                    "failed to save type %r (set_named_type returned %r)"
                    % (name, res)
                )
        self.name = name
        return self.ti
def is_primitive(kind):
    """Return the encoded declaration for *kind* if it is a builtin type.

    Returns ``False`` when *kind* is not in ``builtin_types``; otherwise the
    second element of its entry, which is ``None`` (falsy) for kinds that have
    no declaration string. Callers use the result for its truthiness.
    """
    return builtin_types[kind][1] if kind in builtin_types else False
def resolve_pointer(type, context):
    """Build an IDA pointer ``tinfo_t`` for a clang pointer/reference type.

    Args:
        type: object exposing ``get_pointee()`` (a clang type, or the
            ``FakeType`` produced by ``_make_vtable``).
        context: ``Context`` used for name generation and lookup.

    Returns:
        A ``tinfo_t`` created with ``create_ptr`` around the resolved pointee.
    """
    tif = idaapi.tinfo_t()
    pointee = type.get_pointee()
    # Register the pointee up front; the return value is discarded here and
    # the type is looked up again in the branches below.
    _register_type(pointee, context)
    pointee_type = idaapi.tinfo_t()
    # Unexposed kinds are replaced by their canonical type before dispatch.
    if pointee.kind in (TypeKind.UNEXPOSED,):
        pointee = pointee.get_canonical()
    if pointee.kind in (
        TypeKind.TYPEDEF,
        TypeKind.POINTER,
        TypeKind.LVALUEREFERENCE,
        TypeKind.ELABORATED,
    ):
        pointee_type = _register_type(pointee, context)
    elif pointee.kind == TypeKind.INVALID:
        # An invalid pointee is treated as void.
        pointee_type.deserialize(idati, builtin_types[TypeKind.VOID][1], b"")
    elif pointee.kind == TypeKind.FUNCTIONPROTO or is_primitive(pointee.kind):
        pointee_type = _register_type(pointee, context)
    else:
        # Any other kind is looked up by name through the namespace scopes;
        # if no scope yields a named type, a struct forward declaration is
        # created under the raw spelling.
        name = pointee.spelling
        if not context.resolve(
            name, lambda name: pointee_type.get_named_type(idati, name)
        ):
            pointee_type.create_forward_decl(idati, idaapi.BTF_STRUCT, name)
    # _register_type may return None (for example for a cache entry that is
    # still being resolved); fall back to a forward declaration in that case.
    if pointee_type is None:
        pointee_type = idaapi.tinfo_t()
        # NOTE(review): the call lives inside ``assert`` and is therefore
        # skipped under ``python -O``; also confirm whether a successful
        # create_forward_decl returns a truthy value.
        assert pointee_type.create_forward_decl(
            idati, idaapi.BTF_STRUCT, pointee.spelling
        )
    tif.create_ptr(pointee_type)
    return tif
def _make_vtable(name, virtuals, context):
    """Create the ``<name>_vftable`` struct and describe a pointer to it.

    Args:
        name: class name the vtable belongs to.
        virtuals: cursors of the virtual methods, in slot order.
        context: ``Context`` forwarded to ``resolve_function``.

    Returns:
        A lightweight stand-in for a clang pointer type whose pointee is the
        new vtable record, suitable for the member list in ``_register_type``.
    """

    class FakeType(object):
        """Minimal stand-in offering the clang type methods used by callers."""

        def __init__(self, kind, spelling, size=POINTER_SIZE, pointee=None):
            self.kind, self.spelling = kind, spelling
            self.size, self.pointee = size, pointee

        def get_size(self):
            return self.size

        def get_canonical(self):
            return self

        def get_pointee(self):
            return self.pointee

    vtable_name = "%s_vftable" % (name)
    vtable = Struct(vtable_name, False, idaapi.TAUDT_VFTABLE)

    # One pointer-sized slot per virtual method, laid out back to back.
    slot_offset = 0
    for method in virtuals:
        slot_flags = FF_POINTER | idaapi.FF_0OFF
        slot_name = "%s" % (method.spelling)
        slot_type = resolve_function(method.type, context, class_=name)
        # Turn the function type into a pointer to that function type.
        slot_type.create_ptr(slot_type)
        vtable.add_member(slot_name, slot_offset, slot_flags, POINTER_SIZE, slot_type)
        slot_offset += POINTER_SIZE

    # Record the vtable name as visited before the final save.
    visited[vtable_name] = {"bases": [], "is_typedef": False, "resolved": None}
    vtable.save()

    vtable_record = FakeType(TypeKind.RECORD, vtable_name)
    return FakeType(TypeKind.POINTER, vtable_name + " *", pointee=vtable_record)
def resolve_function(type, context, flags=0, class_=None):
    """Build an IDA function ``tinfo_t`` from a clang function prototype.

    Args:
        type: clang function type.
        context: ``Context`` forwarded to ``_register_type``.
        flags: value stored in the function data's ``flags`` field.
        class_: optional class name; when given, a pointer to that class is
            prepended as the first argument.

    Returns:
        The ``tinfo_t`` created from the collected prototype data.
    """
    prototype = idaapi.func_type_data_t()
    prototype.flags = flags
    prototype.rettype = _register_type(type.get_result(), context)
    prototype.stkargs = 0
    prototype.spoiled.clear()
    prototype.clear()

    clang_cc = CallingConv.from_id(conf.lib.clang_getFunctionTypeCallingConv(type))
    if type.is_function_variadic():
        # ida only supports cdecl + ellipsis when varargs exists
        prototype.cc = idaapi.CM_CC_ELLIPSIS
    elif class_:
        # Member functions default to thiscall (fastcall is an alternative).
        prototype.cc = idaapi.CM_CC_THISCALL
    else:
        prototype.cc = callingconv_map.get(clang_cc, idaapi.CM_CC_CDECL)

    if class_:
        # Implicit first argument: pointer to the owning class.
        this_arg = idaapi.funcarg_t()
        this_type = _create_forward_declaration(class_)
        this_type.create_ptr(this_type)
        this_arg.type = this_type
        prototype.push_back(this_arg)

    for argument in type.argument_types():
        formal = idaapi.funcarg_t()
        formal.type = _register_type(argument, context)
        prototype.push_back(formal)

    func = idaapi.tinfo_t()
    func.create_func(prototype)
    func.get_func_details(prototype)
    return func
def _create_forward_declaration(typename):
    """Return a ``tinfo_t`` for *typename*, forward-declaring it if unknown.

    If the type library already has a type named *typename* it is returned;
    otherwise a struct forward declaration with that name is created.
    """
    tif = idaapi.tinfo_t()
    if not tif.get_named_type(idati, typename):
        tif.create_forward_decl(idati, idaapi.BTF_STRUCT, typename)
    return tif
def _register_type(type, context, bases=[], virtuals=[]):
    """Convert a clang type into an IDA ``tinfo_t``, caching by type name.

    The type's spelling is normalised with ``context.name`` and used as the
    key of the module-level ``visited`` cache. The function then dispatches
    on ``type.kind`` (elaborated, arrays, primitives, function prototypes,
    pointers/references, typedefs and records).

    Args:
        type: clang type (or a compatible stand-in exposing the same methods).
        context: ``Context`` providing the namespace scope for naming.
        bases: base-specifier cursors of a record; only read in the RECORD
            branch.
        virtuals: virtual-method cursors declared on the record itself; only
            read in the RECORD branch.

    Returns:
        The resolved ``tinfo_t``. ``None`` is returned when the cache holds an
        entry whose ``"resolved"`` value is still ``None`` (an entry is stored
        before recursion, so a type met again while being resolved hits this)
        or when no branch handles ``type.kind``.

    NOTE(review): ``bases`` and ``virtuals`` use mutable default lists; this
    function never mutates them in place (``virtuals`` is only rebound).
    """
    # NOTE(review): ``debug`` is only ever assigned in this function, never
    # read; it looks like a leftover aid for interactive debugging.
    global debug
    typename = context.name(type.spelling)
    found = visited.get(typename)
    if found:
        return found["resolved"]
    result = {
        "bases": bases,
        "is_typedef": type.kind == TypeKind.TYPEDEF,
        "resolved": None,
    }
    # Unexposed types are replaced by their canonical type; note that
    # ``typename`` was computed from the original spelling above.
    if type.kind == TypeKind.UNEXPOSED:
        type = type.get_canonical()
    # Cache the placeholder before recursing so that self-references stop at
    # the cache lookup above. Typedef/elaborated kinds are cached later.
    if type.kind not in (TypeKind.TYPEDEF, TypeKind.ELABORATED):
        visited[typename] = result
    if type.kind == TypeKind.ELABORATED:
        # Elaborated types become a by-name reference to ``typename``.
        result["resolved"] = tif = idaapi.tinfo_t()
        tif.create_typedef(idati, typename, idaapi.BTF_STRUCT)
        visited[typename] = result
        return tif
    if type.kind == TypeKind.VARIABLEARRAY:
        # Variable-length arrays are represented as a pointer to the element.
        tif = idaapi.tinfo_t()
        tif.create_ptr(_register_type(type.element_type, context))
        result["resolved"] = tif
        return tif
    if type.kind in (TypeKind.CONSTANTARRAY, TypeKind.VECTOR, TypeKind.INCOMPLETEARRAY):
        # Incomplete arrays have no element count; one element is used.
        count = type.element_count if type.kind != TypeKind.INCOMPLETEARRAY else 1
        tif = idaapi.tinfo_t()
        debug = type
        tif.create_array(_register_type(type.element_type, context), count)
        result["resolved"] = tif
        return tif
    if is_primitive(type.kind):
        tif = idaapi.tinfo_t()
        # NOTE(review): the deserialize call sits inside ``assert`` and is
        # therefore skipped entirely under ``python -O``.
        assert tif.deserialize(idati, builtin_types[type.kind][1], b"")
        result["resolved"] = tif
        return tif
    if type.kind == TypeKind.FUNCTIONPROTO:
        result["resolved"] = tif = resolve_function(type, context)
        return tif
    if type.kind in (TypeKind.POINTER, TypeKind.LVALUEREFERENCE):
        result["resolved"] = tif = resolve_pointer(type, context)
        return tif
    if type.kind == TypeKind.TYPEDEF:
        canonical = type.get_canonical()
        original = canonical.spelling
        # if original == typename:
        # del visited[typename]
        tif = _register_type(canonical, context)
        if original != typename and tif:
            # Publish the canonical type under the typedef's own name.
            tif.set_named_type(idati, typename, idaapi.NTF_TYPE)
            result["resolved"] = tif
            visited[typename] = result
            # Share the virtual-method list of the target (template
            # arguments stripped from both names) with the alias.
            origkey = typename.split("<")[0]
            target = canonical.spelling.split("<")[0]
            if target in virtuals_mapping:
                virtuals_mapping[origkey] = virtuals_mapping[target]
            return tif
        else:
            # Same name as the canonical type, or nothing resolved: return
            # the canonical result without caching the typedef.
            debug = type
            return tif
    if type.kind == TypeKind.RECORD:
        align = type.get_align()
        item = type
        unique_sizes = set(x.get_field_offsetof() for x in type.get_fields())
        # NOTE(review): -2 presumably is clang's "incomplete type" layout
        # error code -- confirm. ``should_guess`` is assigned but never read.
        if len(unique_sizes) == 1 and list(unique_sizes)[0] == -2:
            is_union = False
            should_guess = True
        else:
            # Several fields that all share one offset are taken as a union.
            is_union = len(unique_sizes) == 1 and len(list(type.get_fields())) != 1
            should_guess = False
        if item.get_size() == -2:
            # forward declaration
            tif = _create_forward_declaration(typename)
            # Drop the cache entry so a later full definition is processed.
            del visited[typename]
            result["resolved"] = tif
            return tif
        # ``members`` holds (bit offset, type, name) triples.
        members = []
        offset = 0
        # populate_bases(members, base)
        # ``delta`` is never changed from 0 in this function.
        delta = 0
        has_virtuals = False
        # First pass over the bases: does any base have virtual methods?
        # The subscript raises KeyError for a base that has no entry in
        # ``virtuals_mapping``.
        for i, base in enumerate(bases):
            base_type = base.type
            base_size = base.type.get_size()
            base_align = base.type.get_align()
            if virtuals_mapping[context.name(base_type.spelling.split("<")[0])]:
                has_virtuals = True
        has_virtuals = has_virtuals or len(virtuals)
        # Second pass: flatten each base's fields into ``members``.
        for i, base in enumerate(bases):
            base_type = base.type
            base_size = base.type.get_size()
            base_align = base.type.get_align()
            base_virtuals = virtuals_mapping[
                context.name(base_type.spelling.split("<")[0])
            ]
            # Reserve room for a vtable pointer in front of the first base
            # when the class has virtuals but that base has none.
            vtable_delta = (
                POINTER_SIZE if has_virtuals and not base_virtuals and i == 0 else 0
            )
            for member in base_type.get_fields():
                # If has virtuals and not first, we should substract vtable pointer size
                members.append(
                    (
                        vtable_delta * 8
                        + offset
                        + member.get_field_offsetof()
                        - delta * 8,
                        member.type,
                        "base%d_%s" % (i, member.spelling),
                    )
                )
                print(delta, offset // 8, member.get_field_offsetof() // 8)
            if i:
                # Every base after the first gets its own vtable pointer
                # member, placed at the start of that base.
                members.insert(
                    0,
                    (
                        offset,
                        _make_vtable(
                            context.name(base.spelling), base_virtuals, context
                        ),
                        "base%d__vftable" % i,
                    ),
                )
            _register_type(base_type, context)
            # Advance by the base size rounded up to its alignment, in bits.
            offset += (base_size + base_align - 1) // base_align * base_align * 8
            if not i:
                # The first base's virtuals come first in this class's vtable.
                virtuals = base_virtuals + virtuals
        # De-duplicate virtuals by (name, type spelling), keeping the first
        # occurrence of each.
        __visited = set()
        virtuals = [
            x
            for x in virtuals
            if (x.spelling, x.type.spelling) not in __visited
            and (__visited.add((x.spelling, x.type.spelling)) or True)
        ]
        if virtuals:
            # The class's own vtable pointer is the first member, at offset 0.
            members.insert(
                0, (0, _make_vtable(typename, virtuals, context), "__vftable")
            )
        virtuals_mapping[typename] = virtuals
        for member in item.get_fields():
            # Bitfields are skipped.
            if member.is_bitfield():
                continue
            if member.kind == CursorKind.FIELD_DECL:
                members.append(
                    (member.get_field_offsetof(), member.type, member.spelling)
                )
            else:
                continue
        struc = Struct(typename, is_union, idaapi.TAUDT_CPPOBJ if virtuals else 0)
        struc.set_align(align.bit_length() - 1)
        # Note: the loop rebinds ``offset`` and ``type``.
        for offset, type, name_ in members:
            size = type.get_size()
            # Unnamed members get a synthetic name from their byte offset.
            if not name_:
                name_ = "__offset%x" % (offset >> 3)
            if size < 0:
                if type.kind == TypeKind.INCOMPLETEARRAY:
                    # later fixed to array
                    size = type.element_type.get_size()
                else:
                    # Unknown size: report, then raise or skip per config.
                    print(type)
                    if _ParserConfig.RAISE_ON_UNKNOWN_TYPE:
                        raise Exception("Unknown type: %s" % type)
                    continue
            flag = 0
            canonical = type.get_canonical()
            tif = None
            if canonical.kind == TypeKind.RECORD:
                tif = _register_type(canonical, context)
            elif canonical.kind in (TypeKind.POINTER, TypeKind.LVALUEREFERENCE):
                tif = resolve_pointer(canonical, context)
            else:
                if canonical.kind in builtin_types:
                    flag |= builtin_types[canonical.kind][0]
                tif = _register_type(canonical, context)
            # Members that do not start on a byte boundary are skipped.
            if offset % 8:
                continue
            # Bit offset is converted to bytes for the struct builder.
            res = struc.add_member(name_, offset >> 3, flag, size, tif)
        tif = result["resolved"] = struc.save()
        return tif
@handle(CursorKind.CLASS_DECL)
@handle(CursorKind.CLASS_TEMPLATE)
@handle(CursorKind.STRUCT_DECL)
@handle(CursorKind.UNION_DECL)
def handle_struct(item, context):
    """Register a class/struct/union declaration together with its bases.

    Collects the declaration's base specifiers and virtual methods
    (methods, destructors and constructors reporting ``is_virtual_method``),
    records the virtual list in ``virtuals_mapping`` and hands everything to
    ``_register_type``.
    """
    children = list(item.get_children())

    # Is there any way to check if it's forward declaration or not?
    if not children and item.type.get_size() > 1:
        # forward class/struct declaration
        return

    method_kinds = (
        CursorKind.CXX_METHOD,
        CursorKind.DESTRUCTOR,
        CursorKind.CONSTRUCTOR,
    )

    # The mapping entry and the list passed on below are the same object.
    virtuals = []
    virtuals_mapping[context.name(item.spelling)] = virtuals

    bases = [
        child for child in children if child.kind == CursorKind.CXX_BASE_SPECIFIER
    ]
    virtuals.extend(
        child
        for child in children
        if child.kind in method_kinds and child.is_virtual_method()
    )

    _register_type(item.type, context, bases, virtuals)
@handle(CursorKind.TYPEDEF_DECL)
@handle(CursorKind.TYPE_ALIAS_DECL)
@handle(CursorKind.FUNCTION_DECL)
@handle(CursorKind.VAR_DECL)
def typedefs(item, context):
    """Register the declaration's type and, for symbols, apply it in the IDB.

    The type of *item* is always registered. For function and variable
    declarations whose name resolves to an address in the database, the
    resolved type is additionally applied at that address.
    """
    tif = _register_type(item.type, context)

    if item.kind not in (CursorKind.FUNCTION_DECL, CursorKind.VAR_DECL):
        return

    address = idaapi.get_name_ea_simple(item.spelling)
    if address == idaapi.BADADDR:
        # No symbol of that name in the database.
        return

    idaapi.apply_tinfo(
        address, tif, idaapi.TINFO_DELAYFUNC | idaapi.TINFO_DEFINITE
    )
@handle(CursorKind.NAMESPACE)
def namespace(item, context):
    """Process a namespace's children in a scope nested one level deeper."""
    nested_context = context.nest_namespace(item.spelling)
    process_cursor(item, nested_context)
@handle(CursorKind.LINKAGE_SPEC)
@handle(CursorKind.UNEXPOSED_DECL)
def linkage(item, context):
    """Process the children of a linkage spec or unexposed declaration.

    These wrappers do not open a new scope, so the current *context* is
    passed through unchanged.
    """
    process_cursor(item, context=context)
def parse_file(path, args=None):
    """Parse a C/C++ source file and import its declarations.

    Args:
        path: path of the source file handed to clang.
        args: optional list of extra clang command-line arguments.

    The argument list is copied before being forwarded, so neither the
    caller's list nor a shared default can be modified by the callee.
    """
    parse_file_with_settings(path, _ParserConfig, list(args) if args else [])
def parse_file_with_settings(path, opts, args=None):
    """Parse a C/C++ source file with clang and import its declarations.

    Args:
        path: path of the source file handed to clang.
        opts: settings object; currently not consulted here (the importer
            reads ``_ParserConfig`` directly).
        args: optional list of extra clang command-line arguments. The list
            is not modified.

    The ``-m64``/``-m32`` flag matching the database's address width is put
    first, before parsing, so it takes effect and caller-supplied arguments
    that follow it can still override it.
    """
    width_flag = "-m64" if idaapi.BADADDR == 2**64 - 1 else "-m32"
    clang_args = [width_flag]
    clang_args.extend(args or ())
    index = Index.create()
    tx = index.parse(path, clang_args)
    process_cursor(tx.cursor)
def parse_ast(path, args=None):
    """Load a serialized clang AST file and import its declarations.

    Args:
        path: path of the AST file to load.
        args: accepted for signature compatibility with ``parse_file``; it is
            not passed to ``TranslationUnit.from_ast_file`` and therefore has
            no effect. The list is left untouched.
    """
    index = Index.create()
    tx = TranslationUnit.from_ast_file(path, index)
    process_cursor(tx.cursor)
class Context(object):
    """Namespace scope used to build and look up qualified type names."""

    def __init__(self, namespaces=None):
        """Create a scope.

        Args:
            namespaces: list of enclosing namespace names, outermost first.
                Omitted or ``None`` means the global scope.
        """
        self.namespaces = [] if namespaces is None else namespaces

    def nest_namespace(self, namespace):
        """Return a new scope nested one level deeper inside *namespace*."""
        return Context(namespaces=self.namespaces + [namespace])

    def name(self, name):
        """Return *name* normalised and qualified with this scope."""
        return Context._generate_name(name, self.namespaces)

    @staticmethod
    def _generate_name(name, namespaces):
        """Normalise a type spelling and prefix it with *namespaces*.

        Leading ``const``/``volatile``/``struct``/``union``/``class`` keywords
        are removed, and every character outside ``a-zA-Z0-9:<>=`` is replaced
        by an underscore. The ``ns1::ns2::`` prefix is added unless the name
        already starts with it.
        """
        name = re.sub("^(const |volatile |struct |union |class )+", "", name)
        name = re.sub(r"[^a-zA-Z0-9:<>=]", "_", name)
        if not namespaces:
            return name
        prefix = "::".join(namespaces) + "::"
        if name.startswith(prefix):
            return name
        return "%s%s" % (prefix, name)

    def resolve(self, name, predicate):
        """Try *predicate* on *name* in each scope, innermost first.

        The name is qualified with the full namespace chain, then with each
        shorter enclosing chain, and finally left unqualified. The walk stops
        at the first truthy result.

        Args:
            name: type spelling to look up.
            predicate: callable taking a qualified name.

        Returns:
            The first truthy value returned by *predicate*, or the last falsy
            value if no scope matched.
        """
        outcome = False
        # Count the depth downwards: slicing with ``[:-i]`` would yield an
        # empty list for i == 0 and skip the innermost scope entirely.
        for depth in range(len(self.namespaces), -1, -1):
            candidate = Context._generate_name(name, self.namespaces[:depth])
            outcome = predicate(candidate)
            if outcome:
                break
        return outcome
def process_cursor(cursor, context=None):
    """Dispatch every child of *cursor* to its registered handler.

    Args:
        cursor: clang cursor whose direct children are processed.
        context: namespace ``Context``; a fresh global-scope context is
            created when omitted.

    Each child is logged with ``print`` before dispatch. Children whose kind
    has no entry in ``handlers`` are skipped.
    """
    if context is None:
        context = Context()
    for item in cursor.get_children():
        # Some declarations have no source file attached to their location;
        # log None for those instead of failing on ``.name``.
        source_file = item.location.file
        file_name = source_file.name if source_file is not None else None
        print(file_name, item.location.line, item.kind, item.spelling)
        handler = handlers.get(item.kind)
        if handler is not None:
            handler(item, context)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment