Skip to content

Instantly share code, notes, and snippets.

@mrexodia
Created June 13, 2025 15:39
Show Gist options
  • Save mrexodia/c6aa2dec49a8b090509a313494b6e8c7 to your computer and use it in GitHub Desktop.
Save mrexodia/c6aa2dec49a8b090509a313494b6e8c7 to your computer and use it in GitHub Desktop.
Dumb ELF mapper POC
import logging
from dataclasses import dataclass
from typing import Optional
from enum import Enum
from elftools.elf.elffile import ELFFile
from elftools.elf.relocation import RelocationSection
from elftools.elf.sections import SymbolTableSection
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
@dataclass
class Segment:
    """One mapped region of a loaded module."""

    # Start address of the mapping (page-aligned by the loader).
    start: int
    # Length of the mapping in bytes (page-aligned by the loader).
    size: int
    # EMU_PROT_* bitmask derived from the ELF segment flags.
    prot: int
@dataclass
class Module:
    """Result of mapping one ELF image, as returned by the loader."""

    # Address the image was loaded at.
    load_base: int
    # Page-aligned number of bytes reserved for the image.
    load_size: int
    # Pointer values read from the module's DT_INIT_ARRAY, in order.
    init_array: list[int]
    # Relocated entry point, or None when the ELF header has no entry.
    entry: Optional[int]
    # The regions that were mapped for the image.
    segments: list[Segment]
class Architecture(Enum):
    """CPU architectures the mapper knows how to relocate."""

    x86_64 = 0
    arm64 = 1
def align(addr, size, growl, alignment=0x1000):
    """Widen ``[addr, addr + size)`` so both ends sit on ``alignment`` boundaries.

    Args:
        addr: Start address of the range.
        size: Length of the range in bytes.
        growl: When true, the resulting size is rounded up to ``alignment``
            once more (see the note in the body).
        alignment: Boundary to align to; must be a power of two. Defaults to
            the 4 KiB page size that used to be hard-coded here.

    Returns:
        A ``(aligned_addr, aligned_size)`` tuple: the start rounded down and
        the distance from it to the end rounded up.
    """
    # Arithmetic is masked to 64 bits, matching the c_uint64-based original.
    mask = 0xFFFFFFFFFFFFFFFF ^ (alignment - 1)

    end = (addr + size + alignment - 1) & mask
    addr &= mask
    size = end - addr
    if growl:
        # Both ends are already aligned, so for ordinary inputs this leaves
        # `size` unchanged; kept so results stay identical to the old code.
        size = (size + alignment - 1) & mask
    return addr, size
# Emulator-side memory protection bits (combined with bitwise OR).
EMU_PROT_NONE = 0
EMU_PROT_READ = 1
EMU_PROT_WRITE = 2
EMU_PROT_EXEC = 4
EMU_PROT_ALL = EMU_PROT_READ | EMU_PROT_WRITE | EMU_PROT_EXEC

# ELF program header p_flags bits.
PF_X = 0x1  # Executable
PF_W = 0x2  # Writable
PF_R = 0x4  # Readable

# ARM (32-bit) relocation types.
# From http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf
R_ARM_ABS32 = 2
R_ARM_GLOB_DAT = 21
R_ARM_JUMP_SLOT = 22
R_ARM_RELATIVE = 23

# AArch64 relocation types.
# https://github.com/frida/tinycc/blob/a438164dd4c453ae62c1224b4b7997507a388b3d/tccelf.h#L2454
# https://github.com/frida/tinycc/blob/a438164dd4c453ae62c1224b4b7997507a388b3d/arm64-link.c#L273
R_AARCH64_GLOB_DAT = 1025  # GOT entry
R_AARCH64_JUMP_SLOT = 1026  # PLT entry
R_AARCH64_RELATIVE = 1027  # adjust by program base

# x86 relocation types
# https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/elf.h#L47
R_X86_64_NONE = 0  # No relocation
R_X86_64_64 = 1  # Direct 64 bit zero extended
R_X86_64_PC32 = 2  # PC relative 32 bit signed
R_X86_64_GOT32 = 3  # 32 bit GOT entry
R_X86_64_PLT32 = 4  # 32 bit PLT address
R_X86_64_COPY = 5  # Copy symbol at runtime
R_X86_64_GLOB_DAT = 6  # Create GOT entry
R_X86_64_JUMP_SLOT = 7  # Create PLT entry
R_X86_64_RELATIVE = 8  # Adjust by program base
R_X86_64_GOTPCREL = 9  # 32 bit signed pc relative offset to GOT
R_X86_64_32 = 10  # Direct 32 bit zero extended
R_X86_64_32S = 11  # Direct 32 bit sign extended
R_X86_64_16 = 12  # Direct 16 bit zero extended
R_X86_64_PC16 = 13  # 16 bit sign extended pc relative
R_X86_64_8 = 14  # Direct 8 bit sign extended
R_X86_64_PC8 = 15  # 8 bit sign extended pc relative
R_X86_64_PC64 = 24  # Place relative 64-bit
def get_segment_protection(prot_in):
    """Translate ELF ``p_flags`` bits into an ``EMU_PROT_*`` bitmask.

    Args:
        prot_in: Segment flags made of ``PF_R`` / ``PF_W`` / ``PF_X`` bits.

    Returns:
        The matching combination of ``EMU_PROT_READ`` / ``EMU_PROT_WRITE`` /
        ``EMU_PROT_EXEC``; 0 when none of the three bits is set.
    """
    flag_pairs = (
        (PF_R, EMU_PROT_READ),
        (PF_W, EMU_PROT_WRITE),
        (PF_X, EMU_PROT_EXEC),
    )
    prot = 0
    for elf_flag, emu_flag in flag_pairs:
        if prot_in & elf_flag:
            prot |= emu_flag
    return prot
class ELFMapper:
    """Load ET_DYN ELF images into an emulator's memory.

    This base class contains the ELF logic only. Subclasses supply the memory
    primitives (``mem_map``, ``mem_protect``, ``mem_write``, ``mem_read``) and
    ``check_arch``; ``load_module`` drives them to map the PT_LOAD segments,
    resolve symbols, apply relocations and collect the init array.
    """

    def __init__(self):
        # Symbol name -> address overrides, consulted before any other lookup.
        self.symbol_hooks = {}
        # Undefined external symbols receive placeholder addresses starting
        # at fake_base, spaced 0x100 apart.
        self.fake_base = 0x1337000000
        self.fake_index = 0
        # Placeholder address -> symbol name, for the placeholders handed out.
        self.fake_symbols = {}

    def mem_map(self, addr: int, size: int, prot: int):
        """Map ``size`` bytes at ``addr`` with ``EMU_PROT_*`` protection."""
        raise NotImplementedError()

    def mem_protect(self, addr: int, size: int, prot: int):
        """Change the protection of an existing mapping."""
        raise NotImplementedError()

    def mem_write(self, addr: int, data: bytes):
        """Write ``data`` to emulator memory at ``addr``."""
        raise NotImplementedError()

    def mem_read(self, addr: int, size: int) -> bytes:
        """Read ``size`` bytes of emulator memory starting at ``addr``."""
        raise NotImplementedError()

    def check_arch(self, arch: Architecture):
        """Accept or reject the architecture of the module being loaded."""
        raise NotImplementedError()

    def _elf_lookup_symbol(self, name):
        """Return an address for the external symbol ``name``.

        This implementation never fails: every call hands out a fresh
        placeholder address and records it in ``fake_symbols``. A subclass
        may return None to signal that the symbol is unknown.
        """
        logger.debug(f"Looking up symbol: {name}")
        symbol = self.fake_base + self.fake_index * 0x100
        self.fake_index += 1
        self.fake_symbols[symbol] = name
        return symbol

    def _elf_get_symval(self, elf, elf_base, symbol):
        """Compute the run-time address of ``symbol``.

        Args:
            elf: The ELF file being loaded (currently unused).
            elf_base: Address the image is loaded at.
            symbol: A pyelftools symbol entry.

        Returns:
            The resolved address, 0 for an unresolved weak symbol, or None
            for an unresolved strong symbol.
        """
        if symbol.name in self.symbol_hooks:
            return self.symbol_hooks[symbol.name]

        shndx = symbol["st_shndx"]
        if shndx == "SHN_UNDEF":
            # External symbol, lookup value.
            target = self._elf_lookup_symbol(symbol.name)
            if target is not None:
                return target
            if symbol["st_info"]["bind"] == "STB_WEAK":
                # Weak symbol initialized as 0
                return 0
            logger.error(f"Undefined external symbol: {symbol.name}")
            return None

        if shndx == "SHN_ABS":
            # Absolute symbols are not affected by relocation, so the load
            # base must not be added.
            return symbol["st_value"]

        # Internally defined symbol.
        return elf_base + symbol["st_value"]

    def load_module(self, filename, load_base=None) -> Module:
        """Map the ELF shared object at ``filename`` and relocate it.

        Args:
            filename: Path of the ELF file.
            load_base: Address to load the image at. When None the lowest
                load bound is used, which is currently always 0 and therefore
                rejected (see ``_load_bounds``).

        Returns:
            A ``Module`` describing the mapping.

        Raises:
            NotImplementedError: Unsupported architecture, an ELF type other
                than ET_DYN, or a relocation type that is not handled.
            AssertionError: The image would be loaded at address 0, a
                relative relocation names a symbol, or a segment's file data
                is larger than its memory size.
        """
        logger.debug(f"Loading module {filename}")
        # ELFFile keeps reading from the stream on demand, so every step that
        # touches `elf` has to run while the file is still open.
        with open(filename, "rb") as fstream:
            elf = ELFFile(fstream)

            elf_arch = elf.get_machine_arch()
            if elf_arch == "x64":
                arch = Architecture.x86_64
            elif elf_arch == "AArch64":
                arch = Architecture.arm64
            else:
                raise NotImplementedError(f"Unsupported architecture: {elf_arch}")
            self.check_arch(arch)

            if elf.header.e_type != "ET_DYN":
                raise NotImplementedError("Only ET_DYN is supported at the moment.")

            # Retrieve a base address for this module.
            bound_low, bound_high = self._load_bounds(elf)
            _, load_size = align(bound_low, bound_high - bound_low, True)
            if load_base is None:
                load_base = bound_low
            assert load_base != 0, "cannot load at 0"

            logger.debug(f"Reserving {hex(load_size)} bytes of memory, at {hex(load_base)}")
            self.mem_map(load_base, load_size, EMU_PROT_NONE)
            logger.debug(f"Base address {hex(load_base)}")

            segments = self._map_segments(elf, load_base)
            symbol_tables = self._resolve_symbols(elf, load_base)
            self._apply_relocations(elf, arch, load_base, symbol_tables)
            init_array = self._read_init_array(elf, load_base)

            if elf.header.e_entry > 0:
                entry = load_base + elf.header.e_entry
                logger.debug(f"entry: {hex(entry)}")
            else:
                entry = None
                logger.debug(f"entry: <NONE>")

        return Module(load_base, load_size, init_array, entry, segments)

    def _load_bounds(self, elf):
        """Return ``(low, high)`` virtual-address bounds of the PT_LOAD segments."""
        # NOTE(review): the low bound has always been 0 here. The previous
        # loop compared `0 > p_vaddr`, which can never be true. Segments are
        # placed at load_base + p_vaddr, so switching to the real minimum
        # would also require turning load_base into a bias; left unchanged.
        bound_low = 0
        bound_high = 0
        # TODO: do not skip segment types, it looks like at least Android is
        # also loading other segments
        for segment in elf.iter_segments():
            header = segment.header
            if header.p_memsz == 0 or header.p_type != "PT_LOAD":
                logger.debug(f"Skipping segment {header.p_type}")
                continue
            logger.debug(f"base: {hex(header.p_vaddr)}")
            bound_high = max(bound_high, header.p_vaddr + header.p_memsz)
        return bound_low, bound_high

    def _map_segments(self, elf, load_base) -> list[Segment]:
        """Map every non-empty PT_LOAD segment and copy in its file contents."""
        segments = []
        for segment in elf.iter_segments():
            header = segment.header
            logger.debug(f"Segment type: {header.p_type}")
            logger.debug(f"Segment flags: {header.p_flags}")
            logger.debug(f"Segment vaddr: {hex(header.p_vaddr)}")
            logger.debug(f"Segment memsz: {hex(header.p_memsz)}")
            logger.debug(f"Segment filesz: {hex(header.p_filesz)}")
            logger.debug(f"Segment offset: {hex(header.p_offset)}")
            logger.debug(f"Segment align: {hex(header.p_align)}")

            # Same filter as _load_bounds: an empty segment has nothing to map.
            if header.p_type != "PT_LOAD" or header.p_memsz == 0:
                continue

            prot = get_segment_protection(header.p_flags)
            prot = prot if prot != 0 else EMU_PROT_ALL

            seg_addr = load_base + header.p_vaddr
            seg_size = header.p_memsz
            seg_addr_aligned, seg_size_aligned = align(seg_addr, seg_size, True)

            # NOTE: in case of already mapped memory make it writable
            # TODO: create function to check if memory is already mapped
            try:
                self.mem_map(seg_addr_aligned, seg_size_aligned, EMU_PROT_ALL)
            except Exception:
                # Assumed to mean "already mapped" (the range was reserved by
                # load_module), so only the protection is changed.
                self.mem_protect(seg_addr_aligned, seg_size_aligned, EMU_PROT_ALL)

            # NOTE: the mapping deliberately stays EMU_PROT_ALL. If it is
            # changed back to the segment's own protection, memory is not
            # executable where it needs to be. The intended protection is
            # still recorded in the returned Segment.
            segments.append(Segment(seg_addr_aligned, seg_size_aligned, prot))

            data = segment.data()
            assert len(data) <= seg_size, "Need to implement padding rest of the size with 00"
            self.mem_write(seg_addr, data)
            logger.debug(f"segment {hex(seg_addr)}[{hex(seg_size)}] -> {hex(seg_addr_aligned)}[{hex(seg_size_aligned)}]")
        return segments

    def _resolve_symbols(self, elf, load_base):
        """Resolve every symbol of every symbol table in the file.

        Returns:
            A dict mapping a symbol table's section index to a list indexed by
            ELF symbol index. Each element is ``(address, name)``, or None for
            the NULL symbol and for symbols that could not be resolved.
        """
        tables = {}
        for section_index, section in enumerate(elf.iter_sections()):
            if not isinstance(section, SymbolTableSection):
                continue
            resolved = []
            for symbol_index, symbol in enumerate(section.iter_symbols()):
                if symbol_index == 0:
                    # First symbol is always NULL; keep a slot so positions
                    # keep matching the ELF symbol indices.
                    resolved.append(None)
                    continue
                address = self._elf_get_symval(elf, load_base, symbol)
                if address is None:
                    resolved.append(None)
                    continue
                logger.debug(f"symbol[{symbol_index - 1} -> {symbol.name}] = {hex(address)}")
                resolved.append((address, symbol.name))
            tables[section_index] = resolved
        return tables

    def _apply_relocations(self, elf, arch, load_base, symbol_tables):
        """Apply the RELATIVE, JUMP_SLOT and GLOB_DAT relocations of ``elf``.

        Raises:
            NotImplementedError: Unknown architecture or relocation type.
        """
        if arch == Architecture.x86_64:
            prefix = "R_X86_64"
            type_relative = R_X86_64_RELATIVE
            type_jump_slot = R_X86_64_JUMP_SLOT
            type_glob_dat = R_X86_64_GLOB_DAT
        elif arch == Architecture.arm64:
            prefix = "R_AARCH64"
            type_relative = R_AARCH64_RELATIVE
            type_jump_slot = R_AARCH64_JUMP_SLOT
            type_glob_dat = R_AARCH64_GLOB_DAT
        else:
            raise NotImplementedError(f"Unsupported architecture: {arch}")

        for section in elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue
            # r_info_sym indexes the symbol table this section links to.
            symbols = symbol_tables.get(section["sh_link"], [])
            # Only RELA entries carry an explicit addend.
            has_addend = section.is_RELA()

            for rel in section.iter_relocations():
                rel_addr = load_base + rel["r_offset"]  # Location where relocation should happen
                rel_info_type = rel["r_info_type"]
                rel_info_sym = rel["r_info_sym"]

                if rel_info_type == type_relative:
                    assert rel_info_sym == 0, "Relative relocation must be against NULL symbol."
                    # Load address at which it was linked originally.
                    value_orig = int.from_bytes(self.mem_read(rel_addr, 8), byteorder="little")
                    # HACK: detect 'prelinked' or 'implicit' relocation
                    # https://github.com/Vector35/view-elf/blob/ec099a2b0bbffb82a84af5ccd9843eeb3687f568/elfview.cpp#L1001
                    # https://github.com/NationalSecurityAgency/ghidra/blob/b070f86b4d8833255b04d11100c6efcc1dd02770/Ghidra/Processors/x86/src/main/java/ghidra/app/util/bin/format/elf/relocation/X86_64_ElfRelocationHandler.java#L263
                    # It is not entirely clear how this works in reality
                    if value_orig == 0:
                        # The original address is not known.
                        # Use the load base as a fallback.
                        rel_addend = rel["r_addend"] if has_addend else 0
                        value = load_base + rel_addend
                    else:
                        value = load_base + value_orig
                    logger.debug(f"{prefix}_RELATIVE: [{hex(rel_addr)}] = {hex(value)}")
                    # Write the new value
                    self.mem_write(rel_addr, value.to_bytes(8, byteorder="little"))
                elif rel_info_type in (type_jump_slot, type_glob_dat):
                    kind = "JUMP_SLOT" if rel_info_type == type_jump_slot else "GLOB_DAT"
                    resolved = symbols[rel_info_sym] if rel_info_sym < len(symbols) else None
                    if resolved is None:
                        # Leave the slot untouched rather than writing a
                        # wrong address into it.
                        logger.error(f"{prefix}_{kind}: [{hex(rel_addr)}] references unresolved symbol index {rel_info_sym}")
                        continue
                    address, name = resolved
                    logger.debug(f"{prefix}_{kind}: [{hex(rel_addr)}] = {hex(address)} -> {name}")
                    self.mem_write(rel_addr, address.to_bytes(8, byteorder="little"))
                else:
                    logger.error(f"Unhandled relocation type {rel_info_type}")
                    raise NotImplementedError()

    def _read_init_array(self, elf, load_base):
        """Return the pointers stored in the module's DT_INIT_ARRAY.

        Must run after relocation, because the values are read back from
        emulator memory.
        """
        init_array_size = 0
        init_array_addr = 0
        for segment in elf.iter_segments():
            if segment.header.p_type != "PT_DYNAMIC":
                continue
            for tag in segment.iter_tags():
                if tag.entry.d_tag == "DT_INIT_ARRAYSZ":
                    init_array_size = tag.entry.d_val
                elif tag.entry.d_tag == "DT_INIT_ARRAY":
                    init_array_addr = tag.entry.d_val + load_base

        # Load pointers from init array
        logger.debug(f"init_array: {hex(init_array_addr)}[{hex(init_array_size)}]")
        init_array = []
        if init_array_addr and init_array_size:
            # Entries are read as 8-byte little-endian pointers.
            for i in range(init_array_size // 8):
                addr = init_array_addr + (i * 8)
                value = int.from_bytes(self.mem_read(addr, 8), byteorder="little")
                logger.debug(f"init_array[{i}]: {hex(value)}")
                init_array.append(value)
        return init_array
import argparse
from icicle import Icicle, MemoryProtection
class IcicleELFMapper(ELFMapper):
    """``ELFMapper`` whose memory primitives are forwarded to an ``Icicle`` VM."""

    def __init__(self):
        super().__init__()
        # No emulator yet; check_arch() creates it for the first module's
        # architecture.
        self.ic: Icicle = None

    def convert_protection(self, prot: int) -> MemoryProtection:
        """Map an ``EMU_PROT_*`` bitmask to a ``MemoryProtection`` value.

        Write-only is reported as ReadWrite, execute-only as ExecuteRead and
        write+execute as ExecuteReadWrite.

        Raises:
            NotImplementedError: ``prot`` is not a combination of the three
                EMU_PROT bits.
        """
        if prot == EMU_PROT_NONE:
            return MemoryProtection.NoAccess
        if prot == EMU_PROT_READ:
            return MemoryProtection.ReadOnly
        if prot in (EMU_PROT_WRITE, EMU_PROT_READ | EMU_PROT_WRITE):
            return MemoryProtection.ReadWrite
        if prot in (EMU_PROT_EXEC, EMU_PROT_READ | EMU_PROT_EXEC):
            return MemoryProtection.ExecuteRead
        if prot in (EMU_PROT_WRITE | EMU_PROT_EXEC, EMU_PROT_ALL):
            return MemoryProtection.ExecuteReadWrite
        raise NotImplementedError(f"Unsupported protection: {prot}")

    def convert_arch(self, arch: Architecture) -> str:
        """Return the architecture string passed to ``Icicle`` for ``arch``."""
        if arch == Architecture.x86_64:
            return "x86_64"
        if arch == Architecture.arm64:
            return "aarch64"
        raise NotImplementedError(f"Unsupported architecture: {arch}")

    def check_arch(self, arch):
        """Create the emulator on first use; afterwards require the same architecture."""
        arch_name = self.convert_arch(arch)
        if self.ic is not None:
            assert self.ic.architecture == arch_name, "architecture mismatch"
            return
        self.ic = Icicle(arch_name)

    def mem_map(self, addr: int, size: int, prot: int):
        """Log the request and map the range in the emulator."""
        logger.debug(f"mem_map({hex(addr)}, {hex(size)}, {prot})")
        native_prot = self.convert_protection(prot)
        self.ic.mem_map(addr, size, native_prot)

    def mem_protect(self, addr: int, size: int, prot: int):
        """Log the request and change the range's protection in the emulator."""
        logger.debug(f"mem_protect({hex(addr)}, {hex(size)}, {prot})")
        native_prot = self.convert_protection(prot)
        self.ic.mem_protect(addr, size, native_prot)

    def mem_write(self, addr: int, data: bytes):
        """Write ``data`` into emulator memory at ``addr``."""
        self.ic.mem_write(addr, data)

    def mem_read(self, addr: int, size: int) -> bytes:
        """Read ``size`` bytes of emulator memory at ``addr``."""
        return self.ic.mem_read(addr, size)
def main():
    """Command-line entry point: map one ELF file and print what was loaded.

    ``--load-base`` is parsed as a hexadecimal number; when omitted, the
    choice of base address is left to ``load_module``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("elf_file", help="ELF file to map")
    parser.add_argument("--load-base", help="Base address to load the module at")
    args = parser.parse_args()

    load_base = int(args.load_base, 16) if args.load_base is not None else None

    mapper = IcicleELFMapper()
    module = mapper.load_module(args.elf_file, load_base)

    print("\nLoaded module:")
    print(f" load_size: {hex(module.load_size)}")
    print(f" init_array: {module.init_array}")
    # load_module returns entry=None for images without an entry point;
    # hex(None) would raise TypeError.
    entry_text = hex(module.entry) if module.entry is not None else "<NONE>"
    print(f" entry: {entry_text}")
    for segment in module.segments:
        print(f" segment {hex(segment.start)}-{hex(segment.start + segment.size)} {segment.prot} -> {mapper.convert_protection(segment.prot)}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment