Created
June 13, 2025 15:39
-
-
Save mrexodia/c6aa2dec49a8b090509a313494b6e8c7 to your computer and use it in GitHub Desktop.
Dumb ELF mapper POC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
from dataclasses import dataclass | |
from typing import Optional | |
from enum import Enum | |
from elftools.elf.elffile import ELFFile | |
from elftools.elf.relocation import RelocationSection | |
from elftools.elf.sections import SymbolTableSection | |
logging.basicConfig(level=logging.DEBUG) | |
logger = logging.getLogger() | |
@dataclass
class Segment:
    """One region of a loaded module as it was mapped into the emulator."""

    # Start address of the mapped region (page-aligned by the loader).
    start: int
    # Size of the mapped region in bytes (page-aligned by the loader).
    size: int
    # EMU_PROT_* bitmask derived from the segment's ELF p_flags.
    prot: int
@dataclass
class Module:
    """Summary of an ELF image after it has been mapped and relocated."""

    # Value that was added to every p_vaddr / r_offset while loading.
    load_base: int
    # Page-aligned size of the address range reserved for the image.
    load_size: int
    # Pointers read from DT_INIT_ARRAY after relocation (may be empty).
    init_array: list[int]
    # Relocated entry point, or None when the header's e_entry is 0.
    entry: Optional[int]
    # The PT_LOAD segments that were mapped, in program-header order.
    segments: list[Segment]
class Architecture(Enum):
    """CPU architectures the mapper knows how to relocate for."""

    x86_64 = 0
    arm64 = 1
def align(addr, size, growl):
    """Expand the range [addr, addr + size) outwards to 4 KiB page boundaries.

    Returns a ``(aligned_addr, aligned_size)`` tuple. The start is rounded
    down and the end is rounded up; both are truncated to 64 bits by the
    page mask. When ``growl`` is true the resulting size is rounded up to a
    page multiple once more.
    """
    page = 0x1000
    # 64-bit mask with the in-page offset bits cleared.
    page_mask = 0xFFFFFFFFFFFFFFFF ^ (page - 1)

    end = (addr + size + page - 1) & page_mask
    start = addr & page_mask
    span = end - start
    if growl:
        span = (span + page - 1) & page_mask
    return start, span
# Emulator-side memory protection bits (combined with bitwise OR).
EMU_PROT_NONE = 0
EMU_PROT_READ = 1
EMU_PROT_WRITE = 2
EMU_PROT_EXEC = 4
EMU_PROT_ALL = EMU_PROT_READ | EMU_PROT_WRITE | EMU_PROT_EXEC

# ELF program header p_flags bits. These were previously defined twice in
# this file with identical values; they are now defined once.
PF_X = 0x1  # Executable
PF_W = 0x2  # Writable
PF_R = 0x4  # Readable

# From http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044f/IHI0044F_aaelf.pdf
R_ARM_ABS32 = 2
R_ARM_GLOB_DAT = 21
R_ARM_JUMP_SLOT = 22
R_ARM_RELATIVE = 23

# https://github.com/frida/tinycc/blob/a438164dd4c453ae62c1224b4b7997507a388b3d/tccelf.h#L2454
# https://github.com/frida/tinycc/blob/a438164dd4c453ae62c1224b4b7997507a388b3d/arm64-link.c#L273
R_AARCH64_GLOB_DAT = 1025  # GOT entry
R_AARCH64_JUMP_SLOT = 1026  # PLT entry
R_AARCH64_RELATIVE = 1027  # adjust by program base

# x86 relocation types
# https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/elf.h#L47
R_X86_64_NONE = 0  # No relocation
R_X86_64_64 = 1  # Direct 64 bit zero extended
R_X86_64_PC32 = 2  # PC relative 32 bit signed
R_X86_64_GOT32 = 3  # 32 bit GOT entry
R_X86_64_PLT32 = 4  # 32 bit PLT address
R_X86_64_COPY = 5  # Copy symbol at runtime
R_X86_64_GLOB_DAT = 6  # Create GOT entry
R_X86_64_JUMP_SLOT = 7  # Create PLT entry
R_X86_64_RELATIVE = 8  # Adjust by program base
R_X86_64_GOTPCREL = 9  # 32 bit signed pc relative offset to GOT
R_X86_64_32 = 10  # Direct 32 bit zero extended
R_X86_64_32S = 11  # Direct 32 bit sign extended
R_X86_64_16 = 12  # Direct 16 bit zero extended
R_X86_64_PC16 = 13  # 16 bit sign extended pc relative
R_X86_64_8 = 14  # Direct 8 bit sign extended
R_X86_64_PC8 = 15  # 8 bit sign extended pc relative
R_X86_64_PC64 = 24  # Place relative 64-bit
def get_segment_protection(prot_in):
    """Translate ELF ``p_flags`` permission bits into an EMU_PROT_* bitmask.

    Bits other than PF_R / PF_W / PF_X are ignored; a value with none of
    them set yields 0.
    """
    elf_to_emu = (
        (PF_R, EMU_PROT_READ),
        (PF_W, EMU_PROT_WRITE),
        (PF_X, EMU_PROT_EXEC),
    )
    result = 0
    for elf_bit, emu_bit in elf_to_emu:
        if prot_in & elf_bit:
            result |= emu_bit
    return result
class ELFMapper:
    """Minimal loader that maps an ET_DYN ELF image into an emulator.

    The memory primitives (``mem_map``, ``mem_protect``, ``mem_write``,
    ``mem_read``) and ``check_arch`` must be provided by a subclass.

    Attributes:
        symbol_hooks: name -> address overrides consulted before any other
            symbol resolution.
        fake_base / fake_index: allocator state for the placeholder
            addresses handed out to undefined (imported) symbols.
        fake_symbols: placeholder address -> imported symbol name.
    """

    # Both supported architectures are 64-bit, so relocation slots and
    # init_array entries are read and written as 8-byte little-endian values.
    _POINTER_SIZE = 8

    def __init__(self):
        self.symbol_hooks = {}
        self.fake_base = 0x1337000000
        self.fake_index = 0
        self.fake_symbols = {}

    def mem_map(self, addr: int, size: int, prot: int):
        """Map ``size`` bytes at ``addr`` with an EMU_PROT_* mask (backend hook)."""
        raise NotImplementedError()

    def mem_protect(self, addr: int, size: int, prot: int):
        """Change protection of an existing mapping (backend hook)."""
        raise NotImplementedError()

    def mem_write(self, addr: int, data: bytes):
        """Write ``data`` at ``addr`` (backend hook)."""
        raise NotImplementedError()

    def mem_read(self, addr: int, size: int) -> bytes:
        """Read ``size`` bytes from ``addr`` (backend hook)."""
        raise NotImplementedError()

    def check_arch(self, arch: Architecture):
        """Validate or initialise the backend for ``arch`` (backend hook)."""
        raise NotImplementedError()

    def _elf_lookup_symbol(self, name):
        """Hand out a fresh placeholder address for the external symbol ``name``.

        Every call allocates a new 0x100-byte slot above ``fake_base`` and
        records it in ``fake_symbols``.
        """
        logger.debug(f"Looking up symbol: {name}")
        symbol = self.fake_base + self.fake_index * 0x100
        self.fake_index += 1
        self.fake_symbols[symbol] = name
        return symbol

    def _elf_get_symval(self, elf, elf_base, symbol):
        """Return the runtime address of ``symbol``, or None if unresolved.

        Resolution order: ``symbol_hooks`` override, external lookup for
        undefined symbols (weak ones fall back to 0), the raw value for
        absolute symbols, and ``elf_base + st_value`` for everything else.
        """
        if symbol.name in self.symbol_hooks:
            return self.symbol_hooks[symbol.name]

        shndx = symbol["st_shndx"]
        if shndx == "SHN_UNDEF":
            # External symbol, lookup value.
            target = self._elf_lookup_symbol(symbol.name)
            if target is not None:
                return target
            if symbol["st_info"]["bind"] == "STB_WEAK":
                # Weak symbol initialized as 0
                return 0
            logger.error(f"Undefined external symbol: {symbol.name}")
            return None
        if shndx == "SHN_ABS":
            # Absolute symbols are not affected by relocation, so the load
            # base must not be added.
            return symbol["st_value"]
        # Internally defined symbol.
        return elf_base + symbol["st_value"]

    def _detect_arch(self, elf):
        """Map pyelftools' machine name onto an ``Architecture`` member."""
        elf_arch = elf.get_machine_arch()
        if elf_arch == "x64":
            return Architecture.x86_64
        if elf_arch == "AArch64":
            return Architecture.arm64
        raise NotImplementedError(f"Unsupported architecture: {elf_arch}")

    def _relocation_table(self, arch):
        """Return ``{relocation type: (kind, log label)}`` for ``arch``.

        ``kind`` is ``"relative"`` for base-relative relocations and
        ``"symbol"`` for relocations that store a symbol's address.
        """
        if arch == Architecture.x86_64:
            return {
                R_X86_64_RELATIVE: ("relative", "R_X86_64_RELATIVE"),
                R_X86_64_JUMP_SLOT: ("symbol", "R_X86_64_JUMP_SLOT"),
                R_X86_64_GLOB_DAT: ("symbol", "R_X86_64_GLOB_DAT"),
            }
        if arch == Architecture.arm64:
            return {
                R_AARCH64_RELATIVE: ("relative", "R_AARCH64_RELATIVE"),
                R_AARCH64_JUMP_SLOT: ("symbol", "R_AARCH64_JUMP_SLOT"),
                R_AARCH64_GLOB_DAT: ("symbol", "R_AARCH64_GLOB_DAT"),
            }
        raise NotImplementedError(f"Unsupported architecture: {arch}")

    def _reserve_image(self, elf, load_base):
        """Reserve address space for the image; return ``(load_base, load_size)``."""
        # Find the upper bound of the load segments.
        bound_high = 0
        for segment in elf.iter_segments():
            header = segment.header
            if header.p_memsz == 0 or header.p_type != "PT_LOAD":
                logger.debug(f"Skipping segment {header.p_type}")
                continue
            logger.debug(f"base: {hex(header.p_vaddr)}")
            bound_high = max(bound_high, header.p_vaddr + header.p_memsz)

        # Segments are placed at load_base + p_vaddr, so the reservation
        # spans from load_base (vaddr 0) to the page-rounded upper bound.
        # The previous low-bound search started at 0 and could never move,
        # which is the same thing; it is spelled out explicitly here.
        _, load_size = align(0, bound_high, True)

        if load_base is None:
            # No base supplied: fall back to the low bound (0), which the
            # check below rejects, so callers must pass a non-zero base.
            load_base = 0
        assert load_base != 0, "cannot load at 0"

        logger.debug(f"Reserving {hex(load_size)} bytes of memory, at {hex(load_base)}")
        self.mem_map(load_base, load_size, EMU_PROT_NONE)
        logger.debug(f"Base address {hex(load_base)}")
        return load_base, load_size

    def _map_segments(self, elf, load_base):
        """Map every non-empty PT_LOAD segment and copy its file contents in."""
        # TODO: do not skip segment types, it looks like at least Android is
        # also loading other segments
        segments = []
        for segment in elf.iter_segments():
            header = segment.header
            logger.debug(
                f"Segment type: {header.p_type}, flags: {header.p_flags}, "
                f"vaddr: {hex(header.p_vaddr)}, memsz: {hex(header.p_memsz)}, "
                f"filesz: {hex(header.p_filesz)}, offset: {hex(header.p_offset)}, "
                f"align: {hex(header.p_align)}"
            )
            # Zero-sized segments are skipped here too, matching the bounds
            # computation; there is nothing to map or write for them.
            if header.p_type != "PT_LOAD" or header.p_memsz == 0:
                continue

            prot = get_segment_protection(header.p_flags)
            prot = prot if prot != 0 else EMU_PROT_ALL

            seg_addr = load_base + header.p_vaddr
            seg_size = header.p_memsz
            seg_addr_aligned, seg_size_aligned = align(seg_addr, seg_size, True)

            # NOTE: in case of already mapped memory make it writable
            # TODO: create function to check if memory is already mapped
            try:
                self.mem_map(seg_addr_aligned, seg_size_aligned, EMU_PROT_ALL)
            except Exception:
                self.mem_protect(seg_addr_aligned, seg_size_aligned, EMU_PROT_ALL)
            # NOTE: the pages are deliberately left with full access. Switching
            # back to the segment's own protection left memory non-executable
            # where it needed to be executable.
            segments.append(Segment(seg_addr_aligned, seg_size_aligned, prot))

            data = segment.data()
            assert len(data) <= seg_size, "Need to implement padding rest of the size with 00"
            if data:
                self.mem_write(seg_addr, data)
            logger.debug(
                f"segment {hex(seg_addr)}[{hex(seg_size)}] -> "
                f"{hex(seg_addr_aligned)}[{hex(seg_size_aligned)}]"
            )
        return segments

    def _resolve_symbols(self, elf, load_base):
        """Resolve every symbol table; return ``{section index: entries}``.

        ``entries[i]`` is ``(address, name)`` for symbol index ``i`` of that
        table (``address`` is None when unresolved); ``entries[0]`` is None
        for the reserved NULL symbol. Keeping one list per table lets
        relocations look symbols up through their section's ``sh_link``.
        """
        tables = {}
        for section_index in range(elf.num_sections()):
            section = elf.get_section(section_index)
            if not isinstance(section, SymbolTableSection):
                continue
            entries = []
            for symbol_index, symbol in enumerate(section.iter_symbols()):
                if symbol_index == 0:
                    # Skip first symbol which is always NULL.
                    entries.append(None)
                    continue
                address = self._elf_get_symval(elf, load_base, symbol)
                if address is not None:
                    logger.debug(f"symbol[{symbol_index} -> {symbol.name}] = {hex(address)}")
                entries.append((address, symbol.name))
            tables[section_index] = entries
        return tables

    def _apply_relocations(self, elf, arch, load_base, symbol_tables):
        """Apply the supported relocations from every relocation section."""
        handlers = self._relocation_table(arch)
        width = self._POINTER_SIZE

        for section in elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue
            # r_info_sym indexes the symbol table this section links to.
            linked_symbols = symbol_tables.get(section["sh_link"], [])

            for rel in section.iter_relocations():
                rel_addr = load_base + rel["r_offset"]  # Location where relocation should happen
                rel_info_type = rel["r_info_type"]
                rel_info_sym = rel["r_info_sym"]
                # REL entries carry no explicit addend field.
                rel_addend = rel["r_addend"] if rel.is_RELA() else 0

                if rel_info_type not in handlers:
                    logger.error(f"Unhandled relocation type {rel_info_type}")
                    raise NotImplementedError()
                kind, label = handlers[rel_info_type]

                if kind == "relative":
                    assert rel_info_sym == 0, "Relative relocation must be against NULL symbol."
                    # Load address at which it was linked originally.
                    value_orig = int.from_bytes(self.mem_read(rel_addr, width), byteorder="little")
                    # HACK: detect 'prelinked' or 'implicit' relocation
                    # https://github.com/Vector35/view-elf/blob/ec099a2b0bbffb82a84af5ccd9843eeb3687f568/elfview.cpp#L1001
                    # https://github.com/NationalSecurityAgency/ghidra/blob/b070f86b4d8833255b04d11100c6efcc1dd02770/Ghidra/Processors/x86/src/main/java/ghidra/app/util/bin/format/elf/relocation/X86_64_ElfRelocationHandler.java#L263
                    # It is not entirely clear how this works in reality
                    if value_orig == 0:
                        # The original address is not known.
                        # Use the load base as a fallback.
                        value = load_base + rel_addend
                    else:
                        value = load_base + value_orig
                    logger.debug(f"{label}: [{hex(rel_addr)}] = {hex(value)}")
                    # Write the new value
                    self.mem_write(rel_addr, value.to_bytes(width, byteorder="little"))
                    continue

                # Symbol-based relocation (GOT / PLT slot): store the address.
                entry = None
                if 0 < rel_info_sym < len(linked_symbols):
                    entry = linked_symbols[rel_info_sym]
                if entry is None or entry[0] is None:
                    logger.error(
                        f"{label}: [{hex(rel_addr)}] references unresolved symbol index {rel_info_sym}"
                    )
                    continue
                address, name = entry
                logger.debug(f"{label}: [{hex(rel_addr)}] = {hex(address)} -> {name}")
                self.mem_write(rel_addr, address.to_bytes(width, byteorder="little"))

    def _read_init_array(self, elf, load_base):
        """Return the relocated pointers stored in the DT_INIT_ARRAY table."""
        init_array_size = 0
        init_array_addr = 0
        for segment in elf.iter_segments():
            if segment.header.p_type != "PT_DYNAMIC":
                continue
            for tag in segment.iter_tags():
                if tag.entry.d_tag == "DT_INIT_ARRAYSZ":
                    init_array_size = tag.entry.d_val
                elif tag.entry.d_tag == "DT_INIT_ARRAY":
                    init_array_addr = tag.entry.d_val + load_base

        # Load pointers from init array
        logger.debug(f"init_array: {hex(init_array_addr)}[{hex(init_array_size)}]")
        init_array = []
        if init_array_addr and init_array_size:
            width = self._POINTER_SIZE
            for i in range(init_array_size // width):
                addr = init_array_addr + (i * width)
                value = int.from_bytes(self.mem_read(addr, width), byteorder="little")
                logger.debug(f"init_array[{i}]: {hex(value)}")
                init_array.append(value)
        return init_array

    def load_module(self, filename, load_base=None) -> Module:
        """Map, relocate and describe the ELF file at ``filename``.

        Args:
            filename: path of an x86_64 or AArch64 ET_DYN ELF file.
            load_base: value added to every virtual address in the file.
                Must be non-zero; omitting it currently trips the
                "cannot load at 0" assertion.

        Returns:
            A ``Module`` with the base, reserved size, init_array pointers,
            entry point (None when e_entry is 0) and mapped segments.

        Raises:
            NotImplementedError: unsupported machine, non-ET_DYN file, or a
                relocation type the mapper does not handle.
        """
        logger.debug(f"Loading module {filename}")
        # All parsing stays inside the with-block because segment and section
        # contents are read from the open stream.
        with open(filename, "rb") as fstream:
            elf = ELFFile(fstream)

            arch = self._detect_arch(elf)
            self.check_arch(arch)

            if elf.header.e_type != "ET_DYN":
                raise NotImplementedError("Only ET_DYN is supported at the moment.")

            load_base, load_size = self._reserve_image(elf, load_base)
            segments = self._map_segments(elf, load_base)
            symbol_tables = self._resolve_symbols(elf, load_base)
            self._apply_relocations(elf, arch, load_base, symbol_tables)
            init_array = self._read_init_array(elf, load_base)

            if elf.header.e_entry > 0:
                entry = load_base + elf.header.e_entry
                logger.debug(f"entry: {hex(entry)}")
            else:
                entry = None
                logger.debug("entry: <NONE>")

        return Module(load_base, load_size, init_array, entry, segments)
import argparse | |
from icicle import Icicle, MemoryProtection | |
class IcicleELFMapper(ELFMapper):
    """ELFMapper whose memory primitives are forwarded to an Icicle VM."""

    def __init__(self):
        super().__init__()
        # Created by check_arch the first time it is called.
        self.ic: Icicle = None

    def convert_protection(self, prot: int) -> MemoryProtection:
        """Translate an EMU_PROT_* bitmask into Icicle's MemoryProtection.

        Write-only is widened to read/write, execute-only to execute/read,
        and write+execute to full access. Any value outside the eight
        EMU_PROT_* combinations raises NotImplementedError.
        """
        by_mask = {
            EMU_PROT_NONE: MemoryProtection.NoAccess,
            EMU_PROT_READ: MemoryProtection.ReadOnly,
            EMU_PROT_WRITE: MemoryProtection.ReadWrite,
            EMU_PROT_EXEC: MemoryProtection.ExecuteRead,
            EMU_PROT_READ | EMU_PROT_EXEC: MemoryProtection.ExecuteRead,
            EMU_PROT_READ | EMU_PROT_WRITE: MemoryProtection.ReadWrite,
            EMU_PROT_WRITE | EMU_PROT_EXEC: MemoryProtection.ExecuteReadWrite,
            EMU_PROT_ALL: MemoryProtection.ExecuteReadWrite,
        }
        converted = by_mask.get(prot)
        if converted is None:
            raise NotImplementedError(f"Unsupported protection: {prot}")
        return converted

    def convert_arch(self, arch: Architecture) -> str:
        """Return the architecture name string passed to Icicle for ``arch``."""
        names = {
            Architecture.x86_64: "x86_64",
            Architecture.arm64: "aarch64",
        }
        name = names.get(arch)
        if name is None:
            raise NotImplementedError(f"Unsupported architecture: {arch}")
        return name

    def check_arch(self, arch):
        """Create the VM on first use; afterwards require a matching architecture."""
        if self.ic is not None:
            assert self.ic.architecture == self.convert_arch(arch), "architecture mismatch"
            return
        self.ic = Icicle(self.convert_arch(arch))

    def mem_map(self, addr: int, size: int, prot: int):
        """Log the request and map the range in the VM."""
        logger.debug(f"mem_map({hex(addr)}, {hex(size)}, {prot})")
        vm_prot = self.convert_protection(prot)
        self.ic.mem_map(addr, size, vm_prot)

    def mem_protect(self, addr: int, size: int, prot: int):
        """Log the request and change the range's protection in the VM."""
        logger.debug(f"mem_protect({hex(addr)}, {hex(size)}, {prot})")
        vm_prot = self.convert_protection(prot)
        self.ic.mem_protect(addr, size, vm_prot)

    def mem_write(self, addr: int, data: bytes):
        """Write ``data`` into VM memory at ``addr``."""
        self.ic.mem_write(addr, data)

    def mem_read(self, addr: int, size: int) -> bytes:
        """Read ``size`` bytes of VM memory starting at ``addr``."""
        return self.ic.mem_read(addr, size)
def main():
    """Command-line entry point: map one ELF file and print a summary.

    ``--load-base`` is parsed as a hexadecimal number. The summary lists the
    reserved size, the init_array pointers, the entry point and every mapped
    segment together with its converted protection.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("elf_file", help="ELF file to map")
    parser.add_argument("--load-base", help="Base address to load the module at")
    args = parser.parse_args()

    load_base = int(args.load_base, 16) if args.load_base is not None else None

    mapper = IcicleELFMapper()
    module = mapper.load_module(args.elf_file, load_base)

    print("\nLoaded module:")
    print(f" load_size: {hex(module.load_size)}")
    print(f" init_array: {module.init_array}")
    # entry is None when the ELF header has no entry point; hex(None) would
    # raise TypeError, so print a placeholder instead.
    entry_text = "<NONE>" if module.entry is None else hex(module.entry)
    print(f" entry: {entry_text}")
    for segment in module.segments:
        print(f" segment {hex(segment.start)}-{hex(segment.start + segment.size)} {segment.prot} -> {mapper.convert_protection(segment.prot)}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment