Created
April 28, 2022 20:46
-
-
Save herrcore/ba185787ee4df117e90f1accbf3749dc to your computer and use it in GitHub Desktop.
Extract function and basic block info from IDA to be used as "known good" data for testing other tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##############################################################################
#
# Run from IDA in batch mode. -c builds a fresh database, -A runs without
# interactive dialogs, and -S names the script to execute:
#
# "c:\Program Files\IDA Pro 7.5\ida.exe" -c -A -S"c:\Users\admin\Documents\scripts\binary_map.py" z:\tmp\pe\pe.trickbot.x86
#
# The result is written next to the input file as <input path>.ida.json.
#
##############################################################################
import idautils | |
import idc | |
import idaapi | |
import json | |
import os | |
# Block until IDA's auto-analysis has finished, so the function list and
# flow charts queried below are complete.
idc.auto_wait()
################################################################################## | |
# Instruction type codes (insn_t.itype) for every x86 return variant.
RETS = [
    # Far returns
    idaapi.NN_retf, idaapi.NN_retfd, idaapi.NN_retfq, idaapi.NN_retfw,
    # Near returns
    idaapi.NN_retn, idaapi.NN_retnd, idaapi.NN_retnq, idaapi.NN_retnw,
    # Interrupt returns
    idaapi.NN_iretw, idaapi.NN_iret, idaapi.NN_iretd, idaapi.NN_iretq,
]
# Instruction type codes for the x86 conditional jumps, including the
# (e/r)cx-is-zero forms. Order matches the original alphabetical listing.
COND_BRANCHES = [
    idaapi.NN_ja, idaapi.NN_jae, idaapi.NN_jb, idaapi.NN_jbe,
    idaapi.NN_jc, idaapi.NN_jcxz, idaapi.NN_je, idaapi.NN_jecxz,
    idaapi.NN_jg, idaapi.NN_jge, idaapi.NN_jl, idaapi.NN_jle,
    idaapi.NN_jna, idaapi.NN_jnae, idaapi.NN_jnb, idaapi.NN_jnbe,
    idaapi.NN_jnc, idaapi.NN_jne, idaapi.NN_jng, idaapi.NN_jnge,
    idaapi.NN_jnl, idaapi.NN_jnle, idaapi.NN_jno, idaapi.NN_jnp,
    idaapi.NN_jns, idaapi.NN_jnz, idaapi.NN_jo, idaapi.NN_jp,
    idaapi.NN_jpe, idaapi.NN_jpo, idaapi.NN_jrcxz, idaapi.NN_js,
    idaapi.NN_jz,
]
# Instruction type codes for the unconditional jump variants.
UCOND_BRANCHES = [
    idaapi.NN_jmp, idaapi.NN_jmpfi, idaapi.NN_jmpni, idaapi.NN_jmpshort,
]

# Every jump, conditional or not.
BRANCHES = [*COND_BRANCHES, *UCOND_BRANCHES]

# Despite the name, this holds all jumps *and* returns. The merge pass below
# tests a block's last instruction against it to decide whether the block
# may be joined with the block that follows it.
CONDITIONALS = [*BRANCHES, *RETS]
# Accumulates the final output: a flat list of records that is serialised to
# JSON once every function has been processed. Two record shapes are
# appended by the code below:
#   {'offset': ..., 'type': "function"}
#   {'bytes': ..., 'instruction': ..., 'offset': ..., 'size': ..., 'type': "block"}
# Sample values from the reference format this data is compared against:
#   "bytes": "81 ff f6 ad f1 00 0f 84 7f 01 00 00",
#   "instructions": 2,
#   "offset": 106268,
#   "size": 12,
binary_map = []
def tohex(data):
    """Render a byte string as space-separated, lowercase two-digit hex.

    Args:
        data: A ``bytes`` or ``bytearray`` object.

    Returns:
        A ``str`` such as ``"81 ff f6"``; the empty string for empty input.
    """
    # Iterating bytes yields ints, so each byte is formatted directly instead
    # of hexlifying the buffer and slicing the result back into pairs.
    return ' '.join(f'{byte:02x}' for byte in data)
# Walk every function IDA found and append to binary_map:
#   * one "function" record holding the function's file offset, and
#   * one "block" record per basic block (bytes, instruction count, file
#     offset, size), after blocks that merely fall through have been joined
#     with the block that follows them.
for fn_start in idautils.Functions():
    print(f"Processing function: {hex(fn_start)}")

    # Add the function itself to the map.
    fn_offset = idaapi.get_fileregion_offset(fn_start)
    binary_map.append({'offset': fn_offset, 'type': "function"})

    # Collect IDA's basic blocks, keyed by start address:
    # bb_map[start_ea] = {'start': start_ea, 'end': end_ea}
    bb_map = {}
    for block in idaapi.FlowChart(idaapi.get_func(fn_start)):
        bb_map[block.start_ea] = {'start': block.start_ea, 'end': block.end_ea}

    # Re-key in ascending address order so the emission loop below is ordered.
    sorted_bb_map = {addr: bb_map[addr] for addr in sorted(bb_map)}

    # Merge pass. A block whose last instruction is neither a jump nor a
    # return is extended to the end of the block starting exactly where it
    # ends. Going from the highest address down means a successor has already
    # absorbed its own fall-through chain before its predecessor is extended.
    # The absorbed successor stays in the map and is still emitted on its own.
    for bb_addr in sorted(sorted_bb_map, reverse=True):
        block_end = sorted_bb_map[bb_addr]['end']

        # Decode the last item of the block (the head just before its end).
        # idc/idaapi are referenced explicitly rather than relying on names
        # IDAPython happens to inject into the script namespace.
        ins = idaapi.insn_t()
        idaapi.decode_insn(ins, idc.prev_head(block_end))
        if ins.itype in CONDITIONALS:
            continue

        # Direct lookup of the block that begins where this one ends.
        successor = sorted_bb_map.get(block_end)
        if successor is not None:
            sorted_bb_map[bb_addr]['end'] = successor['end']

    # Emit one record per (possibly extended) block.
    for block in sorted_bb_map.values():
        block_start = block['start']
        block_end = block['end']
        size = block_end - block_start

        raw_bytes = idc.get_bytes(block_start, size)
        if raw_bytes is None:
            print(f"ERROR no bytes! block_start: {hex(block_start)} block_end: {hex(block_end)} ")
            continue

        offset = idaapi.get_fileregion_offset(block_start)

        # Count the heads (defined items) that start inside the block.
        instructions = 0
        head_ptr = block_start
        while head_ptr < block_end:
            instructions += 1
            head_ptr = idc.next_head(head_ptr)

        # NOTE(review): the sample format documented in this file spells the
        # count key "instructions", while this record uses 'instruction'. The
        # key is left unchanged so existing consumers keep working; confirm
        # which spelling the downstream comparison expects.
        binary_map.append({
            'bytes': tohex(raw_bytes),
            'instruction': instructions,
            'offset': offset,
            'size': size,
            'type': "block",
        })
################################################################################### | |
# Serialise the collected map to "<input file path>.ida.json", then close IDA
# with exit code 0 so a batch-mode run terminates.
input_file_path = idc.get_input_file_path()
output_file_path = input_file_path + '.ida.json'
with open(output_file_path, 'w') as out_fp:
    serialized = json.dumps(binary_map)
    out_fp.write(serialized)

idc.qexit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment