-
-
Save saagarjha/a70d44951cb72f82efee3317d80ac07f to your computer and use it in GitHub Desktop.
// To compile: clang++ -arch x86_64 -arch arm64 -std=c++20 library_injector.cpp -lbsm -lEndpointSecurity -o library_injector,
// then codesign with com.apple.developer.endpoint-security.client and run the
// program as root.
#include <EndpointSecurity/EndpointSecurity.h> | |
#include <algorithm> | |
#include <array> | |
#include <bsm/libbsm.h> | |
#include <cstddef> | |
#include <cstdint> | |
#include <cstdlib> | |
#include <cstring> | |
#include <dispatch/dispatch.h> | |
#include <functional> | |
#include <iostream> | |
#include <mach-o/dyld.h> | |
#include <mach-o/dyld_images.h> | |
#include <mach-o/loader.h> | |
#include <mach-o/nlist.h> | |
#include <mach/mach.h> | |
#ifdef __arm64__ | |
#include <mach/arm/thread_state.h> | |
#elif __x86_64__ | |
#include <mach/i386/thread_state.h> | |
#else | |
#error "Only arm64 and x86_64 are currently supported" | |
#endif | |
#if __has_feature(ptrauth_calls) | |
#include <ptrauth.h> | |
#endif | |
#include <regex> | |
#include <span> | |
#include <stdexcept> | |
#include <string> | |
#include <sys/ptrace.h> | |
#include <sys/sysctl.h> | |
#include <unistd.h> | |
#include <vector> | |
// Runtime check used throughout this file: throws std::runtime_error when
// `condition` evaluates to false. The message contains the stringified
// condition, the source file and line, and the enclosing function's name.
// The do/while (0) wrapper makes the macro usable as a single statement.
#define ensure(condition) \
	do { \
		if (!(condition)) { \
			throw std::runtime_error(std::string("") + "Check \"" + #condition "\" failed at " + \
			                         __FILE__ + ":" + std::to_string(__LINE__) + " in function " + __func__); \
		} \
	} while (0)
// Operation code handed to csops() by is_cs_enforced() to fetch a process's
// code-signing status flags.
#define CS_OPS_STATUS 0
// Bit tested in the flags returned for CS_OPS_STATUS.
#define CS_ENFORCEMENT 0x00001000
// Declared by hand with C linkage; no header included by this file provides it.
extern "C" int csops(pid_t pid, unsigned int ops, void *useraddr, size_t usersize);
// Returns true when the process identified by `pid` has the P_TRANSLATED bit
// set in the p_flag field of its kinfo_proc record.
// Throws std::runtime_error (via ensure) if the sysctl lookup fails or returns
// a record of unexpected size.
auto is_translated(pid_t pid) {
	kinfo_proc proc;
	size_t size = sizeof(proc);
	// MIB selecting the kinfo_proc entry of one specific pid.
	std::array<int, 4> name{CTL_KERN, KERN_PROC, KERN_PROC_PID, pid};
	ensure(!sysctl(name.data(), name.size(), &proc, &size, nullptr, 0) && size == sizeof(proc));
	const bool translated = (proc.kp_proc.p_flag & P_TRANSLATED) != 0;
	return translated;
}
// Returns true when the code-signing status flags reported by csops() for
// `pid` have CS_ENFORCEMENT set.
// Throws std::runtime_error (via ensure) if csops() returns non-zero.
auto is_cs_enforced(pid_t pid) {
	int flags;
	ensure(!csops(pid, CS_OPS_STATUS, &flags, sizeof(flags)));
	const bool enforced = (flags & CS_ENFORCEMENT) != 0;
	return enforced;
}
template <typename T> | |
T scan(task_port_t task, std::uintptr_t &address) { | |
T t; | |
vm_size_t count; | |
ensure(vm_read_overwrite(task, address, sizeof(t), reinterpret_cast<pointer_t>(&t), &count) == KERN_SUCCESS && count == sizeof(t)); | |
address += sizeof(t); | |
return t; | |
} | |
// Reads a zero-terminated array of pointer-sized values from `task`, starting
// at `base`. Returns the non-zero entries in order; on return `base` points
// just past the terminating zero entry.
std::vector<std::uintptr_t> read_string_array(task_port_t task, std::uintptr_t &base) {
	std::vector<std::uintptr_t> strings;
	for (;;) {
		const auto string = scan<std::uintptr_t>(task, base);
		if (!string) {
			// Terminator consumed; it is not part of the result.
			break;
		}
		strings.push_back(string);
	}
	return strings;
}
// Reads a NUL-terminated string from `task` at `address`, one byte at a time,
// and returns it without the terminator. `address` is taken by value, so the
// caller's copy is left untouched.
std::string read_string(task_port_t task, std::uintptr_t address) {
	std::string string;
	for (;;) {
		const char c = scan<char>(task, address);
		if (!c) {
			break;
		}
		string.push_back(c);
	}
	return string;
}
// Rewrites the initial stack of `task` so that its environment asks dyld to
// load `library`.
//
// Starting at `sp`, the existing layout is read as: one word (kept as
// loadAddress), argc, then three zero-terminated pointer arrays (argv, envp,
// apple). DYLD_INSERT_LIBRARIES is extended with `library` (or created when
// absent) and DYLD_SHARED_REGION=1 is appended to the environment. All strings
// are then re-packed into one block written just below the address reached
// after the apple array's terminator (rounded down to pointer alignment),
// followed downward by the pointer arrays, argc and loadAddress.
//
// Returns the new stack pointer (the address where loadAddress was written).
// Throws std::runtime_error (via ensure) if any read or write of the target
// fails.
std::uintptr_t rearrange_stack(task_port_t task, const std::string &library, std::uintptr_t sp) {
	auto loadAddress = scan<std::uintptr_t>(task, sp);
	auto argc = scan<std::uintptr_t>(task, sp);

	// Collect all three pointer arrays first; this advances sp past them.
	auto argvAddresses = read_string_array(task, sp);
	auto envpAddresses = read_string_array(task, sp);
	auto appleAddresses = read_string_array(task, sp);

	// Then fetch the strings those pointers refer to.
	const auto fetch = [task](const std::vector<std::uintptr_t> &pointers) {
		std::vector<std::string> values;
		for (const auto pointer : pointers) {
			values.push_back(read_string(task, pointer));
		}
		return values;
	};
	auto argv = fetch(argvAddresses);
	auto envp = fetch(envpAddresses);
	auto apple = fetch(appleAddresses);

	// Append to the first existing DYLD_INSERT_LIBRARIES entry, or add one.
	bool extended = false;
	for (auto &variable : envp) {
		if (variable.starts_with("DYLD_INSERT_LIBRARIES=")) {
			variable += ":" + library;
			extended = true;
			break;
		}
	}
	if (!extended) {
		envp.push_back("DYLD_INSERT_LIBRARIES=" + library);
	}
	envp.push_back("DYLD_SHARED_REGION=1");

	// Pack every string (NUL-terminated) into one buffer, remembering each
	// string's offset within that buffer.
	auto strings = std::vector<char>{};
	const auto pack = [&strings](const std::vector<std::string> &values) {
		std::vector<std::uintptr_t> offsets;
		for (const auto &value : values) {
			offsets.push_back(strings.size());
			strings.insert(strings.end(), value.begin(), value.end());
			strings.push_back('\0');
		}
		return offsets;
	};
	argvAddresses = pack(argv);
	envpAddresses = pack(envp);
	appleAddresses = pack(apple);

	// Reserve room for the string block and round down to pointer alignment.
	sp -= strings.size();
	sp -= sp % sizeof(std::uintptr_t);
	ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(strings.data()), strings.size()) == KERN_SUCCESS);

	// Turn offsets into absolute addresses and lay the three arrays out back
	// to back, each followed by its zero terminator.
	auto addresses = std::vector<std::uintptr_t>{};
	for (const auto *offsets : {&argvAddresses, &envpAddresses, &appleAddresses}) {
		for (const auto offset : *offsets) {
			addresses.push_back(offset + sp);
		}
		addresses.push_back(0);
	}

	sp -= addresses.size() * sizeof(std::uintptr_t);
	ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(addresses.data()), addresses.size() * sizeof(std::uintptr_t)) == KERN_SUCCESS);
	sp -= sizeof(std::uintptr_t);
	ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(&argc), sizeof(std::uintptr_t)) == KERN_SUCCESS);
	sp -= sizeof(std::uintptr_t);
	ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(&loadAddress), sizeof(std::uintptr_t)) == KERN_SUCCESS);
	return sp;
}
// Replacement body that patch_restrictions() writes over dyld's
// _amfi_check_dyld_policy_self in the target process. It stores 0x5f through
// the pointer held in x1 (arm64) / rsi (x86_64) and returns 0. The start/end
// labels delimit the bytes that get copied.
__asm__(
		".globl _amfi_flags_patch_start\n"
		".globl _amfi_flags_patch_end\n"
		"_amfi_flags_patch_start:\n"
#if defined(__arm64__)
		"\tmov x2, #0x5f\n"
		"\tstr x2, [x1]\n"
		"\tmov x0, #0\n"
		"\tret\n"
#elif defined(__x86_64__)
		".intel_syntax noprefix\n"
		"\tmov QWORD PTR [rsi], 0x5f\n"
		"\txor rax, rax\n"
		"\tret\n"
#endif
		"_amfi_flags_patch_end:\n");
// Addresses of the two labels above; only their addresses are meaningful.
extern char amfi_flags_patch_start;
extern char amfi_flags_patch_end;
#if defined(__arm64__)
// A clever but very lazy patch. On arm64, _dyld_start begins with these five
// instructions:
//
// mov x0, sp
// and sp, x0, #~15
// mov fp, #0
// mov lr, #0
// b start
//
// Injecting DYLD_INSERT_LIBRARIES means sp has to move down a little, but
// thread_set_state_allowed prevents setting it directly, so instructions that
// do the adjustment are injected here instead. The kernel already leaves fp
// and lr at 0 at process startup, which frees up room for two extra
// instructions. (A slightly less lazy variant could exploit the kernel's own
// laziness and align sp when writing the initial stack, which would allow the
// sp-aligning instruction to be overwritten as well.)
//
// patch_start..patch_end is the code written after the dynamically generated
// sub instructions; check_start..check_end is what the target's entry point
// is expected to contain before patching.
__asm__(
		".globl _dyld_start_patch_start\n"
		".globl _dyld_start_patch_end\n"
		".globl _dyld_start_check_start\n"
		".globl _dyld_start_check_end\n"
		"\n"
		"_dyld_start_patch_start:\n"
		"_dyld_start_check_start:\n"
		/* sub sp, sp, [offset & 0xfff] */ // Generated at runtime
		/* sub sp, sp, [offset & ~0xfff], lsl 12 */ // Generated at runtime
		"mov x0, sp\n"
		"and sp, x0, #~15\n"
		"_dyld_start_patch_end:\n"
		// Only part of the sanity check, never written to the target
		"mov fp, #0\n"
		"mov lr, #0\n"
		"_dyld_start_check_end:\n");
#elif defined(__x86_64__)
// The equivalent patch for x86_64, where the initial sequence is:
//
// mov rdi, rsp
// and rsp, -16
// mov rbp, 0
// push 0
// jmp start
//
// It can be golfed down to equivalent code. The one difference is that
// xor ebp, ebp sets flags, but here that does not change them from what the
// kernel has already set, and no code depends on their value anyway.
//
// patch_start..patch_end is the shortened sequence written after the
// dynamically generated sub instruction; check_start..check_end is what the
// target's entry point is expected to contain before patching.
__asm__(
		".intel_syntax noprefix\n"
		".globl _dyld_start_patch_start\n"
		".globl _dyld_start_patch_end\n"
		".globl _dyld_start_check_start\n"
		".globl _dyld_start_check_end\n"
		"\n"
		"_dyld_start_patch_start:\n"
		/* sub rsp, [offset] */ // Generated at runtime
		"push rsp\n"
		"pop rdi\n"
		"and rsp, -16\n"
		"xor ebp, ebp\n"
		"push rbp\n"
		"_dyld_start_patch_end:\n"
		"_dyld_start_check_start:\n"
		"mov rdi, rsp\n"
		"and rsp, -16\n"
		"mov rbp, 0\n"
		"push 0\n"
		"_dyld_start_check_end:\n");
#endif
// Addresses of the labels defined in the assembly above; only their addresses
// are meaningful.
extern char dyld_start_patch_start;
extern char dyld_start_patch_end;
extern char dyld_start_check_start;
extern char dyld_start_check_end;
// Copies the bytes in [patch_start, patch_end) from this process into `task`
// at `address`.
//
// Every page touched by the patch is first made read/write (VM_PROT_COPY
// requests a private copy of the mapping), then written, then set to
// read/execute. Previously only the page containing `address` was unprotected,
// so a patch crossing a page boundary failed in vm_write. Note that the
// affected pages end up read/execute regardless of their prior protection.
//
// Throws std::runtime_error (via ensure) if any Mach call fails.
void write_patch(task_t task, std::uintptr_t address, void *patch_start, void *patch_end) {
	const auto size = reinterpret_cast<std::uintptr_t>(patch_end) - reinterpret_cast<std::uintptr_t>(patch_start);
	const std::uintptr_t page_size = PAGE_SIZE;
	const auto first_page = address / page_size * page_size;
	// Last byte written; an empty patch still covers the page of `address`.
	const auto last_byte = size ? address + size - 1 : address;
	const auto protected_size = last_byte / page_size * page_size + page_size - first_page;
	ensure(vm_protect(task, first_page, protected_size, false, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY) == KERN_SUCCESS);
	ensure(vm_write(task, address, reinterpret_cast<vm_offset_t>(patch_start), size) == KERN_SUCCESS);
	ensure(vm_protect(task, first_page, protected_size, false, VM_PROT_READ | VM_PROT_EXECUTE) == KERN_SUCCESS);
}
// Overwrites _amfi_check_dyld_policy_self in the target's dyld with the
// amfi_flags_patch stub.
//
// `pc` is the target thread's program counter, which the caller expects to be
// at __dyld_start. The distance between the two symbols is taken from the dyld
// image loaded into *this* process (note the task_info call on
// mach_task_self()), so this relies on both processes using an identical dyld
// image.
//
// Symbol and string tables are located through the __LINKEDIT segment: symoff
// and stroff are file offsets, so they are translated with the segment's
// vmaddr/fileoff instead of being added to the image base directly.
//
// Throws std::runtime_error (via ensure) if the dyld image lacks the expected
// load commands or symbols, or if writing the patch fails.
void patch_restrictions(task_t task, std::uintptr_t pc) {
	task_dyld_info_data_t dyldInfo;
	mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
	ensure(task_info(mach_task_self(), TASK_DYLD_INFO, reinterpret_cast<task_info_t>(&dyldInfo), &count) == KERN_SUCCESS);
	const auto all_image_infos = reinterpret_cast<const dyld_all_image_infos *>(dyldInfo.all_image_info_addr);
	const auto header = reinterpret_cast<const mach_header_64 *>(all_image_infos->dyldImageLoadAddress);
	const auto base = reinterpret_cast<std::uintptr_t>(header);

	// Single pass over the load commands, picking out what is needed.
	const symtab_command *symtab = nullptr;
	const segment_command_64 *text = nullptr;
	const segment_command_64 *linkedit = nullptr;
	auto location = reinterpret_cast<std::uintptr_t>(header + 1);
	for (std::uint32_t i = 0; i < header->ncmds; ++i) {
		const auto command = reinterpret_cast<const load_command *>(location);
		if (command->cmd == LC_SYMTAB) {
			symtab = reinterpret_cast<const symtab_command *>(location);
		} else if (command->cmd == LC_SEGMENT_64) {
			const auto segment = reinterpret_cast<const segment_command_64 *>(location);
			if (!std::strncmp(segment->segname, SEG_TEXT, sizeof(segment->segname))) {
				text = segment;
			} else if (!std::strncmp(segment->segname, SEG_LINKEDIT, sizeof(segment->segname))) {
				linkedit = segment;
			}
		}
		location += command->cmdsize;
	}
	ensure(symtab && text && linkedit);

	// Address at which file offset 0 of the __LINKEDIT-backed data would sit.
	const auto slide = base - text->vmaddr;
	const auto linkedit_base = slide + linkedit->vmaddr - linkedit->fileoff;
	const auto symbols = std::span{reinterpret_cast<const nlist_64 *>(linkedit_base + symtab->symoff), symtab->nsyms};
	const auto strings = reinterpret_cast<const char *>(linkedit_base + symtab->stroff);

	// Returns the n_value of the named symbol; fails loudly when it is absent
	// rather than dereferencing the end iterator.
	const auto find_symbol = [&](const char *name) {
		const auto symbol = std::find_if(symbols.begin(), symbols.end(), [&](const nlist_64 &candidate) {
			return candidate.n_un.n_strx < symtab->strsize && !std::strcmp(strings + candidate.n_un.n_strx, name);
		});
		ensure(symbol != symbols.end());
		return symbol->n_value;
	};
	const auto dyld_start = find_symbol("__dyld_start");
	const auto amfi_check_dyld_policy_self = find_symbol("_amfi_check_dyld_policy_self");

	write_patch(task, pc + amfi_check_dyld_policy_self - dyld_start, &amfi_flags_patch_start, &amfi_flags_patch_end);
}
void inject(pid_t pid, const std::string &library) { | |
task_port_t task; | |
ensure(task_for_pid(mach_task_self(), pid, &task) == KERN_SUCCESS); | |
thread_act_array_t threads; | |
mach_msg_type_number_t count; | |
ensure(task_threads(task, &threads, &count) == KERN_SUCCESS); | |
ensure(count == 1); | |
#if __arm64__ | |
arm_thread_state64_t state; | |
count = ARM_THREAD_STATE64_COUNT; | |
thread_state_flavor_t flavor = ARM_THREAD_STATE64; | |
#elif __x86_64__ | |
x86_thread_state64_t state; | |
count = x86_THREAD_STATE64_COUNT; | |
thread_state_flavor_t flavor = x86_THREAD_STATE64; | |
#endif | |
ensure(thread_get_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS); | |
#if __arm64__ | |
ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_TO_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS); | |
auto sp = rearrange_stack(task, library, arm_thread_state64_get_sp(state)); | |
patch_restrictions(task, arm_thread_state64_get_pc(state)); | |
if (__builtin_available(macOS 14.4, *)) { | |
} else { | |
arm_thread_state64_set_sp(state, sp); | |
ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_FROM_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS); | |
} | |
#elif __x86_64__ | |
auto sp = rearrange_stack(task, library, static_cast<std::uintptr_t>(state.__rsp)); | |
state.__rsp = sp; | |
patch_restrictions(task, state.__rip); | |
#endif | |
if (__builtin_available(macOS 14.4, *)) { | |
#if __arm64__ | |
auto address = arm_thread_state64_get_pc(state); | |
#elif __x86_64__ | |
auto address = state.__rip; | |
#endif | |
auto expected = std::span{&dyld_start_check_start, &dyld_start_check_end}; | |
auto actual = std::vector(expected.begin(), expected.end()); | |
vm_size_t count; | |
ensure(vm_read_overwrite(task, address, actual.size(), reinterpret_cast<pointer_t>(actual.data()), &count) == KERN_SUCCESS && count == expected.size()); | |
ensure(std::equal(expected.begin(), expected.end(), actual.begin(), actual.end())); | |
#if __arm64__ | |
auto difference = arm_thread_state64_get_sp(state) - sp; | |
auto stack_adjustment = std::array{ | |
// sub sp, sp, difference & 0xfff | |
std::byte{0xff}, | |
static_cast<std::byte>(0x03 | (difference & 0x3f) << 2), | |
static_cast<std::byte>(0x00 | (difference & 0xfc0) >> 6), | |
std::byte{0xd1}, | |
// sub sp, sp, difference & ~0xfff, lsl #12 | |
std::byte{0xff}, | |
static_cast<std::byte>(0x03 | ((difference >> 12) & 0x3f) << 2), | |
static_cast<std::byte>(0x40 | ((difference >> 12) & 0xfc0) >> 6), | |
std::byte{0xd1}, | |
}; | |
#elif __x86_64__ | |
auto difference = state.__rsp - sp; | |
auto stack_adjustment = std::array{ | |
// sub rsp, difference | |
std::byte{0x48}, | |
std::byte{0x81}, | |
std::byte{0xec}, | |
static_cast<std::byte>((difference >> 0) & 0xff), | |
static_cast<std::byte>((difference >> 8) & 0xff), | |
static_cast<std::byte>((difference >> 16) & 0xff), | |
static_cast<std::byte>((difference >> 24) & 0xff), | |
}; | |
#endif | |
write_patch(task, address, stack_adjustment.begin(), stack_adjustment.end()); | |
write_patch(task, address + stack_adjustment.size(), &dyld_start_patch_start, &dyld_start_patch_end); | |
} else { | |
ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS); | |
} | |
mach_port_deallocate(mach_task_self(), *threads); | |
vm_deallocate(mach_task_self(), (vm_address_t)threads, sizeof(*threads)); | |
} | |
int main(int argc, char **argv, char **envp) { | |
if (!getenv("DYLD_SHARED_REGION")) { | |
uint32_t length = 0; | |
std::string path; | |
_NSGetExecutablePath(path.data(), &length); | |
path = std::string('0', length); | |
ensure(!_NSGetExecutablePath(path.data(), &length)); | |
std::vector<const char *> environment; | |
while (*envp) { | |
environment.push_back(*envp++); | |
} | |
// This happens to disable dyld-in-cache. | |
environment.push_back("DYLD_SHARED_REGION=1"); | |
environment.push_back(nullptr); | |
execve(path.c_str(), argv, const_cast<char **>(environment.data())); | |
ensure(false); | |
} | |
if (argc < 3) { | |
std::cerr << "Usage: " << *argv << " <library to inject> <process paths...>" << std::endl; | |
std::exit(EXIT_FAILURE); | |
} | |
auto library = *++argv; | |
std::vector<std::regex> processes; | |
for (auto process : std::span(++argv, argc - 2)) { | |
processes.push_back(std::regex(process)); | |
} | |
es_client_t *client = NULL; | |
ensure(es_new_client(&client, ^(es_client_t *client, const es_message_t *message) { | |
switch (message->event_type) { | |
case ES_EVENT_TYPE_AUTH_EXEC: { | |
const char *name = message->event.exec.target->executable->path.data; | |
for (const auto &process : processes) { | |
pid_t pid = audit_token_to_pid(message->process->audit_token); | |
if (std::regex_search(name, process) && is_translated(getpid()) == is_translated(pid)) { | |
if (is_cs_enforced(pid)) { | |
ensure(!ptrace(PT_ATTACHEXC, pid, nullptr, 0)); | |
// Work around FB9786809 | |
dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 1'000'000'000), dispatch_get_main_queue(), ^{ | |
ensure(!ptrace(PT_DETACH, pid, nullptr, 0)); | |
}); | |
} | |
inject(pid, library); | |
} | |
} | |
es_respond_auth_result(client, message, ES_AUTH_RESULT_ALLOW, false); | |
break; | |
} | |
default: | |
ensure(false && "Unexpected event type!"); | |
} | |
}) == ES_NEW_CLIENT_RESULT_SUCCESS); | |
es_event_type_t events[] = {ES_EVENT_TYPE_AUTH_EXEC}; | |
ensure(es_subscribe(client, events, sizeof(events) / sizeof(*events)) == ES_RETURN_SUCCESS); | |
dispatch_main(); | |
} |
The library_injector itself doesn't crash, just the application.
Installed 14.4.1 and got the same crash. I then replaced `if (__builtin_available(macOS 14.4, *))` with `if (__builtin_available(macOS 14.5, *))`, and injection works on 14.4.1.
Hmm, I'll take a look. I guess this rolled out in 14.5?
The old path works on 14.4.1 so it seems.
This is definitely a noob question, but when signing this binary I'm assuming that I need an `.entitlements` file like this:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.developer.endpoint-security.client</key>
<true/>
</dict>
</plist>
and then I would run something like this?
codesign --force --sign "Apple Development: Steven Hepting (XXXXXXXXXX)" --entitlements entitlements.plist library_injector
@leochou0729 have you tried the new patch on intel yet?
Great approach! I tried it on Sequoia 15.4 on Intel (24E247, XNU 11417.101.15) and ran into two issues:
- The code does `state.__rsp = sp;` (line 325), which causes `auto difference = state.__rsp - sp;` to always be 0 (trivial patch below).
- Symbol resolution with `DYLD_INFO` no longer seems to work (the `symoff` of the `LC_SYMTAB` points to garbage). Perhaps I'm holding it wrong - I shouldn't need to load and parse the Mach-O again.
--- a
+++ b
@@ -322,7 +332,6 @@
}
#elif __x86_64__
auto sp = rearrange_stack(task, library, static_cast<std::uintptr_t>(state.__rsp));
- state.__rsp = sp;
patch_restrictions(task, state.__rip);
#endif
if (__builtin_available(macOS 14.4, *)) {
@@ -369,6 +378,9 @@
write_patch(task, address + stack_adjustment.size(), &dyld_start_patch_start, &dyld_start_patch_end);
} else {
+#if __x86_64__
+ state.__rsp = sp;
+#endif
ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS);
}
@zhaofengli confirmed that it works, thanks for the patch
@orion1vi Does symbol resolution in `patch_restrictions` work for you? For me the symtab just contains garbage. I'm lazy, so I'm hardcoding the offset for now.
The code does state.__rsp = sp; (line 325) which causes auto difference = state.__rsp - sp; to always be 0 (trivial patch below)
Yeah, that's wrong. I think I broke this for x86-64 when I added 14.4+ support. I believe the actual patch should be to conditionalize setting it for 14.4 and below, because for those versions we don't need to calculate difference anyway. And for newer versions we use the alternative trick to set rsp (I'm not sure if it's required, but it would be nice to check. I don't have any easy access to Intel machines…)
Symbol resolution with DYLD_INFO no longer seems to work (symoff of the LC_SYMTAB points to garbage).
Can you check to make sure that dyld-in-cache hasn't activated? The LC_SYMTAB in /usr/lib/dyld is reasonable for me.
@zhaofengli i don't recall it ever working for me, so i also hard code it
And for newer versions we use the alternative trick to set rsp (I'm not sure if it's required, but it would be nice to check. I don't have any easy access to Intel machines…)
That's the weird thing - on 15.4 (24E248, XNU 11417.101.15, arm64) the old `thread_set_state` path seems to work for some reason, even though a quick skim over the (slightly outdated) XNU source suggests it shouldn't have worked [1]. I haven't disassembled the kernel yet and I'm probably missing something here.
Edit: `thread_set_state` actually does not work for system executables. On x86-64, I get killed by `GUARD_TYPE_MACH_PORT`, and on arm64e I get `KERN_PROTECTION_FAILURE` returned. The new trick is necessary on arm64e, and on x86-64 I'm using the private `com.apple.private.thread-set-state` entitlement with my amfid script for now.
Can you check to make sure that dyld-in-cache hasn't activated? The LC_SYMTAB in /usr/lib/dyld is reasonable for me.
Looks like it (breakpoint on dyld`restartWithDyldInCache), but ~~`DYLD_SHARED_REGION` doesn't appear to disable it for me~~. I ended up parsing dyld from disk, which I need to do for Rosetta anyways (see below). Edit: `DYLD_SHARED_REGION=1` does disable dyld-in-cache (I was somehow setting the environment wrong).
I also looked into getting this to work with Rosetta executables. The process is stopped at the entry point of the Rosetta runtime, and the kernel inserts the following three u64 integers at `sp` (reference), with the regular initial stack frame at `sp + 24`:
#[derive(Debug)]
#[repr(C, packed)]
struct RosettaStackFrame {
/// File descriptor of dyld.
dynlinker_fd: u64,
/// Address of the Mach-O header of the X86-64 dyld loaded in memory.
dynlinker_mach_header: u64,
/// File descriptor of the main executable (X86-64).
main_binary_fd: u64,
}
Here it's possible to patch the X86_64 version of dyld to get around the restrictions (I used dynasm_rs to help me assemble easily), but it's annoying due to the Rosetta AOT cache. I haven't figured out a clean way to patch without poisoning the cache. Edit: It's actually very easy: just setting `dynlinker_fd` to an invalid fd bypasses the cache.
For me, completely disabling SIP is unacceptable, and I want to inject while keeping the security measures intact as much as possible. To make playing with restricted/private entitlements easier, I wrote a simple script that attaches to amfid to selectively bypass validation. It can be used with just the Debugging Restrictions disabled (`csrutil enable --without debug`). Since this is already dangerous enough (root effectively has access to all entitlements again by debugging amfid), you may want to re-enforce some of the restrictions in your own EndpointSecurity module.
Footnotes
1. My injector does not have `com.apple.private.thread-set-state` and I have not told amfi to get out of my way ↩
I suspect library_injector is crashing (and thus the process it injects into is also crashing because it isn't done with its work). Do you see any crash logs for that?