本文为看雪论坛优秀文章
看雪论坛作者ID:KerryS
Dobby一共两个功能,其一是inlinehook,其二是指令插桩,两者原理差不多,主要介绍指令插桩。
int res_instument = DobbyInstrument((void *) addr, offset_name_handler);
//handler即我们自定义的回调
//RegisterContext为寄存器上下文,HookEntrtInfo为hook一些必要信息,比如hook地址等
void offset_name_handler(RegisterContext *ctx, const HookEntryInfo *info)
typedef struct _RegisterContext {
uint32_t dummy_0;
uint32_t dummy_1;
uint32_t dummy_2;
uint32_t sp;
union {
uint32_t r[13];
struct {
uint32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12;
} regs;
} general;
uint32_t lr;
} RegisterContext
//HookEntryInfo为hook地址及id
typedef struct _HookEntryInfo {
int hook_id;
union {
void *target_address;
void *function_address;
void *instruction_address;
};
} HookEntryInfo;
一
工作原理
------------------------------------------------------------------------------
process 6089
0x9d639d32 nop
0x9d639d34 ldr.w pc, [pc, #-0x0]
0x9d639d38 //地址 0xcea0a0ac
0x9d639d38 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF
00000000 ac a0 a0 ce ....
------------------------------------------------------------------------------
0xcea0a0ac ldr ip, [pc]
0xcea0a0b0 ldr pc, [pc]
0xcea0a0b4 //地址 0xa2305b80
0xcea0a0b8 //地址 0xcea0a000
0xcea0a0b4 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF
00000000 80 5b 30 a2 00 a0 a0 ce
0xcea0a000 sub sp, sp, #0x38
0xcea0a004 str lr, [sp, #0x34]
0xcea0a008 str ip, [sp, #0x30]
0xcea0a00c str fp, [sp, #0x2c]
0xcea0a010 str sl, [sp, #0x28]
0xcea0a014 str sb, [sp, #0x24]
0xcea0a018 str r8, [sp, #0x20]
0xcea0a01c str r7, [sp, #0x1c]
0xcea0a020 str r6, [sp, #0x18]
0xcea0a024 str r5, [sp, #0x14]
0xcea0a028 str r4, [sp, #0x10]
0xcea0a02c str r3, [sp, #0xc]
0xcea0a030 str r2, [sp, #8]
0xcea0a034 str r1, [sp, #4]
0xcea0a038 str r0, [sp]
0xcea0a03c add r0, sp, #0x38
0xcea0a040 sub sp, sp, #8
0xcea0a044 str r0, [sp, #4]
0xcea0a048 sub sp, sp, #8
0xcea0a04c mov r0, sp
0xcea0a050 mov r1, ip
0xcea0a054 bl #0xcea0a05c
0xcea0a058 b #0xcea0a064
0xcea0a05c ldr pc, [pc, #-4]
0xcea0a060 //地址 0x9d2b43e1
0xcea0a060 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF
00000000 e1 43 2b 9d
void instrument_call_forward_handler(RegisterContext *ctx, HookEntry *entry) {
DynamicBinaryInstrumentRouting *route = (DynamicBinaryInstrumentRouting *)entry->route;
if (route->handler) {
DBICallTy handler;
HookEntryInfo entry_info;
entry_info.hook_id = entry->id;
entry_info.instruction_address = entry->instruction_address;
handler = (DBICallTy)route->handler;
(*handler)(ctx, (const HookEntryInfo *)&entry_info);
}
// set prologue bridge next hop address with origin instructions that have been relocated(patched)
set_routing_bridge_next_hop(ctx, entry->relocated_origin_instructions);
}
0xcea0a064 add sp, sp, #8
0xcea0a068 add sp, sp, #8
0xcea0a06c pop {r0}
0xcea0a070 pop {r1}
0xcea0a074 pop {r2}
0xcea0a078 pop {r3}
0xcea0a07c pop {r4}
0xcea0a080 pop {r5}
0xcea0a084 pop {r6}
0xcea0a088 pop {r7}
0xcea0a08c pop {r8}
0xcea0a090 pop {sb}
0xcea0a094 pop {sl}
0xcea0a098 pop {fp}
0xcea0a09c pop {ip}
0xcea0a0a0 pop {lr}
0xcea0a0a4 mov pc, ip
// set prologue bridge next hop address with origin instructions that have been relocated(patched)
set_routing_bridge_next_hop(ctx, entry->relocated_origin_instructions);
void set_routing_bridge_next_hop(RegisterContext *ctx, void *address) {
*reinterpret_cast<void **>(&ctx->general.regs.r12) = address;
}
------------------------------------------------------------------------------
process 6089
0xcea0a0c0 nop
0xcea0a0c2 nop
0xcea0a0c4 push {r0, r1, r2, lr}
0xcea0a0c6 nop
0xcea0a0c8 cbz r0, #0xcea0a0cc
0xcea0a0ca nop
0xcea0a0cc b.w #0xcea0a0d0
0xcea0a0d0 ldr.w pc, [pc, #0x14] 0xcea0a0d0 + 0x14+thumb_pc_offset(4)=0xcea0a0e8,即 0x9d639d45
0xcea0a0d4 nop
0xcea0a0d6 nop
0xcea0a0d8 add r2, sp, #8
0xcea0a0da nop
0xcea0a0dc str r1, [r2, #-0x4]!
0xcea0a0e0 ldr.w pc, [pc, #-0x0] 同理,0x9d639d3d
0xcea0a0e4 //地址 0x9d639d3d
0xcea0a0e8 //地址 0x9d639d45
0xcea0a0e4 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF
00000000 3d 9d 63 9d 45 9d 63 9d =.c.E.c.
------------------------------------------------------------------------------
二
代码详解
void DynamicBinaryInstrumentRouting::DispatchRouting() {
BuildDynamicBinaryInstrumentRouting();
// generate relocated code which size == trampoline size
GenerateRelocatedCode(trampoline_buffer_->getSize());
}
void DynamicBinaryInstrumentRouting::BuildDynamicBinaryInstrumentRouting() {
// create closure trampoline jump to prologue_routing_dispath with the `entry_` data
ClosureTrampolineEntry *closure_trampoline;
void *handler = (void *)instrument_routing_dispatch;
#if __APPLE__
#if __has_feature(ptrauth_calls)
handler = __builtin_ptrauth_strip(handler, ptrauth_key_asia);
#endif
#endif
closure_trampoline = ClosureTrampoline::CreateClosureTrampoline(entry_, handler);
this->SetTrampolineTarget(closure_trampoline->address);
DLOG(0, "[closure bridge] Carry data %p ", entry_);
DLOG(0, "[closure bridge] Create prologue_dispatch_bridge %p", closure_trampoline->address);
// generate trampoline buffer, run before `GenerateRelocatedCode`
GenerateTrampolineBuffer(entry_->target_address, GetTrampolineTarget());
}
ClosureTrampolineEntry *ClosureTrampoline::CreateClosureTrampoline(void *carry_data, void *carry_handler) {
ClosureTrampolineEntry *entry = nullptr;
entry = new ClosureTrampolineEntry;
#ifdef ENABLE_CLOSURE_TRAMPOLINE_TEMPLATE
#define CLOSURE_TRAMPOLINE_SIZE (7 * 4)
// use closure trampoline template code, find the executable memory and patch it.
Code *code = Code::FinalizeCodeFromAddress(closure_trampoline_template, CLOSURE_TRAMPOLINE_SIZE);
#else
// use assembler and codegen modules instead of template_code
#include "TrampolineBridge/ClosureTrampolineBridge/AssemblyClosureTrampoline.h"
#define _ turbo_assembler_.
TurboAssembler turbo_assembler_(0);
PseudoLabel entry_label;
PseudoLabel forward_bridge_label;
_ Ldr(r12, &entry_label);
_ Ldr(pc, &forward_bridge_label);
_ PseudoBind(&entry_label);
_ EmitAddress((uint32_t)entry);
_ PseudoBind(&forward_bridge_label);
_ EmitAddress((uint32_t)get_closure_bridge());
AssemblyCodeChunk *code = nullptr;
code = AssemblyCodeBuilder::FinalizeFromTurboAssembler(&turbo_assembler_);
entry->address = (void *)code->raw_instruction_start();
entry->size = code->raw_instruction_size();
entry->carry_data = carry_data;
entry->carry_handler = carry_handler;
delete code;
return entry;
#endif
}
void *get_closure_bridge() {
// if already initialized, just return.
if (closure_bridge)
return closure_bridge;
// check if enable the inline-assembly closure_bridge_template
#if ENABLE_CLOSURE_BRIDGE_TEMPLATE
extern void closure_bridge_tempate();
closure_bridge = closure_bridge_template;
// otherwise, use the Assembler build the closure_bridge
#else
#define _ turbo_assembler_.
TurboAssembler turbo_assembler_(0);
_ sub(sp, sp, Operand(14 * 4));
_ str(lr, MemOperand(sp, 13 * 4));
_ str(r12, MemOperand(sp, 12 * 4));
_ str(r11, MemOperand(sp, 11 * 4));
_ str(r10, MemOperand(sp, 10 * 4));
_ str(r9, MemOperand(sp, 9 * 4));
_ str(r8, MemOperand(sp, 8 * 4));
_ str(r7, MemOperand(sp, 7 * 4));
_ str(r6, MemOperand(sp, 6 * 4));
_ str(r5, MemOperand(sp, 5 * 4));
_ str(r4, MemOperand(sp, 4 * 4));
_ str(r3, MemOperand(sp, 3 * 4));
_ str(r2, MemOperand(sp, 2 * 4));
_ str(r1, MemOperand(sp, 1 * 4));
_ str(r0, MemOperand(sp, 0 * 4));
// store sp
_ add(r0, sp, Operand(14 * 4));
_ sub(sp, sp, Operand(8));
_ str(r0, MemOperand(sp, 4));
// stack align
_ sub(sp, sp, Operand(8));
_ mov(r0, Operand(sp));
_ mov(r1, Operand(r12));
_ CallFunction(ExternalReference((void *)intercept_routing_common_bridge_handler));
// stack align
_ add(sp, sp, Operand(8));
// restore sp placeholder stack
_ add(sp, sp, Operand(8));
_ ldr(r0, MemOperand(sp, 4, PostIndex));
_ ldr(r1, MemOperand(sp, 4, PostIndex));
_ ldr(r2, MemOperand(sp, 4, PostIndex));
_ ldr(r3, MemOperand(sp, 4, PostIndex));
_ ldr(r4, MemOperand(sp, 4, PostIndex));
_ ldr(r5, MemOperand(sp, 4, PostIndex));
_ ldr(r6, MemOperand(sp, 4, PostIndex));
_ ldr(r7, MemOperand(sp, 4, PostIndex));
_ ldr(r8, MemOperand(sp, 4, PostIndex));
_ ldr(r9, MemOperand(sp, 4, PostIndex));
_ ldr(r10, MemOperand(sp, 4, PostIndex));
_ ldr(r11, MemOperand(sp, 4, PostIndex));
_ ldr(r12, MemOperand(sp, 4, PostIndex));
_ ldr(lr, MemOperand(sp, 4, PostIndex));
// auto switch A32 & T32 with `least significant bit`, refer `docs/A32_T32_states_switch.md`
_ mov(pc, Operand(r12));
AssemblyCodeChunk *code = AssemblyCodeBuilder::FinalizeFromTurboAssembler(&turbo_assembler_);
closure_bridge = (void *)code->raw_instruction_start();
DLOG(0, "[closure bridge] Build the closure bridge at %p", closure_bridge);
#endif
return (void *)closure_bridge;
}
bool InterceptRouting::GenerateTrampolineBuffer(void *src, void *dst) {
CodeBufferBase *trampoline_buffer = NULL;
// if near branch trampoline plugin enabled
if (RoutingPluginManager::near_branch_trampoline) {
RoutingPluginInterface *plugin = NULL;
plugin = reinterpret_cast<RoutingPluginInterface *>(RoutingPluginManager::near_branch_trampoline);
if (plugin->GenerateTrampolineBuffer(this, src, dst) == false) {
DLOG(0, "Failed enable near branch trampoline plugin");
}
}
if (this->GetTrampolineBuffer() == NULL) {
trampoline_buffer = GenerateNormalTrampolineBuffer((addr_t)src, (addr_t)dst);
this->SetTrampolineBuffer(trampoline_buffer);
DLOG(0, "[trampoline] Generate trampoline buffer %p -> %p", src, dst);
}
return true;
}
bool InterceptRouting::GenerateRelocatedCode(int tramp_size) {
// generate original code
AssemblyCodeChunk *origin = NULL;
origin = AssemblyCodeBuilder::FinalizeFromAddress((addr_t)entry_->target_address, tramp_size);
origin_ = origin;
// generate the relocated code
AssemblyCodeChunk *relocated = NULL;
relocated = AssemblyCodeBuilder::FinalizeFromAddress(0, 0);
relocated_ = relocated;
void *relocate_buffer = NULL;
relocate_buffer = entry_->target_address;
GenRelocateCodeAndBranch(relocate_buffer, origin, relocated);
if (relocated->raw_instruction_start() == 0)
return false;
// set the relocated instruction address
entry_->relocated_origin_instructions = (void *)relocated->raw_instruction_start();
DLOG(0, "[insn relocate] origin %p - %d", origin->raw_instruction_start(), origin->raw_instruction_size());
DLOG(0, "[insn relocate] relocated %p - %d", relocated->raw_instruction_start(), relocated->raw_instruction_size());
// save original prologue
memcpy((void *)entry_->origin_chunk_.chunk_buffer, (void *)origin_->raw_instruction_start(),
origin_->raw_instruction_size());
entry_->origin_chunk_.chunk.re_init_region_range(origin_);
return true;
}
void GenRelocateCodeAndBranch(void *buffer, AssemblyCodeChunk *origin, AssemblyCodeChunk *relocated) {
CodeBuffer *code_buffer = new CodeBuffer(64);
ThumbTurboAssembler thumb_turbo_assembler_(0, code_buffer);
#define thumb_ thumb_turbo_assembler_.
TurboAssembler arm_turbo_assembler_(0, code_buffer);
#define arm_ arm_turbo_assembler_.
Assembler *curr_assembler_ = NULL;
AssemblyCodeChunk origin_chunk;
origin_chunk.init_region_range(origin->raw_instruction_start(), origin->raw_instruction_size());
bool entry_is_thumb = origin->raw_instruction_start() % 2;
if (entry_is_thumb) {
origin->re_init_region_range(origin->raw_instruction_start() - THUMB_ADDRESS_FLAG, origin->raw_instruction_size());
}
LiteMutableArray relo_map(8);
relocate_remain:
addr32_t execute_state_changed_pc = 0;
bool is_thumb = origin_chunk.raw_instruction_start() % 2;
if (is_thumb) {
curr_assembler_ = &thumb_turbo_assembler_;
buffer = (void *)((addr_t)buffer - THUMB_ADDRESS_FLAG);
addr32_t origin_code_start_aligned = origin_chunk.raw_instruction_start() - THUMB_ADDRESS_FLAG;
// remove thumb address flag
origin_chunk.re_init_region_range(origin_code_start_aligned, origin_chunk.raw_instruction_size());
gen_thumb_relocate_code(&relo_map, &thumb_turbo_assembler_, buffer, &origin_chunk, relocated,
&execute_state_changed_pc);
if (thumb_turbo_assembler_.GetExecuteState() == ARMExecuteState) {
// relocate interrupt as execute state changed
if (execute_state_changed_pc < origin_chunk.raw_instruction_start() + origin_chunk.raw_instruction_size()) {
// re-init the origin
int relocate_remain_size =
origin_chunk.raw_instruction_start() + origin_chunk.raw_instruction_size() - execute_state_changed_pc;
// current execute state is ARMExecuteState, so not need `+ THUMB_ADDRESS_FLAG`
origin_chunk.re_init_region_range(execute_state_changed_pc, relocate_remain_size);
// update buffer
buffer = (void *)((addr_t)buffer + (execute_state_changed_pc - origin_code_start_aligned));
// add nop to align ARM
if (thumb_turbo_assembler_.pc_offset() % 4)
thumb_turbo_assembler_.t1_nop();
goto relocate_remain;
}
}
} else {
curr_assembler_ = &arm_turbo_assembler_;
gen_arm_relocate_code(&relo_map, &arm_turbo_assembler_, buffer, &origin_chunk, relocated,
&execute_state_changed_pc);
if (arm_turbo_assembler_.GetExecuteState() == ThumbExecuteState) {
// relocate interrupt as execute state changed
if (execute_state_changed_pc < origin_chunk.raw_instruction_start() + origin_chunk.raw_instruction_size()) {
// re-init the origin
int relocate_remain_size =
origin_chunk.raw_instruction_start() + origin_chunk.raw_instruction_size() - execute_state_changed_pc;
// current execute state is ThumbExecuteState, add THUMB_ADDRESS_FLAG
origin_chunk.re_init_region_range(execute_state_changed_pc + THUMB_ADDRESS_FLAG, relocate_remain_size);
// update buffer
buffer = (void *)((addr_t)buffer + (execute_state_changed_pc - origin_chunk.raw_instruction_start()));
goto relocate_remain;
}
}
}
// TODO:
// if last instr is unlink branch, skip
//dkl 调回插桩点之后继续执行
addr32_t rest_instr_addr = origin_chunk.raw_instruction_start() + origin_chunk.raw_instruction_size();
if (curr_assembler_ == &thumb_turbo_assembler_) {
// Branch to the rest of instructions
thumb_ AlignThumbNop();
thumb_ t2_ldr(pc, MemOperand(pc, 0));
// Get the real branch address
thumb_ EmitAddress(rest_instr_addr + THUMB_ADDRESS_FLAG);
} else {
// Branch to the rest of instructions
CodeGen codegen(&arm_turbo_assembler_);
// Get the real branch address
codegen.LiteralLdrBranch(rest_instr_addr);
}
// Realize all the Pseudo-Label-Data
thumb_turbo_assembler_.RelocBind();
// Realize all the Pseudo-Label-Data
//dkl 在这里会修正之前lable link的ldr指令,
arm_turbo_assembler_.RelocBind();
// Generate executable code
{
// assembler without specific memory address
AssemblyCodeChunk *cchunk;
cchunk = MemoryArena::AllocateCodeChunk(code_buffer->getSize());
if (cchunk == nullptr)
return;
thumb_turbo_assembler_.SetRealizedAddress(cchunk->address);
arm_turbo_assembler_.SetRealizedAddress(cchunk->address);
// fixup the instr branch into trampoline(has been modified)
reloc_label_fixup(origin, &relo_map, &thumb_turbo_assembler_, &arm_turbo_assembler_);
AssemblyCodeChunk *code = NULL;
code = AssemblyCodeBuilder::FinalizeFromTurboAssembler(curr_assembler_);
relocated->re_init_region_range(code->raw_instruction_start(), code->raw_instruction_size());
delete code;
}
// thumb
if (entry_is_thumb) {
// add thumb address flag
relocated->re_init_region_range(relocated->raw_instruction_start() + THUMB_ADDRESS_FLAG,
relocated->raw_instruction_size());
}
// clean
{
thumb_turbo_assembler_.ClearCodeBuffer();
arm_turbo_assembler_.ClearCodeBuffer();
delete code_buffer;
}
}
static void Thumb1RelocateSingleInstr(ThumbTurboAssembler *turbo_assembler, LiteMutableArray *thumb_labels,
int16_t instr, addr32_t from_pc, addr32_t to_pc,
addr32_t *execute_state_changed_pc_ptr) {
bool is_instr_relocated = false;
_ AlignThumbNop();
uint32_t val = 0, op = 0, rt = 0, rm = 0, rn = 0, rd = 0, shift = 0, cond = 0;
int32_t offset = 0;
int32_t op0 = 0, op1 = 0;
op0 = bits(instr, 10, 15);
// [F3.2.3 Special data instructions and branch and exchange]
if (op0 == 0b010001) {
op0 = bits(instr, 8, 9);
// [Add, subtract, compare, move (two high registers)]
if (op0 != 0b11) {
int rs = bits(instr, 3, 6);
// rs is PC register
if (rs == 15) {
val = from_pc;
uint16_t rewrite_inst = 0;
rewrite_inst = (instr & 0xff87) | LeftShift((VOLATILE_REGISTER.code()), 4, 3);
ThumbRelocLabelEntry *label = new ThumbRelocLabelEntry(val, false);
_ AppendRelocLabelEntry(label);
_ T2_Ldr(VOLATILE_REGISTER, label);
_ EmitInt16(rewrite_inst);
is_instr_relocated = true;
}
}
// compare branch (cbz, cbnz)
if ((instr & 0xf500) == 0xb100) {
uint16_t imm5 = bits(instr, 3, 7);
uint16_t i = bit(instr, 9);
uint32_t offset = (i << 6) | (imm5 << 1);
val = from_pc + offset;
rn = bits(instr, 0, 2);
//ThumbTurboAssembler 的data_labels_记录所有的ThumbRelocLabelEntry,保存着要跳转的地址,同时绑定了跳转指令,等待后续把要跳转的地址找到合适的内存储存后,一起修复好
// 即,修复前 ldr pc,xxx 修复后 ldr pc, [pc,offset],pc+offset就是存储要跳转地址的内存
ThumbRelocLabelEntry *label = new ThumbRelocLabelEntry(val + 1, true);
_ AppendRelocLabelEntry(label);
// imm5 = bits(0x4 >> 1, 1, 5);
//dkl 修复
imm5 = bits(0, 1, 5);
i = bit(0x4 >> 1, 6);
_ EmitInt16((instr & 0xfd07) | imm5 << 3 | i << 9);
_ t1_nop(); // manual align
_ t2_b(0);
//这个label持有要跳转过去的地址,跳转采用ldr pc 的方式,这个label同时又采用PseudoLabelInstruction结构体绑定到指令上,所以,已经具备了跳转的全部信息了,
// 只差把跳转地址存到合适的位置,然后修复ldr即可,修复工作好像是后面统一处理, thumb_turbo_assembler_.RelocBind();在这里修正
_ T2_Ldr(pc, label);
is_instr_relocated = true;
}
// if the instr do not needed relocate, just rewrite the origin
if (!is_instr_relocated) {
#if 0
if (from_pc % Thumb2_INST_LEN)
_ t1_nop();
#endif
_ EmitInt16(instr);
}
}
三
收获
#define offsetof(t, d) __builtin_offsetof(t, d)
#define container_of(ptr, type, member) \
({ \
const __typeof(((type *)0)->member) *__mptr = (ptr); \
(type *)((char *)__mptr - offsetof(type, member)); \
})
四
使用Dobby过程中遇到的问题
五
总结
看雪ID:KerryS
https://bbs.pediy.com/user-home-844633.htm
# 往期推荐
球分享
球点赞
球在看
点击“阅读原文”,了解更多!