一
前言
二
正文
DetourUpdateThread
函数更新线程,但需要由调用方传入需要进行更新线程的句柄:LONG WINAPI DetourUpdateThread(_In_ HANDLE hThread);
GetThreadContext
与SetThreadContext
准确地调整线程上下文中的PC(程序计数器)到正确位置,实现参考Detours/src/detours.cpp于4b8c659f · microsoft/Detours。
[!TIP]
虽然它的官方示例“Using Detours”中有DetourUpdateThread(GetCurrentThread())
这样的代码,但这种用法既无意义也无效,应使用它更新进程中除当前线程外的所有线程,详见DetourUpdateThread
。但即便以正确的方式更新线程,也会带来一个新的风险,见技术Wiki:更新线程时避免堆死锁。
while (GetThreadContext(hThread, &ctx))
{
...
if (nTries < 3)
{
// oops - we should try to get the instruction pointer out of here.
ODPRINTF((L"mhooks: SuspendOneThread: suspended thread %d - IP is at %p - IS COLLIDING WITH CODE", dwThreadId, pIp));
ResumeThread(hThread);
Sleep(100);
SuspendThread(hThread);
nTries++;
}
...
}
/*
 * Suspend every thread of the current process except the calling thread.
 *
 * A system-wide process/thread snapshot is taken with
 * NtQuerySystemInformation(SystemProcessInformation), the entry whose
 * UniqueProcessId matches the current process is located, and each of its
 * threads other than the caller is opened and suspended.  Threads that cannot
 * be opened or suspended are skipped silently.
 *
 * SuspendedHandles      Receives an array of handles to the suspended threads,
 *                       or NULL when no thread was suspended.  The array and
 *                       its handles are released by detour_thread_resume.
 * SuspendedHandleCount  Receives the number of handles in the array.
 *
 * Returns STATUS_SUCCESS on success (including "nothing to suspend"),
 * STATUS_NO_MEMORY if an allocation fails, STATUS_NOT_FOUND if the current
 * process is missing from the snapshot, or the failure status of
 * NtQuerySystemInformation.  On every failure path the outputs are NULL / 0.
 */
NTSTATUS
detour_thread_suspend(
    _Outptr_result_maybenull_ PHANDLE* SuspendedHandles,
    _Out_ PULONG SuspendedHandleCount)
{
    NTSTATUS Status;
    ULONG i, ThreadCount, SuspendedCount;
    PSYSTEM_PROCESS_INFORMATION pSPI, pCurrentSPI;
    PSYSTEM_THREAD_INFORMATION pSTI;
    PHANDLE Buffer;
    HANDLE ThreadHandle, CurrentPID, CurrentTID;
    OBJECT_ATTRIBUTES ObjectAttributes = RTL_CONSTANT_OBJECT_ATTRIBUTES(NULL, 0);

    /* Give the outputs a defined value on every path, including failures */
    *SuspendedHandles = NULL;
    *SuspendedHandleCount = 0;

    /* Get system process and thread information */
    i = _1MB;
_Try_alloc:
    pSPI = (PSYSTEM_PROCESS_INFORMATION)detour_memory_alloc(i);
    if (pSPI == NULL)
    {
        return STATUS_NO_MEMORY;
    }
    Status = NtQuerySystemInformation(SystemProcessInformation, pSPI, i, &i);
    if (!NT_SUCCESS(Status))
    {
        detour_memory_free(pSPI);
        if (Status == STATUS_INFO_LENGTH_MISMATCH)
        {
            /* i now holds the length reported by the call; retry with it */
            goto _Try_alloc;
        }
        return Status;
    }

    /* Find current process and threads */
    CurrentPID = NtGetCurrentProcessId();
    pCurrentSPI = pSPI;
    while (pCurrentSPI->UniqueProcessId != CurrentPID)
    {
        if (pCurrentSPI->NextEntryOffset == 0)
        {
            Status = STATUS_NOT_FOUND;
            goto _Exit;
        }
        pCurrentSPI = (PSYSTEM_PROCESS_INFORMATION)Add2Ptr(pCurrentSPI, pCurrentSPI->NextEntryOffset);
    }
    pSTI = (PSYSTEM_THREAD_INFORMATION)Add2Ptr(pCurrentSPI, sizeof(*pCurrentSPI));

    /*
     * Skip if no other threads.  Comparing the raw count (instead of
     * "count - 1 == 0") also covers a reported count of 0 without underflow.
     */
    ThreadCount = pCurrentSPI->NumberOfThreads;
    if (ThreadCount <= 1)
    {
        Status = STATUS_SUCCESS;
        goto _Exit;
    }

    /*
     * Create handle array.  It is sized for every thread in the snapshot, not
     * "all but one", so it cannot overflow even if the calling thread's ID is
     * not matched by the loop below.
     */
    Buffer = (PHANDLE)detour_memory_alloc(ThreadCount * sizeof(HANDLE));
    if (Buffer == NULL)
    {
        Status = STATUS_NO_MEMORY;
        goto _Exit;
    }

    /* Suspend threads */
    SuspendedCount = 0;
    CurrentTID = NtGetCurrentThreadId();
    for (i = 0; i < ThreadCount; i++)
    {
        if (pSTI[i].ClientId.UniqueThread == CurrentTID ||
            !NT_SUCCESS(NtOpenThread(&ThreadHandle,
                                     THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_SET_CONTEXT,
                                     &ObjectAttributes,
                                     &pSTI[i].ClientId)))
        {
            continue;
        }
        if (NT_SUCCESS(NtSuspendThread(ThreadHandle, NULL)))
        {
            /* At most one handle is stored per iteration, so SuspendedCount <= i */
            _Analysis_assume_(SuspendedCount < ThreadCount);
            Buffer[SuspendedCount++] = ThreadHandle;
        } else
        {
            NtClose(ThreadHandle);
        }
    }

    /* Return suspended thread handles */
    if (SuspendedCount == 0)
    {
        detour_memory_free(Buffer);
        *SuspendedHandles = NULL;
    } else
    {
        *SuspendedHandles = Buffer;
    }
    *SuspendedHandleCount = SuspendedCount;
    Status = STATUS_SUCCESS;

_Exit:
    detour_memory_free(pSPI);
    return Status;
}
/*
 * Move a thread's program counter out of code that a pending detour
 * operation is about to rewrite.
 *
 * The thread's control context is read, and the pending operations are walked
 * until one is found whose affected range contains the PC:
 *   - remove: PC inside the trampoline  -> equivalent offset in the target;
 *   - attach: PC inside the first cbRestore bytes of the target
 *             -> equivalent offset in the trampoline.
 * The first match rewrites the PC and ends the walk.
 *
 * NOTE(review): presumably the thread must already be suspended by the
 * caller - confirm against the call site.
 *
 * ThreadHandle       Thread whose context is inspected and possibly rewritten.
 * PendingOperations  Head of the singly linked list (pNext) of operations.
 *
 * Returns the failure status of NtGetContextThread if the context cannot be
 * read, the status of NtSetContextThread if the PC was rewritten, otherwise
 * the (successful) status of NtGetContextThread.
 */
NTSTATUS
detour_thread_update(
    _In_ HANDLE ThreadHandle,
    _In_ PDETOUR_OPERATION PendingOperations)
{
    NTSTATUS Status;
    PDETOUR_OPERATION o;
    CONTEXT cxt;
    BOOL bUpdateContext;

    cxt.ContextFlags = CONTEXT_CONTROL;
    Status = NtGetContextThread(ThreadHandle, &cxt);
    if (!NT_SUCCESS(Status))
    {
        return Status;
    }

    for (o = PendingOperations; o != NULL; o = o->pNext)
    {
        bUpdateContext = FALSE;
        if (o->fIsRemove)
        {
            /*
             * The range must span the trampoline object itself.  The previous
             * sizeof(o->pTrampoline) measured the pointer (4/8 bytes), so a
             * thread executing further inside the trampoline was never
             * relocated.
             */
            if (cxt.CONTEXT_PC >= (ULONG_PTR)o->pTrampoline &&
                cxt.CONTEXT_PC < ((ULONG_PTR)o->pTrampoline + sizeof(*o->pTrampoline)))
            {
                cxt.CONTEXT_PC = (ULONG_PTR)o->pbTarget +
                    detour_align_from_trampoline(o->pTrampoline, (BYTE)(cxt.CONTEXT_PC - (ULONG_PTR)o->pTrampoline));
                bUpdateContext = TRUE;
            }
        } else
        {
            if (cxt.CONTEXT_PC >= (ULONG_PTR)o->pbTarget &&
                cxt.CONTEXT_PC < ((ULONG_PTR)o->pbTarget + o->pTrampoline->cbRestore))
            {
                cxt.CONTEXT_PC = (ULONG_PTR)o->pTrampoline +
                    detour_align_from_target(o->pTrampoline, (BYTE)(cxt.CONTEXT_PC - (ULONG_PTR)o->pbTarget));
                bUpdateContext = TRUE;
            }
        }
        if (bUpdateContext)
        {
            /* Only the first matching operation is applied */
            Status = NtSetContextThread(ThreadHandle, &cxt);
            break;
        }
    }

    return Status;
}
/*
 * Resume and close every thread handle in the array, then free the array.
 *
 * SuspendedHandles      Array of thread handles; released by this function.
 * SuspendedHandleCount  Number of handles in the array.
 */
VOID
detour_thread_resume(
    _In_reads_(SuspendedHandleCount) _Frees_ptr_ PHANDLE SuspendedHandles,
    _In_ ULONG SuspendedHandleCount)
{
    ULONG Index = 0;

    while (Index < SuspendedHandleCount)
    {
        HANDLE Thread = SuspendedHandles[Index];

        /* Resume first, then drop the handle */
        NtResumeThread(Thread, NULL);
        NtClose(Thread);
        Index++;
    }

    detour_memory_free(SuspendedHandles);
}
NtQuerySystemInformation
以获取当前进程所有线程NtSuspendThread
挂起除当前线程外的所有线程NtResumeThread
恢复挂起的线程new/delete
),更新线程时如果挂起了另一个也使用此堆且正持有堆锁的线程,Detours再访问此堆就会发生死锁。
> Furthermore, you would be best served to take the heap lock (HeapLock) before suspending the thread, because the Detours library will allocate memory during thread suspension.
此外,最好在挂起线程前占有堆锁(HeapLock),因为Detours库将在线程挂起期间分配内存。
HeapUserThread
)不断调用malloc/free
(等效于new/delete
):while (!g_bStop)
{
p = malloc(4);
if (p != NULL)
{
free(p);
}
}
SetHookThread
)不断使用Detours或SlimDetours挂钩和脱钩:while (!g_bStop)
{
hr = HookTransactionBegin(g_eEngineType);
if (FAILED(hr))
{
break;
}
if (g_eEngineType == EngineMicrosoftDetours)
{
hr = HRESULT_FROM_WIN32(DetourUpdateThread((HANDLE)lpThreadParameter));
if (FAILED(hr))
{
break;
}
}
hr = HookAttach(g_eEngineType, EnableHook, (PVOID*)&g_pfnEqualRect, Hooked_EqualRect);
if (FAILED(hr))
{
HookTransactionAbort(g_eEngineType);
break;
}
hr = HookTransactionCommit(g_eEngineType);
if (FAILED(hr))
{
break;
}EnableHook = !EnableHook;
}
[!NOTE]
SlimDetours会自动更新线程(参考技术Wiki:应用内联钩子时自动更新线程https://github.com/KNSoft/KNSoft.SlimDetours/blob/main/Docs/TechWiki/Update%20Threads%20Automatically%20When%20Applying%20Inline%20Hooks/README.zh-CN.md),所以不存在DetourUpdateThread
这样的函数。
g_bStop = TRUE;
)后再次等待10秒,如果超时则大概率发生死锁,将触发断点,可以在调试器中观察这2个线程的调用栈进行确认。例如指定使用Detours运行此示例"Demo.exe -Run DeadLock -Engine=MSDetours"
,以下调用栈可见堆死锁:Worker Thread Demo.exe!HeapUserThread Demo.exe!heap_alloc_dbg_internal
[External Code]
Demo.exe!heap_alloc_dbg_internal(const unsigned __int64 size, const int block_use, const char * const file_name, const int line_number) Line 359
Demo.exe!heap_alloc_dbg(const unsigned __int64 size, const int block_use, const char * const file_name, const int line_number) Line 450
Demo.exe!_malloc_dbg(unsigned __int64 size, int block_use, const char * file_name, int line_number) Line 496
Demo.exe!malloc(unsigned __int64 size) Line 27
Demo.exe!HeapUserThread(void * lpThreadParameter) Line 29
[External Code]Worker Thread Demo.exe!SetHookThread Demo.exe!__acrt_lock
[External Code]
Demo.exe!__acrt_lock(__acrt_lock_id _Lock) Line 55
Demo.exe!heap_alloc_dbg_internal(const unsigned __int64 size, const int block_use, const char * const file_name, const int line_number) Line 309
Demo.exe!heap_alloc_dbg(const unsigned __int64 size, const int block_use, const char * const file_name, const int line_number) Line 450
Demo.exe!_malloc_dbg(unsigned __int64 size, int block_use, const char * file_name, int line_number) Line 496
Demo.exe!malloc(unsigned __int64 size) Line 27
[External Code]
Demo.exe!DetourDetach(void * * ppPointer, void * pDetour) Line 2392
Demo.exe!HookAttach(_DEMO_ENGINE_TYPE EngineType, int Enable, void * * ppPointer, void * pDetour) Line 140
Demo.exe!SetHookThread(void * lpThreadParameter) Line 65
[External Code]
"Demo.exe -Run DeadLock -Engine=SlimDetours"
则能顺利通过。VirtualAlloc
分配内存页代替HeapAlloc
分配堆内存,是上文末尾提到的一个解决方案。_detour_memory_heap = RtlCreateHeap(HEAP_NO_SERIALIZE | HEAP_GROWABLE, NULL, 0, 0, NULL, NULL);
[!NOTE]
Detours已有事务机制,SlimDetours新添功能“延迟挂钩”也用了SRW锁,所以此堆无需序列化访问。
MH_Initialize
中创建,而SlimDetours在首个被调用的内存分配函数中进行一次初始化时创建,故没有也不需要单独的初始化函数。//////////////////////////////////////////////////////////////////////////////
//
// Region reserved for system DLLs, which cannot be used for trampolines.
//
static PVOID s_pSystemRegionLowerBound = (PVOID)(ULONG_PTR)0x70000000;
static PVOID s_pSystemRegionUpperBound = (PVOID)(ULONG_PTR)0x80000000;
Ntdll.dll
之后1GB范围为要规避的保留范围是合理的,这个考虑与上面提到的PR一致。要注意这个范围可能被分成两块,例如以下排布场景:Ntdll.dll
被ASLR随机加载到保留范围内较低的内存地址,后续DLL随后排布触底时,将切换到保留范围顶部继续排布,在这个情况下“Ntdll.dll
之后的1GB范围”便是2块不连续的区域。NtQuerySystemInformation
获得比硬编码更确切的用户地址空间范围,协助约束Trampoline的选址,参考KNSoft.SlimDetours/Source/SlimDetours/Memory.c于main · KNSoft/KNSoft.SlimDetours(https://github.com/KNSoft/KNSoft.SlimDetours/blob/main/Source/SlimDetours/Memory.c)。LoadLibraryW
+GetProcAddress
)。LdrpLoaderLock
变为了专用的LdrpDllNotificationLock
。总之,请保持回调尽可能简单。
[!TIP]
如果你想了解Windows上“DLL加载通知”的内部实现,参考我为ReactOS贡献的ReactOS PR #6795。不要参考WINE的实现,因为它截至此文编写时存在错误,例如,它的LdrUnregisterDllNotification
没有检查节点是否处于链表中就进行了移除。
SlimDetoursDelayAttach
函数注册延迟挂钩,具体可参考该函数声明上方的注释以及示例:DelayHook(https://github.com/KNSoft/KNSoft.SlimDetours/blob/main/Source/Demo/DelayHook.c)。SlimDetoursDelayAttach
注册对User32.dll!EqualRect
API的延迟挂钩,并通过检查它和LdrGetDllHandle
的返回值确认此时User32.dll
并未加载:/* Register SlimDetours delay hook */
hr = SlimDetoursDelayAttach((PVOID*)&g_pfnEqualRect,
Hooked_EqualRect,
g_usUser32.Buffer,
g_asEqualRect.Buffer,
DelayAttachCallback,
NULL);
if (FAILED(hr))
{
TEST_FAIL("SlimDetoursDelayAttach failed with 0x%08lX\n", hr);
return;
} else if (hr != HRESULT_FROM_NT(STATUS_PENDING))
{
TEST_FAIL("SlimDetoursDelayAttach succeeded with 0x%08lX, which is not using delay attach\n", hr);
return;
}/* Make sure user32.dll is not loaded yet */
Status = LdrGetDllHandle(NULL, NULL, &g_usUser32, &hUser32);
if (NT_SUCCESS(Status))
{
TEST_SKIP("user32.dll is loaded, test cannot continue\n");
return;
} else if (Status != STATUS_DLL_NOT_FOUND)
{
TEST_SKIP("LdrGetDllHandle failed with 0x%08lX\n", Status);
return;
}
LdrLoadDll
加载User32.dll
:/* Load user32.dll now */
Status = LdrLoadDll(NULL, NULL, &g_usUser32, &hUser32);
if (!NT_SUCCESS(Status))
{
TEST_SKIP("LdrLoadDll failed with 0x%08lX\n", Status);
return;
}
User32.dll
成功加载,则之前注册的延迟挂钩应已挂钩完成,进而验证延迟挂钩回调被正确调用以及User32.dll!EqualRect
函数被成功挂钩:/* Delay attach callback should be called and EqualRect is hooked successfully */
TEST_OK(g_bDelayAttach);
Status = LdrGetProcedureAddress(hUser32, &g_asEqualRect, 0, (PVOID*)&pfnEqualRect);
if (NT_SUCCESS(Status))
{
TEST_OK(pfnEqualRect(&rc1, &rc2) == TRUE);
TEST_OK(g_lEqualRectCount == 1);
} else
{
TEST_SKIP("LdrGetProcedureAddress failed with 0x%08lX\n", Status);
}
三
结语
kernel32.dll
的执念转而直面ntdll.dll
,再补过上面正文的4个问题,便不比已有的轮子差了,至此可告一段落。此时松口气回想一下导师的那个回答了又像是没回答的“如答”,好像确实他都回答了。现在如若换成我,我也是这个回答,不过不会带有犹豫。实践自是出真知,但与理论一致后才觉得更牢靠。看雪ID:Ratin
https://bbs.kanxue.com/user-home-853701.htm
# 往期推荐
球分享
球点赞
球在看
点击阅读原文查看更多