我惧怕失败,怕努力后徒劳无功,怕辜负他人的信任,不过比起这些,我想我更怕失去抽刀而出的勇气。 -----by 天河
很多人看到VM类型的题目会放弃,因为认为自己办不到,认为自己在有限的时间不一定做得完。我就不一样了,以前我会觉得自己完不成然后去麻痹自己,骗自己,告诉自己去做其他的题在一样的时间能获得更多的分数。直到最近接触了一个VM逆向题,我突然明白了,这东西抓条狗来都能乱杀,原来我连🐕都不如。
逆向中的虚拟机保护是一种基于虚拟机的代码保护技术。它将基于x86汇编系统中的可执行代码转换为字节码指令系统的代码,来达到不被轻易逆向和篡改的目的。 打开你的调试器,OllyDbg、x64dbg又或者其他的调试器,随便丢个可执行程序进去,调试器界面里展示的内容其实就是VM虚拟机的结构。
实际上就是指令,堆栈,寄存器,没什么新鲜的东西。
打开EXEINFO查看
一个人畜无害的x64程序。
打开IDA找到程序入口点,一步一步找到main函数。
signed __int64 __usercall [email protected]<rax>(unsigned int [email protected]<ebx>)
{
sub_140001D40();
return sub_1400017E4(a1);
}
跟进17E4
signed __int64 __usercall [email protected]<rax>(unsigned int [email protected]<ebx>)
{
__int64 v1; // rcx
char v2; // si
__int64 v3; // rcx
__int64 v5; // rcx
__int64 *v6; // rax
__int64 v7; // rcx
__int64 *v8; // rbx
__int64 v9; // rax
_QWORD *v10; // rax
__int64 v11; // rcx
_QWORD *v12; // rbx
__int64 v13; // rcx
__int64 v14; // rbx
__int64 v15; // rcx
int v16; // ecx
__int64 v17; // rcx
__int64 v18; // rcx if ( !(unsigned __int8)sub_140001B2C(1i64) )
{
sub_140001E60(7i64);
goto LABEL_20;
}
v2 = 0;
LOBYTE(a1) = sub_140001AF0(v1);
v3 = (unsigned int)dword_1400061B0;
if ( dword_1400061B0 == 1 )
{
LABEL_20:
sub_140001E60(7i64);
goto LABEL_21;
}
if ( dword_1400061B0 )
{
v2 = 1;
}
else
{
dword_1400061B0 = 1;
if ( (unsigned int)initterm_e(&unk_1400031E8, &unk_140003200) )
return 255i64;
initterm(&unk_1400031D0, &unk_1400031E0);
dword_1400061B0 = 2;
}
LOBYTE(v3) = a1;
sub_140001C9C(v3);
v6 = (__int64 *)sub_140001E48(v5);
v8 = v6;
if ( *v6 && (unsigned __int8)sub_140001C04(v6) )
{
v9 = *v8;
sub_140002360(0i64, 2i64);
}
v10 = (_QWORD *)sub_140001E50(v7);
v12 = v10;
if ( *v10 && (unsigned __int8)sub_140001C04(v10) )
register_thread_local_exe_atexit_callback(*v12);
get_initial_narrow_environment(v11);
v14 = *(_QWORD *)_p___argv(v13);
v16 = *(_DWORD *)_p___argc(v15);
a1 = sub_140001660();
if ( !(unsigned __int8)sub_140001FB4(v17) )
LABEL_21:
exit(a1);
if ( !v2 )
cexit();
LOBYTE(v18) = 1;
sub_140001CC0(v18, 0i64);
return a1;
}
找到_p___argc位置 下面的140001660就是主函数。主函数如下
/*
 * Main function: initialises the VM through sub_140001230() and then runs
 * the dispatch loop.
 *
 * Each step reads three consecutive dwords from dword_140005040 starting at
 * index i: the first selects a handler in qword_140006250, the second and
 * third are passed to it as arguments. After the handler returns, i is
 * reloaded from dword_140006240 (which a handler may have changed) and
 * advanced by 3; the loop stops once i exceeds 0xAE. Always returns 0.
 */
__int64 sub_140001660()
{
  unsigned int i; // eax

  /* This call had been swallowed by the preceding comment; it fills the
     handler table used below. */
  sub_140001230();
  for ( i = dword_140006240; i <= 0xAE; dword_140006240 = i )
  {
    ((void (__fastcall *)(_QWORD, _QWORD))qword_140006250[dword_140005040[i]])(
      (unsigned int)dword_140005040[i + 1],
      (unsigned int)dword_140005040[i + 2]);
    i = dword_140006240 + 3;
  }
  return 0i64;
}
可以看到下面是一个循环,根据140005040的数据选择140006250对应的函数指针使用,至此基本确定了,这是个VM虚拟机保护的程序。可以看到140006240每次循环都会加3,很明显这个是用来当作EIP的寄存器。可以看到每次循环都是用第一个dword来选择执行对应的指令,第二三个dword则是其参数。接着分析上面的140001230函数。
BOOL sub_140001230()
{
BOOL result; // eax
signed int v1; // ecx dword_140006480 = 2142398792;
dword_140006548 = 2142398792;
dword_140006484 = 1726509553;
dword_14000654C = 1726509553;
dword_140006488 = 1615422181;
dword_140006550 = 1615422181;
dword_14000648C = 983335478;
dword_140006554 = 983335478;
dword_140006490 = 1741443998;
dword_140006558 = 1741443998;
dword_140006494 = 1555054860;
dword_14000655C = 1555054860;
dword_140006498 = 585848500;
dword_140006560 = 585848500;
dword_14000649C = 576101823;
dword_140006564 = -2086329401;
dword_1400064A0 = -1923450198;
dword_140006568 = -2064692914;
dword_1400064A4 = 811825454;
dword_14000656C = -1962990350;
dword_1400064A8 = 831452081;
dword_140006570 = 106170276;
dword_1400064AC = 1572130305;
dword_140006574 = 461576420;
dword_1400064B0 = -1923938802;
dword_140006578 = -2064692914;
dword_1400064B4 = 654177345;
dword_14000657C = 106170276;
dword_1400064B8 = 484858795;
dword_140006580 = 1217882660;
dword_1400064BC = 654177345;
dword_140006584 = 576101823;
dword_1400064C0 = 811825454;
dword_140006588 = 549686290;
dword_1400064** = 326132160;
dword_14000658C = 464587811;
dword_1400064C8 = 484858795;
dword_140006590 = 106170276;
dword_1400064CC = 1585819400;
dword_140006594 = 1362403976;
dword_1400064D0 = 831452081;
dword_140006598 = -2064692914;
dword_1400064D4 = 654177345;
dword_14000659C = 106170276;
dword_1400064D8 = 1572130305;
dword_1400065A0 = 549686290;
dword_1400064DC = 326132160;
dword_1400065A4 = -1923450198;
dword_1400064E0 = 43556645;
dword_1400065A8 = 106170276;
dword_1400064E4 = 1585819400;
dword_1400065AC = 106170276;
dword_1400064E8 = 654177345;
dword_1400065B0 = 654177345;
dword_1400064EC = 1485564831;
dword_1400065B4 = -1962990350;
dword_1400064F0 = 1572130305;
dword_1400065B8 = 1362403976;
dword_1400064F4 = 43556645;
dword_1400065BC = 106170276;
dword_1400064F8 = 1348669712;
dword_1400065C0 = 28007239;
dword_1400064FC = 1348669712;
dword_1400065** = 1362403976;
dword_140006500 = 1485564831;
dword_1400065C8 = -1873890234;
dword_140006504 = 43556645;
dword_1400065CC = 461576420;
dword_140006508 = 1572130305;
dword_1400065D0 = 106170276;
dword_14000650C = 576101823;
dword_1400065D4 = 484858795;
dword_140006510 = 831452081;
dword_1400065D8 = 576101823;
dword_140006514 = 831452081;
dword_1400065DC = 890720561;
dword_140006518 = 1043615709;
dword_1400065E0 = -1923450198;
dword_14000651C = 1718482311;
dword_1400065E4 = 1718482311;
dword_140006520 = 0;
dword_1400065E8 = 0;
qword_140006250[0] = (__int64)sub_140001070;
qword_140006258 = (__int64)sub_140001080;
qword_140006260 = (__int64)sub_1400010B0;
qword_140006268 = (__int64)sub_1400010C0;
qword_140006270 = (__int64)sub_1400010F0;
qword_140006278 = (__int64)sub_140001110;
qword_140006280 = (__int64)sub_140001130;
qword_140006288 = (__int64)sub_140001150;
qword_140006290 = (__int64)sub_140001170;
qword_140006298 = (__int64)sub_140001190;
qword_1400062A0 = (__int64)sub_1400011B0;
qword_1400062A8 = (__int64)sub_1400011D0;
qword_1400062B0 = (__int64)sub_140001200;
qword_1400062B8 = (__int64)sub_140001210;
result = IsDebuggerPresent();
v1 = dword_140005240;
if ( result )
v1 = 150;
dword_140005240 = v1;
return result;
}
简单分析一下140001230函数,前面一大段都是在进行变量的赋值。先主要看后面。 给140006250开始的地址,初始化为了一些函数地址,很显然这就是作者设置的开发指令。在设置结束后,使用了IsDebuggerPresent来确定调试器是否存在,存在的时候修改140005240处的值,仔细看这个修改的位置可以发现这是虚拟机要执行指令的一部分。接下来就是分析对应的指令了。
/*
 * VM handler (opcode 0): dword_140006220[a1] = a2.
 * Stores the immediate a2 into slot a1 and returns a1.
 */
__int64 __fastcall sub_140001070(unsigned int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above. */
  result = a1;
  dword_140006220[a1] = a2;
  return result;
}
实现了一个140006220[a1]=a2
的功能,猜测可能是寄存器也可能是堆栈,一切皆有可能嘛,谁知道开发者的脑回路是什么样的。
/*
 * VM handler (opcode 1):
 *   dword_140006220[a1] = dword_1400062F0[dword_140006220[a2]].
 * Loads from dword_1400062F0 at the index held in slot a2 and returns the
 * loaded value.
 */
__int64 __fastcall sub_140001080(int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above. */
  result = (unsigned int)dword_1400062F0[dword_140006220[a2]];
  dword_140006220[a1] = result;
  return result;
}
实现了一个140006220[a1]=1400062F0[140006220[a2]]
的功能,看着像一个通过寄存器从堆栈加载数据的功能。
/*
 * VM handler (opcode 2): calls sub_140001010 with the data at
 * unk_140003250 as its first argument and a1 ^ a2 as the variadic
 * argument, and returns that call's result.
 */
__int64 __fastcall sub_1400010B0(int a1, unsigned int a2)
{
return sub_140001010((__int64)&unk_140003250, a1 ^ a2);
}
跟进140001010发现其是个输出的函数
/*
 * Variadic output helper: forwards the format pointer a1 and the variadic
 * arguments to _stdio_common_vfprintf, using the stream returned by
 * _acrt_iob_func(1) and the options value obtained via sub_140001000().
 * NOTE(review): stream index 1 is presumably stdout - confirm.
 */
__int64 sub_140001010(__int64 a1, ...)
{
  __int64 v1; // rdi
  __int64 v2; // rbx
  _QWORD *v3; // rax
  va_list va; // [rsp+58h] [rbp+10h]

  /* Restored: va_start had been swallowed by the comment above, which
     would leave `va` uninitialised when passed on. */
  va_start(va, a1);
  v1 = a1;
  v2 = _acrt_iob_func(1i64);
  v3 = (_QWORD *)sub_140001000();
  return _stdio_common_vfprintf(*v3, v2, v1, 0i64, (__int64 *)va);
}
实现了一个print a1^a2的功能
/*
 * VM handler (opcode 3):
 *   dword_1400062F0[dword_140006220[a1]] = getchar().
 * Reads one character from standard input, stores it at the index held in
 * slot a1, and returns the character (or whatever getchar() returned).
 */
int __fastcall sub_1400010C0(unsigned int a1)
{
  __int64 v1; // rbx
  int result; // eax

  /* Restored: this assignment had been swallowed by the comment above. */
  v1 = a1;
  result = getchar();
  dword_1400062F0[dword_140006220[v1]] = result;
  return result;
}
实现了一个1400062F0[140006220[v1]]=getchar
的功能,至此确定了 1400062F0大概率是堆栈,而140006220则为寄存器,这个函数实际上就是把输入的字符读到堆栈里
/*
 * VM handler (opcode 4): dword_140006220[a1] += dword_140006220[a2].
 * Returns the value of slot a2 that was added.
 */
__int64 __fastcall sub_1400010F0(int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above. */
  result = (unsigned int)dword_140006220[a2];
  dword_140006220[a1] += result;
  return result;
}
实现了一个140006220[a1]+=140006220[a2]
的功能
/*
 * VM handler (opcode 5): dword_140006220[a1] *= dword_140006220[a2].
 * The product is truncated to 32 bits before being stored and returned.
 */
__int64 __fastcall sub_140001110(int a1, int a2)
{
  int *v2; // r8
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above;
     without it v2 would be dereferenced uninitialised. */
  v2 = &dword_140006220[a1];
  result = (unsigned int)(dword_140006220[a2] * *v2);
  *v2 = result;
  return result;
}
实现了一个140006220[a1]=140006220[a1]*140006220[a2]
的功能
/*
 * VM handler (opcode 6): dword_140006220[a1] -= dword_140006220[a2].
 * Returns the value of slot a2 that was subtracted.
 */
__int64 __fastcall sub_140001130(int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above. */
  result = (unsigned int)dword_140006220[a2];
  dword_140006220[a1] -= result;
  return result;
}
实现了一个140006220[a1]-=140006220[a2]
的功能
/*
 * VM handler (opcode 7): dword_140006220[a1] %= dword_140006220[a2],
 * with the divisor taken as unsigned. The return value is the quotient;
 * the slot itself receives the remainder.
 */
__int64 __fastcall sub_140001150(int a1, int a2)
{
  int *v2; // r8
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above;
     without it v2 would be dereferenced uninitialised. */
  v2 = &dword_140006220[a1];
  result = *v2 / (unsigned int)dword_140006220[a2];
  *v2 %= (unsigned int)dword_140006220[a2];
  return result;
}
实现了一个140006220[a1]%=140006220[a2]
的功能
/*
 * VM handler (opcode 8): dword_140006244 = (dword_140006220[a1] == a2).
 * Compares slot a1 with the immediate a2 and records the result (1 or 0)
 * in dword_140006244. Returns a1.
 */
__int64 __fastcall sub_140001170(unsigned int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above. */
  result = a1;
  dword_140006244 = dword_140006220[a1] == a2;
  return result;
}
实现了一个140006244= 140006220[a1]==a2
的功能,实际上是对140006220[a1]的值和a2进行比较,将比较的结果赋值给140006244,猜测其为标志寄存器。
/*
 * VM handler (opcode 9): if dword_140006244 is non-zero, set
 * dword_140006240 to a1; otherwise leave it unchanged. Returns the value
 * written. Note that the dispatch loop adds 3 to dword_140006240 after the
 * handler returns.
 */
__int64 __fastcall sub_140001190(unsigned int a1)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above;
     it supplies the "no jump" value. */
  result = (unsigned int)dword_140006240;
  if ( dword_140006244 )
    result = a1;
  dword_140006240 = result;
  return result;
}
实现了以下功能
if (140006244 != 0)
{
140006240=a1
}
else
{
140006240=140006240
}
根据标志寄存器的值修改EIP寄存器的值,实际上是个跳转指令,类似jz。
/*
 * VM handler (opcode 10): if dword_140006244 is zero, set dword_140006240
 * to a1; otherwise leave it unchanged. Returns the value written. Note
 * that the dispatch loop adds 3 to dword_140006240 after the handler
 * returns.
 */
__int64 __fastcall sub_1400011B0(unsigned int a1)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above;
     it supplies the "no jump" value. */
  result = (unsigned int)dword_140006240;
  if ( !dword_140006244 )
    result = a1;
  dword_140006240 = result;
  return result;
}
实现如下功能
if 140006244==0
140006240=a1
根据标志寄存器的值修改EIP寄存器的值,实际上是个跳转指令,类似jnz。
/*
 * VM handler (opcode 11):
 *   dword_1400062F0[dword_140006220[a1]] = dword_140006220[a2].
 * Stores the value of slot a2 into dword_1400062F0 at the index held in
 * slot a1, and returns the stored value.
 */
__int64 __fastcall sub_1400011D0(int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above. */
  result = (unsigned int)dword_140006220[a2];
  dword_1400062F0[dword_140006220[a1]] = result;
  return result;
}
实现了一个1400062F0[140006220[a1]]=140006220[a2]
功能,实际上是取出a2寄存器的值,写入以a1寄存器的值为下标的堆栈位置。
/*
 * VM handler (opcode 12): terminates the process with exit status 0.
 * Never returns.
 */
void __noreturn sub_140001200()
{
exit(0);
}
没啥说的,一个退出程序的功能
/*
 * VM handler (opcode 13):
 *   dword_140006244 = (dword_140006220[a1] == dword_140006220[a2]).
 * Compares two slots and records the result (1 or 0) in dword_140006244.
 * Returns the zero-extended value of slot a2.
 */
__int64 __fastcall sub_140001210(int a1, int a2)
{
  __int64 result; // rax

  /* Restored: this assignment had been swallowed by the comment above;
     without it the comparison would use an uninitialised value. */
  result = (unsigned int)dword_140006220[a2];
  dword_140006244 = dword_140006220[a1] == result;
  return result;
}
实现了一个140006244= 140006220[a1]==140006220[a2]
功能,实际上是比较两个寄存器。 到这里指令功能基本明晰,下一步就是把程序运行的opcode 也就是虚拟机解释执行的代码转换成我们看得懂的汇编了。
这里可以写个解析指令的小脚本,来辅助转换成更易读的代码,转换后如下
0 print 141^253
3 print 175^195
6 print 195^185
9 print 228^196
12 print 52^81
15 print 53^91
18 print 125^9
21 print 46^75
24 print 182^196
27 print 191^159
30 print 27^125
33 print 79^35
36 print 222^191
39 print 149^242
42 print 41^19
45 140006220[0]=0
48 140006220[1]=1
51 1400062f0[140006220[0]]=getchar //0
54 140006220[0]+=140006220[1]
57 140006244= 140006220[0]==41
60 jnz eip=48//0
63 jz eip=63 //0
66 140006220[0]=23
69 140006220[1]=37
72 140006220[2]=2486650401
75 140006220[4]=1
78 140006220[6]=0
81 140006220[3]=0
84 140006220[5]=1400062f0[140006220[6]]
87 140006220[5]*=140006220[0]
90 140006220[5]+=140006220[1]
93 140006220[5]%=140006220[2]
96 140006220[3]+=140006220[4]
99 140006244= 140006220[3]==32
102 jnz eip=84//0
105 1400062f0[140006220[6]]=140006220[5]
108 140006220[6]+=140006220[4]
111 140006244= 140006220[6]==41
114 jnz eip=78//0
117 jz eip=117 //0
120 140006220[0]=0
123 140006220[1]=1
126 140006220[3]=100
129 140006220[2]=1400062f0[140006220[0]]
132 140006220[4]=1400062f0[140006220[3]]
135 140006244= 140006220[2]==140006220[4]
138 jnz eip=165//0
141 140006220[0]+=140006220[1]
144 140006220[3]+=140006220[1]
147 140006244= 140006220[0]==40
150 jz eip=153 //0
153 jnz eip=126//0
156 print 35^90
159 print 57^92
162 print 98^17
165 exit //0 0
168 print 56^86
171 print 38^73
input[i]=((input[i]*23+37)%2486650401)
执行32次 计算后的数据放回栈上。 人类的赞歌就是勇气的赞歌,人类的伟大就是勇气的伟大。祝愿我们每个人都有抽刀而出的勇气。