在逆向CTF题目的过程中,遇到一些规模比较大的函数时,想快速理解其功能并不容易,这时可以把代码丢给ChatGPT去分析。本文以2022年之江杯初赛的逆向题目为例,看看ChatGPT的分析能力。
题目下载:
https://github.com/Inv0k3r/pwnable_files/raw/master/vrun2.exe
初步用ida和ghidra反编译一下部分代码,因为太长了所以只贴一部分:
/*
 * func3 -- decompiler output for a multi-word addition with carry.
 *
 * Treats a1, a2 and a3 as addresses of arrays of 32-bit words and stores
 * a1[i] + a2[i] + carry into a3[i], propagating the carry from each word
 * into the next.  The loop body handles 16 words and runs twice (v8 == 2),
 * so 32 words are processed in total.
 *
 * NOTE(review): the local declarations are not part of this excerpt; the
 * "> 0xFFFFFFFF" carry tests only make sense if v9..v26 are 64-bit
 * unsigned temporaries -- confirm against the full decompilation.
 *
 * Returns `result`, which is the carry out of the second-to-last word of
 * the final 16-word group (the carry out of the last word is kept in v5
 * and is not returned).
 */
_BOOL8 __fastcall func3(__int64 a1, __int64 a2, __int64 a3)
{
/* v4 walks a2 and starts at its second word, so *(v4 - 1) is the current first word. */
v4 = (unsigned int *)(a2 + 4);
/* v5 is the carry brought into the first word of each 16-word group. */
v5 = 0i64;
/* Byte distance from a2 to a1: (char *)v4 + v6 addresses the same index in a1. */
v6 = a1 - a2;
/* Byte distance from a2 to a3: (char *)v4 + v7 addresses the same index in a3. */
v7 = a3 - a2;
/* Number of 16-word groups left to process. */
v8 = 2i64;
do
{
/* Word 0 of the group: a1 word + a2 word + incoming carry. */
v9 = v5 + *(unsigned int *)((char *)v4 + v6 - 4) + *(v4 - 1);
*(unsigned int *)((char *)v4 + v7 - 4) = v9;
/* Words 1..9: each sum adds the carry (previous sum > 0xFFFFFFFF) of the word before it. */
v10 = *v4 + (v9 > 0xFFFFFFFF) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6);
*(unsigned int *)((char *)v4 + v7) = v10;
v11 = (v10 > 0xFFFFFFFF) + v4[1] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 4);
*(unsigned int *)((char *)v4 + v7 + 4) = v11;
v12 = (v11 > 0xFFFFFFFF) + v4[2] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 8);
*(unsigned int *)((char *)v4 + v7 + 8) = v12;
v13 = (v12 > 0xFFFFFFFF) + v4[3] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 12);
*(unsigned int *)((char *)v4 + v7 + 12) = v13;
v14 = (v13 > 0xFFFFFFFF) + v4[4] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 16);
*(unsigned int *)((char *)v4 + v7 + 16) = v14;
v15 = (v14 > 0xFFFFFFFF) + v4[5] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 20);
*(unsigned int *)((char *)v4 + v7 + 20) = v15;
v16 = (v15 > 0xFFFFFFFF) + v4[6] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 24);
*(unsigned int *)((char *)v4 + v7 + 24) = v16;
v17 = (v16 > 0xFFFFFFFF) + v4[7] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 28);
*(unsigned int *)((char *)v4 + v7 + 28) = v17;
v18 = (v17 > 0xFFFFFFFF) + v4[8] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 32);
*(unsigned int *)((char *)v4 + v7 + 32) = v18;
/* Word 10: the operand sum is read before v4 is advanced; the carry is added afterwards. */
v19 = v4[9] + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 + 36);
/* Advance to the next group (16 words = 64 bytes); the rest of this group is now at negative offsets. */
v4 += 16;
v20 = (v18 > 0xFFFFFFFF) + v19;
*(unsigned int *)((char *)v4 + v7 - 28) = v20;
/* Words 11..14 of the group, addressed relative to the already-advanced v4. */
v21 = (v20 > 0xFFFFFFFF) + *(v4 - 6) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 24);
*(unsigned int *)((char *)v4 + v7 - 24) = v21;
v22 = (v21 > 0xFFFFFFFF) + *(v4 - 5) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 20);
*(unsigned int *)((char *)v4 + v7 - 20) = v22;
v23 = (v22 > 0xFFFFFFFF) + *(v4 - 4) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 16);
*(unsigned int *)((char *)v4 + v7 - 16) = v23;
v24 = (v23 > 0xFFFFFFFF) + *(v4 - 3) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 12);
*(unsigned int *)((char *)v4 + v7 - 12) = v24;
/* Carry out of word 14; this is also the value the function ends up returning. */
result = v24 > 0xFFFFFFFF;
/* Word 15, the last word of the group. */
v26 = result + *(v4 - 2) + (unsigned __int64)*(unsigned int *)((char *)v4 + v6 - 8);
*(unsigned int *)((char *)v4 + v7 - 8) = v26;
/* Carry out of the last word becomes the incoming carry of the next group. */
v5 = v26 > 0xFFFFFFFF;
--v8;
}
while ( v8 );
return result;
}
可以看出来代码很乱,可能有经验的逆向赛棍可以一眼看出是大数算法,但是我被里面的两个硬编码的字符串误导了一下,误以为是某种使用密钥的加密算法,最后结合调试查看数据才理清计算流程,这是我当时写的解法:
import time
# 还原计算流程
def _limbs_to_int(hex_text):
    """Combine the first four 8-digit hex limbs of *hex_text* into one int."""
    value = 0
    for index in range(4):
        # Most significant limb first; each limb is one 32-bit word.
        limb = int(hex_text[index * 8:(index + 1) * 8], 16)
        value = value * 0x100000000 + limb
    return value


def calc(str1, str2):
    """Multiply two 128-bit numbers given as 32-digit hex strings.

    Each argument is read as four 8-hex-digit limbs, most significant
    limb first; characters beyond the first 32 are ignored.  The product
    equals the sum of all limb-by-limb partial products of the schoolbook
    method, which is what the analysed binary computes limb by limb.

    Args:
        str1: First operand as hex digits (at least 32 characters).
        str2: Second operand as hex digits (at least 32 characters).

    Returns:
        The product as a Python int.  Its hex form is also printed.

    Raises:
        ValueError: If a limb is empty (string shorter than 25
            characters) or contains non-hex characters.
    """
    result = _limbs_to_int(str1) * _limbs_to_int(str2)
    print(hex(result))
    return result
# Alternative sample input, kept for reference:
# input1 = 'AAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD'
input1 = 'f34857597362863874859743a772cd73'
input2 = '18975633241537485357262533468472'

# Reproduce the program's computation: input1 * input1 - input2 * input1.
a, b = calc(input1, input1), calc(input2, input1)
print(hex(a - b))

# Values noted down while debugging.
# NOTE(review): a, b and c look like intermediate results recorded for the
# sample input above -- confirm; nothing visible in this section reads them.
a, b, c = (
    0x71c71c71ddddddddf5c28f5c41fdb9740c83fb72cf13579bc3b2a19061d950c9,
    0x1064e42219b1d544b312672109a6b99ae5003bd1834d750faa1f926fe4a0c06a,
    0x6162384fc42c089942b0283b3856ffd92783bfa14bc5e28c19930f207d38905f,
)
# The result value the solver must reproduce.
real_c = 0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873
from z3 import *

# Recover the unknown 128-bit input as four 32-bit limbs (a is the most
# significant) from the relation  x * x - known_factor * x == target.
s = Solver()
a = Int('a')
b = Int('b')
c = Int('c')
d = Int('d')

LIMB_BASE = 0x100000000
# The 128-bit unknown assembled from its limbs.  Multiplying it out yields
# the same limb-by-limb partial products the program computes.
x = ((a * LIMB_BASE + b) * LIMB_BASE + c) * LIMB_BASE + d
known_factor = 0x18975633241537485357262533468472
target = 0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873
s.add(x * x - known_factor * x == target)

# Every limb is a 32-bit word; zero is a legal limb value.
for limb in (a, b, c, d):
    s.add(limb >= 0)
    s.add(limb < LIMB_BASE)

if s.check() == sat:
    result = s.model()
    print(result)
做完了才意识到是一个大数算法。
但是如果我们分别将几个函数的反编译丢给chatGPT,结果如下:
chatGPT一眼就看出来是大数算法,然后我们结合一下简单的调试可知,程序主要是把我们的输入的一个32位16进制串做如下运算:
input * input - input * 0x18975633241537485357262533468472 = 0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873
import math

# Solve  x*x + b*x + c == 0  (with a == 1) for its positive root, i.e. the
# input satisfying  input * input - input * K == TARGET.
a = 1
b = -0x18975633241537485357262533468472
c = -0xcfd3d07e418bc8e081e32ed7195f942021834b00f7244eb73df68550cfcc9873

# The discriminant is far wider than a float's 53-bit mantissa, so
# math.sqrt would corrupt the low digits of the root; math.isqrt works on
# exact integers.
discriminant = b * b - 4 * a * c
root = (-b + math.isqrt(discriminant)) // (2 * a)
print(hex(root))
from z3 import *

# Cross-check with z3: find a positive integer a with a*a + a*b + c == 0,
# where b and c are the integer coefficients assigned just above.
s = Solver()
a = Int('a')
s.add(a * a + a * b + c == 0)
s.add(a > 0)
if s.check() == sat:
    print(s.model())
实际上,上面让ChatGPT推测功能时输入的是Ghidra反编译的代码。虽然它和IDA反编译出来的代码差不多,但把IDA的代码丢进去之后并没有推测出功能,而Ghidra反编译的代码就可以推测出上面的大数算法。
然后我找了一下之前的IDA ChatGPT插件,先使用插件的变量自动重命名功能,再使用功能推测,这次ChatGPT也成功从IDA的反编译代码中推测出了函数的功能:
但是另外几个函数就不太行了:
所以我魔改了一下插件,利用selenium直接启动一个chrome去跟chatgpt交互(见文章末尾),这样就可以反复提出不同要求来微调结果,并且可以发多段代码,让它从一个整体的角度去分析功能,比如先给他发了前面可以分析出大数乘法的代码,在分析出大数乘法后,再发送大数加法的代码就可以分析出来了:
而直接发送大数加法的代码就不一定能分析出来:
地址:
https://github.com/Inv0k3r/Gepetto-ChatGPT
主要是修改了原版使用的付费api为免费的聊天栏,然后加了手动登录(防止每次打开IDA都要登录)和仅发送反编译代码的功能(便于微调)。
装好插件后,在配置里填好账号、密码、代理以及自动过验证码的相关设置,再确保本机装有Chrome,就可以使用了。
听说最近chatgpt在推高级版了,希望有个更好用的接口,目前这个魔改插件作为临时使用还可以,如果有更好的接口应该可以实现一些更有意思的功能。