0%

C++反汇编与逆向分析 - 函数

当函数被调用时,程序流程会转移到函数体的实现位置处,只有遇到return语句或“}”符号时,才会返回到调用处下一条语句的地址;

运行环境:

  • 操作系统: Windows 7家庭版
  • 编译器:VC6 VS2013

函数

栈帧

栈在内存中是一块特殊的存储空间,它的原则是”先进后出”即最先被存入的数据最后释放。汇编通常使用push指令和pop指令对栈空间执行数据压入和数据弹出操作;

我们使用Debug版本查看:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Test01.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <stdlib.h>


int fun(int a, int b)
{
return a * b;
}

int _tmain(int argc, _TCHAR* argv[])
{
int n = fun(argc, argc * 8);
return 0;
}


;函数体 debug版本
int fun(int a, int b)
{
010B13C0 push ebp
010B13C1 mov ebp,esp ;保存栈指针
010B13C3 sub esp,0C0h ;拉开局部变量空间
010B13C9 push ebx
010B13CA push esi
010B13CB push edi ;保存寄存器环境
010B13CC lea edi,[ebp-0C0h]
010B13D2 mov ecx,30h
010B13D7 mov eax,0CCCCCCCCh
010B13DC rep stos dword ptr es:[edi] ;局部变量填CC
return a * b;
010B13DE mov eax,dword ptr [a]
010B13E1 imul eax,dword ptr [b] ;函数体
}
010B13E5 pop edi ;恢复寄存器环境
010B13E6 pop esi
010B13E7 pop ebx
010B13E8 mov esp,ebp ;释放局部变量空间
010B13EA pop ebp ;恢复调用方的ebp
010B13EB ret


;Release版本
;main函数
.text:00401010 ; int __cdecl main(int argc, const char **argv, const char **envp)
.text:00401010 _main proc near ; CODE XREF: start+AF↓p
.text:00401010
.text:00401010 argc = dword ptr 4
.text:00401010 argv = dword ptr 8
.text:00401010 envp = dword ptr 0Ch
.text:00401010
.text:00401010 mov eax, [esp+argc]
.text:00401014 lea ecx, ds:0[eax*8]
.text:0040101B push ecx
.text:0040101C push eax
.text:0040101D call sub_401000
.text:00401022 add esp, 8
.text:00401025 xor eax, eax
.text:00401027 retn
.text:00401027 _main endp
.text:00401027

;fun函数
.text:00401000 ; =============== S U B R O U T I N E =======================================
.text:00401000
.text:00401000
.text:00401000 sub_401000 proc near ; CODE XREF: _main+D↓p
.text:00401000
.text:00401000 arg_0 = dword ptr 4
.text:00401000 arg_4 = dword ptr 8
.text:00401000
.text:00401000 mov eax, [esp+arg_0] ;使用ESP寻址
.text:00401004 imul eax, [esp+arg_4]
.text:00401009 retn
.text:00401009 sub_401000 endp

ESP标识栈顶,EBP标识栈底

但是当我们的函数中存在局部变量的情况下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
.text:00401000 sub_401000      proc near               ; CODE XREF: _main+D↓p
.text:00401000
.text:00401000 var_4 = dword ptr -4
.text:00401000 arg_0 = dword ptr 4
.text:00401000 arg_4 = dword ptr 8
.text:00401000
.text:00401000 push ecx ;这一行相当于 sub esp,4,即开辟局部变量空间
.text:00401001 lea eax, [esp+0] ;取局部变量的地址
.text:00401005 mov [esp+4+var_4], 0
.text:0040100D push eax
.text:0040100E push offset aD ; "%d"
.text:00401013 call _scanf
.text:00401018 mov eax, [esp+10h]
.text:0040101C imul eax, [esp+14h]
.text:00401021 add esp, 0Ch
.text:00401024 retn
.text:00401024 sub_401000 endp

IDA在分析中帮我们模拟了一个栈环境:

-4标识我们的局部变量,+4标识我们的参数;

在高版本的VS编译器中,已经不再使用ESP寄存器进行寻址了,改用EBP寄存器寻址,全无优化的反汇编代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
.text:00401000 sub_401000      proc near               ; CODE XREF: _wmain+F↓p
.text:00401000
.text:00401000 var_4 = dword ptr -4
.text:00401000 arg_0 = dword ptr 8
.text:00401000 arg_4 = dword ptr 0Ch
.text:00401000
.text:00401000 push ebp
.text:00401001 mov ebp, esp ;建立栈帧,ebp保存当前栈顶
.text:00401003 push ecx ;拉开局部变量空间。拉开栈顶
.text:00401004 lea eax, [ebp+var_4] ;取局部变量的地址,作为scanf的参数
.text:00401007 push eax
.text:00401008 push offset Format ; "%d"
.text:0040100D call ds:scanf
.text:00401013 mov eax, [ebp+arg_0]
.text:00401016 add esp, 8
.text:00401019 imul eax, [ebp+arg_4]
.text:0040101D add eax, [ebp+var_4]
.text:00401020 mov esp, ebp
.text:00401022 pop ebp
.text:00401023 retn
.text:00401023 sub_401000 endp

我们继续查看IDA为我们模拟的栈空间:

stack表示调用方的栈;

调用约定

cdecl

1
2
3
4
5
6
7
8
9
10
11
	int n = fun_c_cdecl(argc, argc * 8, 5, 6);
01371C0E push 6
01371C10 push 5
01371C12 mov eax,dword ptr [argc]
01371C15 shl eax,3
01371C18 push eax
01371C19 mov ecx,dword ptr [argc]
01371C1C push ecx
01371C1D call fun_c_cdecl (0137119Fh)
01371C22 add esp,10h ;平衡堆栈
01371C25 mov dword ptr [n],eax

C约定(cdecl)由调用方在函数返回后平衡堆栈(如上面的 add esp,10h);

快速识别的方法:当我们把一个函数调用的返回值作为另一个函数的参数传递时:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
;调用方
00401218 push 6
0040121A mov eax,dword ptr [ebp+8]
0040121D shl eax,3
00401220 push eax
00401221 push 6
00401223 push 5
00401225 mov ecx,dword ptr [ebp+8]
00401228 shl ecx,3
0040122B push ecx
0040122C mov edx,dword ptr [ebp+8]
0040122F push edx
00401230 call @ILT+15(fun_c_stdcall) (00401014)
00401235 push eax
00401236 mov eax,dword ptr [ebp+8]
00401239 push eax
0040123A call @ILT+20(fun_c_cdecl) (00401019)
0040123F add esp,10h
00401242 mov dword ptr [ebp-4],eax


;函数实现
00401040 push ebp
00401041 mov ebp,esp
00401043 sub esp,44h
00401046 push ebx
00401047 push esi
00401048 push edi
00401049 lea edi,[ebp-44h]
0040104C mov ecx,11h
00401051 mov eax,0CCCCCCCCh
00401056 rep stos dword ptr [edi]
11: int n = 0;
00401058 mov dword ptr [ebp-4],0
12: scanf("%d", &n);
0040105F lea eax,[ebp-4]
00401062 push eax
00401063 push offset string "%d" (0042501c)
00401068 call scanf (004012b0)
0040106D add esp,8
13: return a * b + c / d - n;
00401070 mov ecx,dword ptr [ebp+8]
00401073 imul ecx,dword ptr [ebp+0Ch]
00401077 mov eax,dword ptr [ebp+10h]
0040107A cdq
0040107B idiv eax,dword ptr [ebp+14h]
0040107E add ecx,eax
00401080 sub ecx,dword ptr [ebp-4]
00401083 mov eax,ecx
14: }
00401085 pop edi
00401086 pop esi
00401087 pop ebx
00401088 add esp,44h
0040108B cmp ebp,esp
0040108D call __chkesp (00401310)
00401092 mov esp,ebp
00401094 pop ebp
00401095 ret

在函数内部观察到有参数的访问,而在ret指令处未看到平衡操作(不是 ret imm16 的形式),这种特点为C约定;

无参函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
;调用方:
41: int n3 = fun_c_cdecl_x();
00401271 call @ILT+10(fun_c_cdecl_x) (0040100f)

;函数体:
004010B0 push ebp
004010B1 mov ebp,esp
004010B3 sub esp,44h
004010B6 push ebx
004010B7 push esi
004010B8 push edi
004010B9 lea edi,[ebp-44h]
004010BC mov ecx,11h
004010C1 mov eax,0CCCCCCCCh
004010C6 rep stos dword ptr [edi]
17: int n = 0;
004010C8 mov dword ptr [ebp-4],0
18: scanf("%d", &n);
004010CF lea eax,[ebp-4]
004010D2 push eax
004010D3 push offset string "%d" (0042501c)
004010D8 call scanf (004012b0)
004010DD add esp,8
19: return n;
004010E0 mov eax,dword ptr [ebp-4]
20: }
004010E3 pop edi
004010E4 pop esi
004010E5 pop ebx
004010E6 add esp,44h
004010E9 cmp ebp,esp
004010EB call __chkesp (00401310)
004010F0 mov esp,ebp
004010F2 pop ebp
004010F3 ret
;

无参函数可随意还原为任意约定,动态库的调用约定有区别;

动态库符号编译后,C约定会在函数的符号名前加前缀“_”,本例命名为“_fuc_c”;

stdcall约定编译后,链接库函数被命名为“_fun_stdcall@16”,即前缀加下划线,后缀加“@”+参数字节数;

stdcall约定下,寄存器不参与参数信息的传递工作;

stdcall

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

;函数主体(注:此处贴出的是 __fastcall 版本 fun_c_fastcall 的代码;结尾的 ret 8 属于被调方平衡栈)
29:
30: int __fastcall fun_c_fastcall(int a, int b, int c, int d)
31: {
00401180 push ebp
00401181 mov ebp,esp
00401183 sub esp,4Ch
00401186 push ebx
00401187 push esi
00401188 push edi
00401189 push ecx
0040118A lea edi,[ebp-4Ch]
0040118D mov ecx,13h ;ecx使用
00401192 mov eax,0CCCCCCCCh
00401197 rep stos dword ptr [edi]
00401199 pop ecx
0040119A mov dword ptr [ebp-8],edx
0040119D mov dword ptr [ebp-4],ecx
32: int n = 0;
004011A0 mov dword ptr [ebp-0Ch],0
33: scanf("%d", &n);
004011A7 lea eax,[ebp-0Ch]
004011AA push eax
004011AB push offset string "%d" (0042501c)
004011B0 call scanf (004012b0)
004011B5 add esp,8
34: return a * b + c / d - n;
004011B8 mov ecx,dword ptr [ebp-4]
004011BB imul ecx,dword ptr [ebp-8]
004011BF mov eax,dword ptr [ebp+8]
004011C2 cdq
004011C3 idiv eax,dword ptr [ebp+0Ch]
004011C6 add ecx,eax
004011C8 sub ecx,dword ptr [ebp-0Ch]
004011CB mov eax,ecx
35: }
004011CD pop edi
004011CE pop esi
004011CF pop ebx
004011D0 add esp,4Ch
004011D3 cmp ebp,esp
004011D5 call __chkesp (00401310)
004011DA mov esp,ebp
004011DC pop ebp
004011DD ret 8

ret后面有值,一定是stdcall或fastcall

VS在调用fastcall函数时,使用ecx和edx传递参数;

fastcall

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
;调用方
0040125C push 6
0040125E push 5
00401260 mov edx,dword ptr [ebp+8]
00401263 shl edx,3
00401266 mov ecx,dword ptr [ebp+8]
00401269 call @ILT+0(fun_c_fastcall) (00401005)
0040126E mov dword ptr [ebp-0Ch],eax

;函数实现
30: int __fastcall fun_c_fastcall(int a, int b, int c, int d)
31: {
00401180 push ebp
00401181 mov ebp,esp
00401183 sub esp,4Ch
00401186 push ebx
00401187 push esi
00401188 push edi
00401189 push ecx
0040118A lea edi,[ebp-4Ch]
0040118D mov ecx,13h
00401192 mov eax,0CCCCCCCCh
00401197 rep stos dword ptr [edi]
00401199 pop ecx
0040119A mov dword ptr [ebp-8],edx ;edx未初始化使用,和调用方配合,调用方使用edx,ecx传递参数
0040119D mov dword ptr [ebp-4],ecx
32: int n = 0;
004011A0 mov dword ptr [ebp-0Ch],0
33: scanf("%d", &n);
004011A7 lea eax,[ebp-0Ch]
004011AA push eax
004011AB push offset string "%d" (0042501c)
004011B0 call scanf (004012b0)
004011B5 add esp,8
34: return a * b + c / d - n;
004011B8 mov ecx,dword ptr [ebp-4]
004011BB imul ecx,dword ptr [ebp-8]
004011BF mov eax,dword ptr [ebp+8]
004011C2 cdq
004011C3 idiv eax,dword ptr [ebp+0Ch]
004011C6 add ecx,eax
004011C8 sub ecx,dword ptr [ebp-0Ch]
004011CB mov eax,ecx
35: }
004011CD pop edi
004011CE pop esi
004011CF pop ebx
004011D0 add esp,4Ch
004011D3 cmp ebp,esp
004011D5 call __chkesp (00401310)
004011DA mov esp,ebp
004011DC pop ebp
004011DD ret 8


当只有一个参数时,只会使用ecx寄存器;

寄存器ECX和EDX分别参与信息的传递工作

更多的参数通过栈传递;

在函数内部有栈访问,查阅RET指令处,可见平衡栈操作;

该约定非标准;实际分析中,应结合具体案例分析;

变量识别

1
2
3
4
5
6
7
8
9
10
11
12
13
static int g_nTest = 0x8765093;

int main(int argc, char* argv[])
{
int n = 999;
scanf("%d",&n);
scanf("%d",&g_nTest);
scanf("%d",&argc);
printf("n = %d\n",n + argc);
printf("g_nTest = %d\n",g_nTest + argc);
printf("Hello World!\n");
return 0;
}

全局变量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
00401010                          | 55                   | push ebp                                        |
00401011 | 8BEC | mov ebp,esp |
00401013 | 83EC 44 | sub esp,44 |
00401016 | 53 | push ebx |
00401017 | 56 | push esi |
00401018 | 57 | push edi |
00401019 | 8D7D BC | lea edi,dword ptr ss:[ebp-44] |
0040101C | B9 11000000 | mov ecx,11 |
00401021 | B8 CCCCCCCC | mov eax,CCCCCCCC |
00401026 | F3:AB | rep stosd |
00401028 | C745 FC E7030000 | mov dword ptr ss:[ebp-4],3E7 |局部变量
0040102F | 8D45 FC | lea eax,dword ptr ss:[ebp-4] |
00401032 | 50 | push eax |
00401033 | 68 48504200 | push test.425048 | 425048:"%d"
00401038 | E8 23010000 | call test.401160 | scanf
0040103D | 83C4 08 | add esp,8 |
00401040 | 68 307A4200 | push test.427A30 |全局变量
00401045 | 68 48504200 | push test.425048 | 425048:"%d"
0040104A | E8 11010000 | call test.401160 |
0040104F | 83C4 08 | add esp,8 |
00401052 | 8D4D 08 | lea ecx,dword ptr ss:[ebp+8] |参数访问
00401055 | 51 | push ecx |
00401056 | 68 48504200 | push test.425048 | 425048:"%d"
0040105B | E8 00010000 | call test.401160 |
00401060 | 83C4 08 | add esp,8 |
00401063 | 8B55 FC | mov edx,dword ptr ss:[ebp-4] | edx:EntryPoint
00401066 | 0355 08 | add edx,dword ptr ss:[ebp+8] | edx:EntryPoint
00401069 | 52 | push edx | edx:EntryPoint
0040106A | 68 3C504200 | push test.42503C | 42503C:"n = %d\n"
0040106F | E8 6C000000 | call test.4010E0 |
00401074 | 83C4 08 | add esp,8 |
00401077 | A1 307A4200 | mov eax,dword ptr ds:[427A30] |
0040107C | 0345 08 | add eax,dword ptr ss:[ebp+8] |

在IDA中识别为这样:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
.text:00401000 ; int __cdecl main(int argc, const char **argv, const char **envp)
.text:00401000 _main proc near ; CODE XREF: start+AF↓p
.text:00401000
.text:00401000 var_4 = dword ptr -4
.text:00401000 argc = dword ptr 4
.text:00401000 argv = dword ptr 8
.text:00401000 envp = dword ptr 0Ch
.text:00401000
.text:00401000 push ecx
.text:00401001 lea eax, [esp+4+var_4]
.text:00401005 mov [esp+4+var_4], 3E7h
.text:0040100D push eax
.text:0040100E push offset aD ; "%d"
.text:00401013 call _scanf
.text:00401018 push offset dword_408030 ;全局变量通过地址访问
.text:0040101D push offset aD ; "%d"
.text:00401022 call _scanf
.text:00401027 lea ecx, [esp+14h+argc]
.text:0040102B push ecx
.text:0040102C push offset aD ; "%d"
.text:00401031 call _scanf
.text:00401036 mov edx, [esp+1Ch+argc]
.text:0040103A mov eax, [esp+1Ch+var_4]
.text:0040103E add eax, edx
.text:00401040 push eax
.text:00401041 push offset aND ; "n = %d\n"
.text:00401046 call sub_401080
.text:0040104B mov ecx, dword_408030
.text:00401051 mov edx, [esp+24h+argc]
.text:00401055 add ecx, edx
.text:00401057 push ecx
.text:00401058 push offset aGNtestD ; "g_nTest = %d\n"
.text:0040105D call sub_401080
.text:00401062 push offset aHelloWorld ; "Hello World!\n"
.text:00401067 call sub_401080
.text:0040106C xor eax, eax
.text:0040106E add esp, 30h
.text:00401071 retn
.text:00401071 _main endp
.text:00401071

全局代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// Test.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//

#include <iostream>
int GetInt()
{
return 3;
}
int GetInt1()
{
return 4;
}
int GetInt2()
{
return 5;
}
int GetInt3()
{
return 6;
}
int GetInt4()
{
return 7;
}
static int g_nTest = GetInt();
static int g_nTest1 = GetInt1() + g_nTest;
static int g_nTest2 = GetInt2() / 7;
static int g_nTest3 = GetInt3() % -8;
static int g_nTest4 = GetInt4();
int main(int argc, char* argv[])
{
int n = 999;
scanf("%d", &n);
scanf("%d", &g_nTest);
scanf("%d", &argc);
printf("n = %d\n", n + argc);
printf("g_nTest = %d\n", g_nTest + argc);
printf("Hello World!\n");
return 0;
}


// 运行程序: Ctrl + F5 或调试 >“开始执行(不调试)”菜单
// 调试程序: F5 或调试 >“开始调试”菜单

// 入门使用技巧:
// 1. 使用解决方案资源管理器窗口添加/管理文件
// 2. 使用团队资源管理器窗口连接到源代码管理
// 3. 使用输出窗口查看生成输出和其他消息
// 4. 使用错误列表窗口查看错误
// 5. 转到“项目”>“添加新项”以创建新的代码文件,或转到“项目”>“添加现有项”以将现有代码文件添加到项目
// 6. 将来,若要再次打开此项目,请转到“文件”>“打开”>“项目”并选择 .sln 文件

这里直接反汇编Release版本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
start:
.text:00401407 ; __unwind { // __SEH_prolog4
.text:00401407 call sub_4016AF
.text:0040140C jmp loc_401285
.text:0040140C ; } // starts at 401407
.text:0040140C start endp ; sp-analysis failed

;进入loc_401285
.text:00401285 ; __unwind { // __SEH_prolog4
.text:00401285 push 14h
.text:00401287 push offset unk_402560
.text:0040128C call __SEH_prolog4
.text:00401291 push 1
.text:00401293 call sub_401487
.text:00401298 pop ecx
.text:00401299 test al, al
.text:0040129B jz loc_4013F1
.text:004012A1 xor bl, bl
.text:004012A3 mov [ebp-19h], bl
.text:004012A6 ; __try { // __except at loc_4013C0
.text:004012A6 and dword ptr [ebp-4], 0
.text:004012AA call sub_401455
.text:004012AF mov [ebp-24h], al
.text:004012B2 mov eax, dword_403044
.text:004012B7 xor ecx, ecx
.text:004012B9 inc ecx
.text:004012BA cmp eax, ecx
.text:004012BC jz loc_4013F1
.text:004012C2 test eax, eax
.text:004012C4 jnz short loc_40130F
.text:004012C6 mov dword_403044, ecx
.text:004012CC push offset unk_4020F8
.text:004012D1 push offset unk_4020EC
.text:004012D6 call _initterm_e
.text:004012DB pop ecx
.text:004012DC pop ecx
.text:004012DD test eax, eax
.text:004012DF jz short loc_4012F2
.text:004012DF ; } // starts at 4012A6
.text:004012E1 mov dword ptr [ebp-4], 0FFFFFFFEh
.text:004012E8 mov eax, 0FFh
.text:004012ED jmp loc_4013E1
.text:004012F2 ; ---------------------------------------------------------------------------
.text:004012F2
.text:004012F2 loc_4012F2: ; CODE XREF: start-128↑j
.text:004012F2 push offset unk_4020E8
.text:004012F7 push offset unk_4020CC
.text:004012FC call _initterm
.text:00401301 pop ecx
.text:00401302 pop ecx
.text:00401303 mov dword_403044, 2
.text:0040130D jmp short loc_401314
.text:0040130F ; ---------------------------------------------------------------------------

;继续进入_initterm
.rdata:004020D0 dd offset sub_401273 ; set_newMode 库函数
.rdata:004020D4 dd offset sub_401070 ;这几个函数分别代表我们写的函数
.rdata:004020D8 dd offset sub_401000
.rdata:004020DC dd offset sub_401020
.rdata:004020E0 dd offset sub_401040
.rdata:004020E4 dd offset sub_401060