gdb运行时结合汇编堆栈分析

时间:2023-03-08 21:13:41
gdb运行时结合汇编堆栈分析

一、从源代码文件到可执行文件

        从C文件到可执行文件,一般来说需要两步,先将每个C文件编译成.o文件,再把多个.o文件和链接库一起链接成可执行文件。但具体来说,其实是分为四步,下面以example.c为例进行说明。
gdb运行时结合汇编堆栈分析
#define MYINT int

short addend1 = 1;
static int addend2 = 2;
const static long addend3 = 3; static MYINT g(MYINT x)
{
return x + addend1;
} static const MYINT f(MYINT x)
{
return g(x + addend2);
} MYINT main(void)
{
return f(8) + addend3;
}
gdb运行时结合汇编堆栈分析

第一步: 预处理,进行宏替换等工作。执行gcc -E -o example.cpp example.c,得到example.cpp如下:

gdb运行时结合汇编堆栈分析
# 1 "example.c"
# 1 "<built-in>"
# 1 "<命令行>"
# 1 "example.c" short addend1 = 1;
static int addend2 = 2;
const static long addend3 = 3; static int g(int x)
{
return x + addend1;
} static const int f(int x)
{
return g(x + addend2);
} int main(void)
{
return f(8) + addend3;
}
gdb运行时结合汇编堆栈分析

第二步:将预处理文件编译成汇编文件。执行 gcc -x cpp-output -S -fno-asynchronous-unwind-tables -o example.s example.cpp,加入 -fno-asynchronous-unwind-tables是为了禁止生成.cfi代码。生成的汇编代码如下:

gdb运行时结合汇编堆栈分析
    .file    "example.c"       ; C文件的文件名
.globl addend1 ; 全局变量
.data ; 数据段
; short addend1 = 1;开始
.align 2 ; 地址对齐,按2的整数倍对齐
.type addend1, @object ; 类型是对象
.size addend1, 2 ; 占两个字节
addend1: ; 起始地址
.value 1 ; 初始值
; static int addend2 = 2;开始
.align 4
.type addend2, @object
.size addend2, 4
addend2:
.long 2
.section .rodata ; 常量存储区开始
.align 4
.type addend3, @object
.size addend3, 4
addend3:
.long 3
.text ; 代码段开始
.type g, @function ; 函数g
g: ; g的起始地址
pushl %ebp ; %ebp入栈
movl %esp, %ebp ; 当前函数栈从%esp开始
movzwl addend1, %eax ; 把short放入%eax
cwtl
addl 8(%ebp), %eax ; int + short
popl %ebp
ret
.size g, .-g
.type f, @function
f:
pushl %ebp
movl %esp, %ebp
subl $4, %esp ; 为调用g时传递参数准备空间
movl addend2, %eax ; 在%eax中计算实参
addl 8(%ebp), %eax
movl %eax, (%esp) ; 实参入栈
call g
leave
ret
.size f, .-f
.globl main ; main未加static,是全局可见的
.type main, @function
main:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
movl $8, (%esp)
call f
movl addend3, %edx
addl %edx, %eax
leave
ret
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",@progbits
  
gdb运行时结合汇编堆栈分析
        由汇编代码可见:1.未加static的全局变量和函数都生成了相应的.globl指令,表示该符号是全局可见的;2.在这个32位(i386)平台上,int和long都是4字节;3.const变量放在只读数据段.rodata中。
        第三步,将汇编代码汇编成二进制目标文件:执行gcc -x assembler -c example.s,生成example.o文件。用objdump -D example.o察看,得到如下信息:
gdb运行时结合汇编堆栈分析
example.o:     file format elf32-i386

Disassembly of section .text:

00000000 <g>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 0f b7 05 00 00 00 00 movzwl 0x0,%eax
a: 98 cwtl
b: 03 45 08 add 0x8(%ebp),%eax
e: 5d pop %ebp
f: c3 ret 00000010 <f>:
10: 55 push %ebp
11: 89 e5 mov %esp,%ebp
13: 83 ec 04 sub $0x4,%esp
16: a1 04 00 00 00 mov 0x4,%eax
1b: 03 45 08 add 0x8(%ebp),%eax
1e: 89 04 24 mov %eax,(%esp)
21: e8 da ff ff ff call 0 <g>
26: c9 leave
27: c3 ret 00000028 <main>:
28: 55 push %ebp
29: 89 e5 mov %esp,%ebp
2b: 83 ec 04 sub $0x4,%esp
2e: c7 04 24 08 00 00 00 movl $0x8,(%esp)
35: e8 d6 ff ff ff call 10 <f>
3a: 8b 15 00 00 00 00 mov 0x0,%edx
40: 01 d0 add %edx,%eax
42: c9 leave
43: c3 ret Disassembly of section .data: 00000000 <addend1>:
0: 01 00 add %eax,(%eax)
... 00000004 <addend2>:
4: 02 00 add (%eax),%al
... Disassembly of section .rodata: 00000000 <addend3>:
0: 03 00 add (%eax),%eax
... Disassembly of section .comment: 00000000 <.comment>:
0: 00 47 43 add %al,0x43(%edi)
3: 43 inc %ebx
4: 3a 20 cmp (%eax),%ah
6: 28 55 62 sub %dl,0x62(%ebp)
9: 75 6e jne 79 <main+0x51>
b: 74 75 je 82 <main+0x5a>
d: 2f das
e: 4c dec %esp
f: 69 6e 61 72 6f 20 34 imul $0x34206f72,0x61(%esi),%ebp
16: 2e 36 2e 33 2d 31 75 cs ss xor %cs:%ss:0x75627531,%ebp
1d: 62 75
1f: 6e outsb %ds:(%esi),(%dx)
20: 74 75 je 97 <main+0x6f>
22: 35 29 20 34 2e xor $0x2e342029,%eax
27: 36 2e 33 00 ss xor %cs:%ss:(%eax),%eax
gdb运行时结合汇编堆栈分析

第四步,将目标文件链接成可执行文件:执行gcc -o example example.o。此时可以继续用objdump -D example > example.objdump察看,可见example.objdump文件有728行,链接后已经加入了大量的代码,其中我们自己写的部分是:

gdb运行时结合汇编堆栈分析
080483b4 <g>:
80483b4: 55 push %ebp
80483b5: 89 e5 mov %esp,%ebp
80483b7: 0f b7 05 10 a0 04 08 movzwl 0x804a010,%eax
80483be: 98 cwtl
80483bf: 03 45 08 add 0x8(%ebp),%eax
80483c2: 5d pop %ebp
80483c3: c3 ret 080483c4 <f>:
80483c4: 55 push %ebp
80483c5: 89 e5 mov %esp,%ebp
80483c7: 83 ec 04 sub $0x4,%esp
80483ca: a1 14 a0 04 08 mov 0x804a014,%eax
80483cf: 03 45 08 add 0x8(%ebp),%eax
80483d2: 89 04 24 mov %eax,(%esp)
80483d5: e8 da ff ff ff call 80483b4 <g>
80483da: c9 leave
80483db: c3 ret 080483dc <main>:
80483dc: 55 push %ebp
80483dd: 89 e5 mov %esp,%ebp
80483df: 83 ec 04 sub $0x4,%esp
80483e2: c7 04 24 08 00 00 00 movl $0x8,(%esp)
80483e9: e8 d6 ff ff ff call 80483c4 <f>
80483ee: 8b 15 d0 84 04 08 mov 0x80484d0,%edx
80483f4: 01 d0 add %edx,%eax
80483f6: c9 leave
80483f7: c3 ret
80483f8: 90 nop
80483f9: 90 nop
80483fa: 90 nop
80483fb: 90 nop
80483fc: 90 nop
80483fd: 90 nop
80483fe: 90 nop
80483ff: 90 nop ...
...
... Disassembly of section .data: 0804a008 <__data_start>:
804a008: 00 00 add %al,(%eax)
... 0804a00c <__dso_handle>:
804a00c: 00 00 add %al,(%eax)
... 0804a010 <addend1>:
804a010: 01 00 add %eax,(%eax)
... 0804a014 <addend2>:
804a014: 02 00 add (%eax),%al
... Disassembly of section .bss: 0804a018 <completed.6159>:
804a018: 00 00 add %al,(%eax)
... 0804a01c <dtor_idx.6161>:
804a01c: 00 00 add %al,(%eax)
...
gdb运行时结合汇编堆栈分析
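        可见此时的代码已经有了它运行时的实际地址。需要注意的是,上面的摘录中没有列出.rodata段,但它并没有消失:main中mov 0x80484d0,%edx读取的addend3就位于.rodata段内(见下面readelf输出中的第15节,起始地址0x080484c8,大小0xc)。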
        然后还可以用readelf -a example > example.elf 察看该可执行文件的ELF头部信息,共221行,这里只摘录前57行:
gdb运行时结合汇编堆栈分析
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: Intel 80386
Version: 0x1
Entry point address: 0x8048300
Start of program headers: 52 (bytes into file)
Start of section headers: 4416 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 9
Size of section headers: 40 (bytes)
Number of section headers: 30
Section header string table index: 27 Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .interp PROGBITS 08048154 000154 000013 00 A 0 0 1
[ 2] .note.ABI-tag NOTE 08048168 000168 000020 00 A 0 0 4
[ 3] .note.gnu.build-i NOTE 08048188 000188 000024 00 A 0 0 4
[ 4] .gnu.hash GNU_HASH 080481ac 0001ac 000020 04 A 5 0 4
[ 5] .dynsym DYNSYM 080481cc 0001cc 000040 10 A 6 1 4
[ 6] .dynstr STRTAB 0804820c 00020c 000045 00 A 0 0 1
[ 7] .gnu.version VERSYM 08048252 000252 000008 02 A 5 0 2
[ 8] .gnu.version_r VERNEED 0804825c 00025c 000020 00 A 6 1 4
[ 9] .rel.dyn REL 0804827c 00027c 000008 08 A 5 0 4
[10] .rel.plt REL 08048284 000284 000010 08 A 5 12 4
[11] .init PROGBITS 08048294 000294 00002e 00 AX 0 0 4
[12] .plt PROGBITS 080482d0 0002d0 000030 04 AX 0 0 16
[13] .text PROGBITS 08048300 000300 0001ac 00 AX 0 0 16
[14] .fini PROGBITS 080484ac 0004ac 00001a 00 AX 0 0 4
[15] .rodata PROGBITS 080484c8 0004c8 00000c 00 A 0 0 4
[16] .eh_frame_hdr PROGBITS 080484d4 0004d4 00002c 00 A 0 0 4
[17] .eh_frame PROGBITS 08048500 000500 0000a4 00 A 0 0 4
[18] .ctors PROGBITS 08049f14 000f14 000008 00 WA 0 0 4
[19] .dtors PROGBITS 08049f1c 000f1c 000008 00 WA 0 0 4
[20] .jcr PROGBITS 08049f24 000f24 000004 00 WA 0 0 4
[21] .dynamic DYNAMIC 08049f28 000f28 0000c8 08 WA 6 0 4
[22] .got PROGBITS 08049ff0 000ff0 000004 04 WA 0 0 4
[23] .got.plt PROGBITS 08049ff4 000ff4 000014 04 WA 0 0 4
[24] .data PROGBITS 0804a008 001008 000010 00 WA 0 0 4
[25] .bss NOBITS 0804a018 001018 000008 00 WA 0 0 4
[26] .comment PROGBITS 00000000 001018 00002a 01 MS 0 0 1
[27] .shstrtab STRTAB 00000000 001042 0000fc 00 0 0 1
[28] .symtab SYMTAB 00000000 0015f0 000450 10 29 49 4
[29] .strtab STRTAB 00000000 001a40 000209 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
gdb运行时结合汇编堆栈分析

二、运行时堆栈分析

为了使用gdb进行调试,用gcc -g example.c -o example重新编译代码,然后执行gdb example进入gdb调试。

在main函数入口处设置断点,运行程序,然后察看运行到的汇编指令、此时的寄存器数据和堆栈:

gdb运行时结合汇编堆栈分析
(gdb) b 17
Breakpoint 1 at 0x80483e2: file example.c, line 17.
(gdb) r
Starting program: /home/qpx/操作系统/example Breakpoint 1, main () at example.c:19
19 return f(8) + addend3;
(gdb) disassemble
Dump of assembler code for function main:
0x080483dc <+0>: push %ebp
0x080483dd <+1>: mov %esp,%ebp
0x080483df <+3>: sub $0x4,%esp
=> 0x080483e2 <+6>: movl $0x8,(%esp)
0x080483e9 <+13>: call 0x80483c4 <f>
0x080483ee <+18>: mov 0x80484d0,%edx
0x080483f4 <+24>: add %edx,%eax
0x080483f6 <+26>: leave
0x080483f7 <+27>: ret
End of assembler dump.
(gdb) info registers
eax 0x1 1
ecx 0xbffff394 -1073745004
edx 0xbffff324 -1073745116
ebx 0xb7fc2ff4 -1208209420
esp 0xbffff2f4 0xbffff2f4
ebp 0xbffff2f8 0xbffff2f8
esi 0x0 0
edi 0x0 0
eip 0x80483e2 0x80483e2 <main+6>
eflags 0x200282 [ SF IF ID ]
cs 0x73 115
ss 0x7b 123
ds 0x7b 123
es 0x7b 123
fs 0x0 0
gs 0x33 51

(gdb) x/2 0xbffff2f4
  0xbffff2f4: 0x00000000 0x00000000

gdb运行时结合汇编堆栈分析

可见此时主函数的栈基址为0xbffff2f8,而%esp已经下移4字节准备为函数 f 传递参数8,但目前%esp所指堆栈内容为0,%ebp所指内容也为0。下面展示每一步时%esp、%ebp和堆栈内容的变化:

gdb运行时结合汇编堆栈分析

gdb运行时结合汇编堆栈分析

call指令将下一条指令的地址入栈:

gdb运行时结合汇编堆栈分析

将上一个函数的基址入栈,从当前%esp开始作为新基址:

gdb运行时结合汇编堆栈分析

先为传参做准备:

gdb运行时结合汇编堆栈分析

实参的计算在%eax中进行:

gdb运行时结合汇编堆栈分析

实参入栈:

gdb运行时结合汇编堆栈分析

gdb运行时结合汇编堆栈分析

call指令将下一条指令的地址入栈:

gdb运行时结合汇编堆栈分析gdb运行时结合汇编堆栈分析

计算short+int:

gdb运行时结合汇编堆栈分析gdb运行时结合汇编堆栈分析

pop %ebp指令将栈顶弹到%ebp中,同时%esp增加4字节:

gdb运行时结合汇编堆栈分析

ret指令将栈顶(即call指令压入的返回地址)弹给%eip,同时%esp增加4字节:

gdb运行时结合汇编堆栈分析

因为函数 f 修改了%esp,所以用leave指令恢复。leave指令先将%esp对齐到%ebp(即把%ebp的值赋给%esp),然后把栈顶弹给%ebp,相当于movl %ebp, %esp加上popl %ebp:

gdb运行时结合汇编堆栈分析

gdb运行时结合汇编堆栈分析

gdb运行时结合汇编堆栈分析

gdb运行时结合汇编堆栈分析

程序最终结束。