实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)

时间:2021-08-29 02:27:02

原文地址:https://gitlab.com/Manouchehri/Matryoshka-Stage-2/blob/master/stage2.md

实验用代码下载地址:https://gitlab.com/Manouchehri/Matryoshka-Stage-2/blob/master/stage2.md

下面是我自己的实验进程:

2017-05-09 16:18:09

1. linux 64位提权并执行stage2.bin文件

实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)

可以看到,输入错误的密码时,会提示Try again...

 

2. 对stage2.bin文件利用IDA拍摄6.6版本定位出main函数

程序入口点start看到:

实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)

查询一下__libc_start_main函数的功能:

int __libc_start_main(int *(main) (int, char * *, char * *), int argc, char * * ubp_av, void (*init) (void), void (*fini) (void), void (*rtld_fini) (void), void (* stack_end));

第一个函数参数是main,也就是二进制程序中的mov rdi, offset sub_4006F2。

查看sub_4006F2的函数调用流程图,可以确定这个是main函数。因为调用流程里为false的时候,会输出Try again;没有输入的时候,输出pass等等。图比较大,这里不粘贴了。

 

3.反汇编出C代码。

可以按F5,使用Hex-Rays Decompiler反汇编出main函数的代码。也可以点击IDA的File>produce file>create C File。我们使用后者,生成的C文件的内容如下:、

/* This file has been generated by the Hex-Rays decompiler.
Copyright (c) 2007-2014 Hex-Rays <info@hex-rays.com>

Detected compiler: GNU C++
*/

#include
<defs.h>


//-------------------------------------------------------------------------
// Function declarations

int init_proc();
// ssize_t write(int fd, const void *buf, size_t n);
// size_t strlen(const char *s);
// int fprintf(FILE *stream, const char *format, ...);
// int __gmon_start__(void); weak
// size_t fwrite(const void *ptr, size_t size, size_t n, FILE *s);
signed int sub_400590();
int sub_4005C0();
signed
int sub_400600();
int sub_400620();
__int64 __fastcall sub_40064D(
const char *a1);
int __fastcall sub_4006F2(int a1, __int64 a2);
void term_proc();

//-------------------------------------------------------------------------
// Data declarations

char byte_400A60[] = { '6' }; // weak
FILE *stdout; // idb
char byte_608068; // weak
// extern _UNKNOWN _gmon_start__; weak


//----- (00000000004004C8) ----------------------------------------------------
int init_proc()
{
_UNKNOWN
*v0; // rax@1

v0
= &_gmon_start__;
if ( &_gmon_start__ )
LODWORD(v0)
= __gmon_start__();
return (signed int)v0;
}
// 400540: using guessed type int __gmon_start__(void);

//----- (0000000000400560) ----------------------------------------------------
#error "400566: positive sp value has been found (funcsize=3)"

//----- (0000000000400590) ----------------------------------------------------
signed int sub_400590()
{
return 7;
}

//----- (00000000004005C0) ----------------------------------------------------
int sub_4005C0()
{
return 0;
}

//----- (0000000000400600) ----------------------------------------------------
signed int sub_400600()
{
signed
int result; // eax@2

if ( !byte_608068 )
{
result
= sub_400590();
byte_608068
= 1;
}
return result;
}
// 608068: using guessed type char byte_608068;

//----- (0000000000400620) ----------------------------------------------------
int sub_400620()
{
return sub_4005C0();
}
// 400620: could not find valid save-restore pair for rbp

//----- (000000000040064D) ----------------------------------------------------
__int64 __fastcall sub_40064D(const char *a1)
{
char buf; // [sp+13h] [bp-Dh]@2
int v3; // [sp+14h] [bp-Ch]@1
int v4; // [sp+18h] [bp-8h]@1
int v5; // [sp+1Ch] [bp-4h]@1

fwrite(
"Good good!\n", 1uLL, 0xBuLL, stdout);
v3
= 0;
v4
= 0;
v5
= strlen(a1);
while ( v3 <= 25183 )
{
buf
= byte_400A60[(signed __int64)v3] ^ a1[v4 % v5];
write(
2, &buf, 1uLL);
++v3;
++v4;
}
return 0LL;
}

//----- (00000000004006F2) ----------------------------------------------------
int __fastcall sub_4006F2(int a1, __int64 a2)
{
int result; // eax@2
__int64 v3; // rbx@10
signed int v4; // [sp+1Ch] [bp-14h]@4

if ( a1 == 2 )
{
if ( 42 * (strlen(*(const char **)(a2 + 8)) + 1) != 504 )
goto LABEL_31;
v4
= 1;
if ( **(_BYTE **)(a2 + 8) != 80 )
v4
= 0;
if ( 2 * *(_BYTE *)(*(_QWORD *)(a2 + 8) + 3LL) != 200 )
v4
= 0;
if ( **(_BYTE **)(a2 + 8) + 16 != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 6LL) - 16 )
v4
= 0;
v3
= *(_BYTE *)(*(_QWORD *)(a2 + 8) + 5LL);
if ( v3 != 9 * strlen(*(const char **)(a2 + 8)) - 4 )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 7LL) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 10LL) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) - 17 != **(_BYTE **)(a2 + 8) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 3LL) != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 9LL) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 4LL) != 105 )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 2LL) - *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) != 13 )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 8LL) - *(_BYTE *)(*(_QWORD *)(a2 + 8) + 7LL) != 13 )
v4
= 0;
if ( v4 )
result
= sub_40064D(*(const char **)(a2 + 8));
else
LABEL_31:
result
= fprintf(stdout, "Try again...\n", a2);
}
else
{
result
= fprintf(stdout, "Usage: %s <pass>\n", *(_QWORD *)a2, a2);
}
return result;
}

//----- (0000000000400A34) ----------------------------------------------------
void term_proc()
{
;
}

#error "There were 1 decompilation failure(s) on 9 function(s)"

 

 

4. 既然是破解,我们只需要在通过验证的地方,做一些改动。所以,围绕main函数(这里是sub_4006F2函数)我们去除所有不想干的代码,提取出主要流程,比如,sub_40064D实现的功能是在验证通过的时候,输出密码验证通过的信息,那么我就直接用printf("good!")进行代替,以简化破解难度。改动后的代码是:

 

#include <defs.h>
int __fastcall sub_4006F2(int a1, __int64 a2)
{
int result; // eax@2
__int64 v3; // rbx@10
signed int v4; // [sp+1Ch] [bp-14h]@4

if ( a1 == 2 )
{
if ( 42 * (strlen(*(const char **)(a2 + 8)) + 1) != 504 )
goto LABEL_31;
v4
= 1;
if ( **(_BYTE **)(a2 + 8) != 80 )
v4
= 0;
if ( 2 * *(_BYTE *)(*(_QWORD *)(a2 + 8) + 3LL) != 200 )
v4
= 0;
if ( **(_BYTE **)(a2 + 8) + 16 != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 6LL) - 16 )
v4
= 0;
v3
= *(_BYTE *)(*(_QWORD *)(a2 + 8) + 5LL);
if ( v3 != 9 * strlen(*(const char **)(a2 + 8)) - 4 )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 7LL) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 10LL) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) - 17 != **(_BYTE **)(a2 + 8) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 3LL) != *(_BYTE *)(*(_QWORD *)(a2 + 8) + 9LL) )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 4LL) != 105 )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 2LL) - *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) != 13 )
v4
= 0;
if ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 8LL) - *(_BYTE *)(*(_QWORD *)(a2 + 8) + 7LL) != 13 )
v4
= 0;
if ( v4 )
printf(
"good\n");
else
LABEL_31:
result
= fprintf(stdout, "Try again...\n", a2);
}
else
{
result
= fprintf(stdout, "Usage: %s <pass>\n", *(_QWORD *)a2, a2);
}
return result;
}

 

 

 

5. 当然,这离可执行还差很远。我们还要改动为KLEE能够分析的形式。但是在满足klee能够分析之前,我们要先将其改动为可以编译执行的C文件。

我们一边用经验去改,一边用编译器同时编译,根据编译不通过所提示的错误进行改动。

主要改动如下:

  • 第一处

将defs.h文件从IDA的目录中复制到test.c(改动代码放置位置)的相同目录中去。Defs.h文件中保存的是IDA生成的C文件中相关类型的定义。

  • 第二处:

../src/test.c:35:5: warning: second argument of ‘main’ should be ‘char **’ [-Wmain]

 int main(int a1, __int64 a2)

将__int64改为char**(这是经验)

  • 第三处:

../src/test.c:71:7: warning: implicit declaration of function ‘sub_40064D’ [-Wimplicit-function-declaration]

       result = sub_40064D(*(const char **)(a2 + 8));

       ^

../src/test.c:74:7: warning: too many arguments for format [-Wformat-extra-args]

       result = fprintf(stdout, "Try again...\n", a2);

       ^

../src/test.c:78:5: warning: format ‘%s’ expects argument of type ‘char *’, but argument 3 has type ‘uint64’ [-Wformat=]

result = fprintf(stdout, "Usage: %s <pass>\n", *(_QWORD *)a2, a2);

所以将fprintf改为printf。并且去掉return result。

  • 第四处

代码中有大量的类型转换,我们在IDA对sub_4006F2按F5弹出的反汇编后C代码的窗口中,右键选择Hiden casts.

实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)

可以看到,之前的QWORD *和BYTE *等IDA中的类型,都去除了。所以,再次改动后的代码是:

#include <stdio.h>
#include
<string.h>
#include
<stdlib.h>
#include
"defs.h"
int main(int a1, char ** a2)
{

__int64 v3;
// rbx@10
signed int v4; // [sp+1Ch] [bp-14h]@4

if ( a1 == 2 )
{
if ( 42 * (strlen(*(a2 + 8)) + 1) != 504 )
goto LABEL_31;
v4
= 1;
if ( **(a2 + 8) != 80 )
v4
= 0;
if ( 2 * *(*(a2 + 8) + 3LL) != 200 )
v4
= 0;
if ( **(a2 + 8) + 16 != *(*(a2 + 8) + 6LL) - 16 )
v4
= 0;
v3
= *(*(a2 + 8) + 5LL);
if ( v3 != 9 * strlen(*(a2 + 8)) - 4 )
v4
= 0;
if ( *(*(a2 + 8) + 1LL) != *(*(a2 + 8) + 7LL) )
v4
= 0;
if ( *(*(a2 + 8) + 1LL) != *(*(a2 + 8) + 10LL) )
v4
= 0;
if ( *(*(a2 + 8) + 1LL) - 17 != **(a2 + 8) )
v4
= 0;
if ( *(*(a2 + 8) + 3LL) != *(*(a2 + 8) + 9LL) )
v4
= 0;
if ( *(*(a2 + 8) + 4LL) != 105 )
v4
= 0;
if ( *(*(a2 + 8) + 2LL) - *(*(a2 + 8) + 1LL) != 13 )
v4
= 0;
if ( *(*(a2 + 8) + 8LL) - *(*(a2 + 8) + 7LL) != 13 )
v4
= 0;
if ( v4 )
printf(
"good\n");
else
LABEL_31:
printf(
"Try again...\n");
}
else
{
printf(
"Usage: %s <pass>\n", *a2);
}
return 0;
}

 

6. 使用KLEE进行分析

改动

if ( v4 )
printf(
"good\n");

 为:

if ( v4 ){
printf(
"good\n");
klee_assert(
0);
}

 并且添加两个头文件:

#include <assert.h>

#include <klee/klee.h>

下一步使用KLEE进行符号执行:KLEE官网见(http://klee.github.io/)

需要大家自行准备一下KLEE的知识。这里不再赘述。

执行下面的脚本。test.c为可以编译通过的c文件。test-klee.c为添加klee_assert等内容以后的文件

clang -I /home/klee/xiaojiework/klee/include/ -emit-llvm -c -g test-klee.c
klee
--optimize --libc=uclibc --posix-runtime test-klee.bc --sym-arg 100

结果是:

 实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)

出现了问题,没有像官网那样,报assertion的错误,而且探寻的路径只有一条。究竟是怎么回事???

 

7. 问题所在。

经过一系列研究,才发现问题的所在。

首先,int a1,char **a2是main函数中用于接收命令行信息的参数。a1是命令行输入的字符串的个数。a2[0]就是程序名称。a2[1]是程序后的第一个参数。

比如,你输入stage2.bin “123”,那么传入test.c中main函数的,就是a1为2,a2为char**类型,包含两个字符串,分别是“stage2.bin”和“123”。

所以,a2[1]存储的是密码字符串。但是我们反汇编出的程序反复处理的是*(a2+8)位置的字符串。这也是klee发现out of bound pointer的原因。

 

8.问题是如何导致的?又如何解决?

首先,明确一个问题。

如果a2是char **类型,那么*(a2+1)和a2[1]是一样的。因为char **是char *的指针,a2+1会自动解释为真正a2的地址值加上8(也就是8个字节)。 

如果a2是__int64类型,那么*(a2+8)和a2[1]的值是相等的。当然,要对a2+8先cast为char **类型以后,才能用*取值。

这是因为C编译器对a2+n计算的处理是这样做的。

如果a2是int类型,那么就是简单+n。

如果a2是指针类型,则加上a2指向的对象大小*n。比如,a2是char**类型的时候,a2+1,就是a2+1*(sizeof(char*))。由于64位程序下指针类型都是64位,即8个字节。所以a2+1的真正计算值是a2值+8。

C语言的cast特别多。对cast机制,可以自行补课。这里不再赘述。

我的错误是:

一开始反汇编出的代码,将a2作为__int64类型处理,所以代码中用

 ( *(_BYTE *)(*(_QWORD *)(a2 + 8) + 1LL) 

 

表示a2[1][1]。也就是将__int64类型的地址a2,将其作为char**类型的时候,计算a2[1][1]的正确过程。

我在后续将反汇编出的C代码立求编译通过的时候,简单的在main函数括号内,将a2的类型改正为cahr**了,但是后面计算a2[1][1]的方式还没有变。

这就是错误的原因。

正确的做法:在IDA F5反汇编出的代码窗口,右键main函数括号内的a2,选择set lvar type为char **a2。可以看到main函数的代码为:

 

int __fastcall sub_4006F2(int a1, char **a2)
{
int result; // eax@2
__int64 v3; // rbx@10
signed int v4; // [sp+1Ch] [bp-14h]@4

if ( a1 == 2 )
{
if ( 42 * (strlen(a2[1]) + 1) != 504 )
goto LABEL_31;
v4
= 1;
if ( *a2[1] != 80 )
v4
= 0;
if ( 2 * a2[1][3] != 200 )
v4
= 0;
if ( *a2[1] + 16 != a2[1][6] - 16 )
v4
= 0;
v3
= a2[1][5];
if ( v3 != 9 * strlen(a2[1]) - 4 )
v4
= 0;
if ( a2[1][1] != a2[1][7] )
v4
= 0;
if ( a2[1][1] != a2[1][10] )
v4
= 0;
if ( a2[1][1] - 17 != *a2[1] )
v4
= 0;
if ( a2[1][3] != a2[1][9] )
v4
= 0;
if ( a2[1][4] != 105 )
v4
= 0;
if ( a2[1][2] - a2[1][1] != 13 )
v4
= 0;
if ( a2[1][8] - a2[1][7] != 13 )
v4
= 0;
if ( v4 )
result
= sub_40064D(a2[1]);
else
LABEL_31:
result
= fprintf(stdout, "Try again...\n", a2);
}
else
{
result
= fprintf(stdout, "Usage: %s <pass>\n", *a2, a2);
}
return result;
}

 

可以看到,这才是正确的代码。之前对a2的类型改动以后,没有在代码中对计算a2[1]的方式进行改动。还是按照a2为__int64类型时的计算方式。

 

9. 计算出软件密钥。

执行前述的脚本:

klee@ubuntu:~/kleeexperiment/Keygenningwithklee/stage2$ sh ./xiaojie.sh 
KLEE: NOTE: Using klee
-uclibc : /usr/local/lib/x86_64-linux-gnu/klee/runtime/klee-uclibc.bca
KLEE: NOTE: Using model:
/usr/local/lib/x86_64-linux-gnu/klee/runtime/libkleeRuntimePOSIX.bca
KLEE: output directory
is "/home/klee/kleeexperiment/Keygenningwithklee/stage2/klee-out-0"
KLEE: Using STP solver backend
KLEE: WARNING: undefined reference to function: puts
KLEE: WARNING ONCE: calling external: syscall(
16, 0, 21505, 35075632)
KLEE: WARNING ONCE: Alignment of memory
from call "malloc" is not modelled. Using alignment of 8.
KLEE: WARNING ONCE: calling external: puts(
46355200)
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Good good
!
Try again...
KLEE: ERROR:
/home/klee/kleeexperiment/Keygenningwithklee/stage2/main.c:42: ASSERTION FAIL: 0
KLEE: NOTE: now ignoring
this error at this location
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...
Try again...

KLEE: done: total instructions
= 5619
KLEE: done: completed paths
= 102
KLEE: done: generated tests
= 102
klee@ubuntu:
~/kleeexperiment/Keygenningwithklee/stage2$

生成的文件中,test000027.assert.err是klee_assert报错的位置。其实也就是密码验证通过的位置。查看第27个测试用例的详细内容:

实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)

可以看到Pandi_panda为密钥。

我们验证一下,可以看到通过验证了。

实验一:使用符号执行工具klee对软件进行破解(来自于klee官网)