bpf: handle pseudo BPF_LD_IMM64 insn
kernel/bpf/verifier.c
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>

/* bpf_check() is a static code analyzer that walks the eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is a depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 32k insn, which may be hit even if the total number
 * of insns is less than 4K when there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k.
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have UNKNOWN_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes UNKNOWN_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
 * types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'.
 * The function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes.
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either a pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 */
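
/* As a concrete illustration of the null check described above (a minimal
 * sketch, not an excerpt from a real program), the result of a map lookup is
 * typically tested before it is dereferenced:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  // taken branch: R0 is CONST_IMM 0, skip the store
 *    BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),   // fall-through: R0 is PTR_TO_MAP_VALUE, store allowed
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 */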

#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */

/* single container for all structs
 * one verifier_env per bpf_check() call
 */
struct verifier_env {
        struct bpf_prog *prog;          /* eBPF program being verified */
        struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of maps used by eBPF program */
        u32 used_map_cnt;               /* number of used maps */
};

/* verbose verifier prints what it's seeing
 * bpf_check() is called under lock, so no race to access these global vars
 */
static u32 log_level, log_size, log_len;
static char *log_buf;

static DEFINE_MUTEX(bpf_verifier_lock);

/* log_level controls verbosity level of eBPF verifier.
 * verbose() is used to dump the verification trace to the log, so the user
 * can figure out what's wrong with the program
 */
static void verbose(const char *fmt, ...)
{
        va_list args;

        if (log_level == 0 || log_len >= log_size - 1)
                return;

        va_start(args, fmt);
        log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
        va_end(args);
}

static const char *const bpf_class_string[] = {
        [BPF_LD]    = "ld",
        [BPF_LDX]   = "ldx",
        [BPF_ST]    = "st",
        [BPF_STX]   = "stx",
        [BPF_ALU]   = "alu",
        [BPF_JMP]   = "jmp",
        [BPF_RET]   = "BUG",
        [BPF_ALU64] = "alu64",
};

static const char *const bpf_alu_string[] = {
        [BPF_ADD >> 4]  = "+=",
        [BPF_SUB >> 4]  = "-=",
        [BPF_MUL >> 4]  = "*=",
        [BPF_DIV >> 4]  = "/=",
        [BPF_OR >> 4]   = "|=",
        [BPF_AND >> 4]  = "&=",
        [BPF_LSH >> 4]  = "<<=",
        [BPF_RSH >> 4]  = ">>=",
        [BPF_NEG >> 4]  = "neg",
        [BPF_MOD >> 4]  = "%=",
        [BPF_XOR >> 4]  = "^=",
        [BPF_MOV >> 4]  = "=",
        [BPF_ARSH >> 4] = "s>>=",
        [BPF_END >> 4]  = "endian",
};

static const char *const bpf_ldst_string[] = {
        [BPF_W >> 3]  = "u32",
        [BPF_H >> 3]  = "u16",
        [BPF_B >> 3]  = "u8",
        [BPF_DW >> 3] = "u64",
};

static const char *const bpf_jmp_string[] = {
        [BPF_JA >> 4]   = "jmp",
        [BPF_JEQ >> 4]  = "==",
        [BPF_JGT >> 4]  = ">",
        [BPF_JGE >> 4]  = ">=",
        [BPF_JSET >> 4] = "&",
        [BPF_JNE >> 4]  = "!=",
        [BPF_JSGT >> 4] = "s>",
        [BPF_JSGE >> 4] = "s>=",
        [BPF_CALL >> 4] = "call",
        [BPF_EXIT >> 4] = "exit",
};

static void print_bpf_insn(struct bpf_insn *insn)
{
        u8 class = BPF_CLASS(insn->code);

        if (class == BPF_ALU || class == BPF_ALU64) {
                if (BPF_SRC(insn->code) == BPF_X)
                        verbose("(%02x) %sr%d %s %sr%d\n",
                                insn->code, class == BPF_ALU ? "(u32) " : "",
                                insn->dst_reg,
                                bpf_alu_string[BPF_OP(insn->code) >> 4],
                                class == BPF_ALU ? "(u32) " : "",
                                insn->src_reg);
                else
                        verbose("(%02x) %sr%d %s %s%d\n",
                                insn->code, class == BPF_ALU ? "(u32) " : "",
                                insn->dst_reg,
                                bpf_alu_string[BPF_OP(insn->code) >> 4],
                                class == BPF_ALU ? "(u32) " : "",
                                insn->imm);
        } else if (class == BPF_STX) {
                if (BPF_MODE(insn->code) == BPF_MEM)
                        verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
                                insn->code,
                                bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
                                insn->dst_reg,
                                insn->off, insn->src_reg);
                else if (BPF_MODE(insn->code) == BPF_XADD)
                        verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
                                insn->code,
                                bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
                                insn->dst_reg, insn->off,
                                insn->src_reg);
                else
                        verbose("BUG_%02x\n", insn->code);
        } else if (class == BPF_ST) {
                if (BPF_MODE(insn->code) != BPF_MEM) {
                        verbose("BUG_st_%02x\n", insn->code);
                        return;
                }
                verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
                        insn->code,
                        bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
                        insn->dst_reg,
                        insn->off, insn->imm);
        } else if (class == BPF_LDX) {
                if (BPF_MODE(insn->code) != BPF_MEM) {
                        verbose("BUG_ldx_%02x\n", insn->code);
                        return;
                }
                verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
                        insn->code, insn->dst_reg,
                        bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
                        insn->src_reg, insn->off);
        } else if (class == BPF_LD) {
                if (BPF_MODE(insn->code) == BPF_ABS) {
                        verbose("(%02x) r0 = *(%s *)skb[%d]\n",
                                insn->code,
                                bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
                                insn->imm);
                } else if (BPF_MODE(insn->code) == BPF_IND) {
                        verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
                                insn->code,
                                bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
                                insn->src_reg, insn->imm);
                } else if (BPF_MODE(insn->code) == BPF_IMM) {
                        verbose("(%02x) r%d = 0x%x\n",
                                insn->code, insn->dst_reg, insn->imm);
                } else {
                        verbose("BUG_ld_%02x\n", insn->code);
                        return;
                }
        } else if (class == BPF_JMP) {
                u8 opcode = BPF_OP(insn->code);

                if (opcode == BPF_CALL) {
                        verbose("(%02x) call %d\n", insn->code, insn->imm);
                } else if (insn->code == (BPF_JMP | BPF_JA)) {
                        verbose("(%02x) goto pc%+d\n",
                                insn->code, insn->off);
                } else if (insn->code == (BPF_JMP | BPF_EXIT)) {
                        verbose("(%02x) exit\n", insn->code);
                } else if (BPF_SRC(insn->code) == BPF_X) {
                        verbose("(%02x) if r%d %s r%d goto pc%+d\n",
                                insn->code, insn->dst_reg,
                                bpf_jmp_string[BPF_OP(insn->code) >> 4],
                                insn->src_reg, insn->off);
                } else {
                        verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
                                insn->code, insn->dst_reg,
                                bpf_jmp_string[BPF_OP(insn->code) >> 4],
                                insn->imm, insn->off);
                }
        } else {
                verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
        }
}
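
/* For reference, the verbose log lines produced above look roughly like this
 * (hand-worked examples based on the format strings, not captured output):
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10)          ->  (bf) r2 = r10
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4)         ->  (07) r2 += -4
 *    BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4) ->  (63) *(u32 *)(r10 -4) = r0
 *    BPF_EXIT_INSN()                               ->  (95) exit
 */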

/* return the map pointer stored inside BPF_LD_IMM64 instruction */
static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
{
        u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;

        return (struct bpf_map *) (unsigned long) imm64;
}
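
/* A BPF_LD_IMM64 load occupies two struct bpf_insn slots: insn[0].imm holds
 * the lower 32 bits of the 64-bit value and insn[1].imm the upper 32 bits,
 * while all other fields of insn[1] must be zero. As a worked example
 * (the address below is purely illustrative):
 *    map pointer 0xffff880012345678
 *    insn[0].imm = 0x12345678, insn[1].imm = 0xffff8800
 * ld_imm64_to_map_ptr() simply reassembles the two halves.
 */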

/* look for pseudo eBPF instructions that access map FDs and
 * replace them with actual map pointers
 */
static int replace_map_fd_with_map_ptr(struct verifier_env *env)
{
        struct bpf_insn *insn = env->prog->insnsi;
        int insn_cnt = env->prog->len;
        int i, j;

        for (i = 0; i < insn_cnt; i++, insn++) {
                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
                        struct bpf_map *map;
                        struct fd f;

                        if (i == insn_cnt - 1 || insn[1].code != 0 ||
                            insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
                            insn[1].off != 0) {
                                verbose("invalid bpf_ld_imm64 insn\n");
                                return -EINVAL;
                        }

                        if (insn->src_reg == 0)
                                /* valid generic load 64-bit imm */
                                goto next_insn;

                        if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
                                verbose("unrecognized bpf_ld_imm64 insn\n");
                                return -EINVAL;
                        }

                        f = fdget(insn->imm);

                        map = bpf_map_get(f);
                        if (IS_ERR(map)) {
                                verbose("fd %d is not pointing to valid bpf_map\n",
                                        insn->imm);
                                fdput(f);
                                return PTR_ERR(map);
                        }

                        /* store map pointer inside BPF_LD_IMM64 instruction */
                        insn[0].imm = (u32) (unsigned long) map;
                        insn[1].imm = ((u64) (unsigned long) map) >> 32;

                        /* check whether we recorded this map already */
                        for (j = 0; j < env->used_map_cnt; j++)
                                if (env->used_maps[j] == map) {
                                        fdput(f);
                                        goto next_insn;
                                }

                        if (env->used_map_cnt >= MAX_USED_MAPS) {
                                fdput(f);
                                return -E2BIG;
                        }

                        /* remember this map */
                        env->used_maps[env->used_map_cnt++] = map;

                        /* hold the map. If the program is rejected by verifier,
                         * the map will be released by release_maps() or it
                         * will be used by the valid program until it's unloaded
                         * and all maps are released in free_bpf_prog_info()
                         */
                        atomic_inc(&map->refcnt);

                        fdput(f);
next_insn:
                        insn++;
                        i++;
                }
        }

        /* now all pseudo BPF_LD_IMM64 instructions load valid
         * 'struct bpf_map *' into a register instead of user map_fd.
         * These pointers will be used later by verifier to validate map access.
         */
        return 0;
}
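
/* To illustrate the rewrite above (a sketch; the fd value is hypothetical):
 * user space emits BPF_LD_MAP_FD(BPF_REG_1, 5), i.e. two insns
 *    insn[0]: code = BPF_LD | BPF_DW | BPF_IMM, dst_reg = 1,
 *             src_reg = BPF_PSEUDO_MAP_FD, imm = 5 (the map fd)
 *    insn[1]: all fields zero
 * and after replace_map_fd_with_map_ptr() the same pair carries the kernel
 * pointer of that map split across insn[0].imm (low 32 bits) and
 * insn[1].imm (high 32 bits), with src_reg still set to BPF_PSEUDO_MAP_FD
 * until convert_pseudo_ld_imm64() clears it for a valid program.
 */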

/* drop refcnt of maps used by the rejected program */
static void release_maps(struct verifier_env *env)
{
        int i;

        for (i = 0; i < env->used_map_cnt; i++)
                bpf_map_put(env->used_maps[i]);
}

/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct verifier_env *env)
{
        struct bpf_insn *insn = env->prog->insnsi;
        int insn_cnt = env->prog->len;
        int i;

        for (i = 0; i < insn_cnt; i++, insn++)
                if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
                        insn->src_reg = 0;
}

int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
{
        char __user *log_ubuf = NULL;
        struct verifier_env *env;
        int ret = -EINVAL;

        if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
                return -E2BIG;

        /* 'struct verifier_env' can be global, but since it's not small,
         * allocate/free it every time bpf_check() is called
         */
        env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
        if (!env)
                return -ENOMEM;

        env->prog = prog;

        /* grab the mutex to protect the few globals used by verifier */
        mutex_lock(&bpf_verifier_lock);

        if (attr->log_level || attr->log_buf || attr->log_size) {
                /* user requested verbose verifier output
                 * and supplied buffer to store the verification trace
                 */
                log_level = attr->log_level;
                log_ubuf = (char __user *) (unsigned long) attr->log_buf;
                log_size = attr->log_size;
                log_len = 0;

                ret = -EINVAL;
                /* log_* values have to be sane */
                if (log_size < 128 || log_size > UINT_MAX >> 8 ||
                    log_level == 0 || log_ubuf == NULL)
                        goto free_env;

                ret = -ENOMEM;
                log_buf = vmalloc(log_size);
                if (!log_buf)
                        goto free_env;
        } else {
                log_level = 0;
        }

        ret = replace_map_fd_with_map_ptr(env);
        if (ret < 0)
                goto skip_full_check;

        /* ret = do_check(env); */

skip_full_check:

        if (log_level && log_len >= log_size - 1) {
                BUG_ON(log_len >= log_size);
                /* verifier log exceeded user supplied buffer */
                ret = -ENOSPC;
                /* fall through to return what was recorded */
        }

        /* copy verifier log back to user space including trailing zero */
        if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
                ret = -EFAULT;
                goto free_log_buf;
        }

        if (ret == 0 && env->used_map_cnt) {
                /* if program passed verifier, update used_maps in bpf_prog_info */
                prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
                                                     sizeof(env->used_maps[0]),
                                                     GFP_KERNEL);

                if (!prog->aux->used_maps) {
                        ret = -ENOMEM;
                        goto free_log_buf;
                }

                memcpy(prog->aux->used_maps, env->used_maps,
                       sizeof(env->used_maps[0]) * env->used_map_cnt);
                prog->aux->used_map_cnt = env->used_map_cnt;

                /* program is valid. Convert pseudo bpf_ld_imm64 into generic
                 * bpf_ld_imm64 instructions
                 */
                convert_pseudo_ld_imm64(env);
        }

free_log_buf:
        if (log_level)
                vfree(log_buf);
free_env:
        if (!prog->aux->used_maps)
                /* if we didn't copy map pointers into bpf_prog_info, release
                 * them now. Otherwise free_bpf_prog_info() will release them.
                 */
                release_maps(env);
        kfree(env);
        mutex_unlock(&bpf_verifier_lock);
        return ret;
}
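
/* bpf_check() is the entry point used during program load (the BPF_PROG_LOAD
 * command of the bpf(2) syscall) once the instructions have been copied into
 * the kernel. A simplified sketch of the caller (not code from this file;
 * label name is hypothetical):
 *
 *        err = bpf_check(prog, attr);
 *        if (err < 0)
 *                goto free_prog;
 *        // on success prog->aux->used_maps owns references to the used maps
 */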