net: filter: x86: split bpf_jit_compile()
[deliverable/linux.git] / arch / x86 / net / bpf_jit_comp.c
1 /* bpf_jit_comp.c : BPF JIT compiler
2 *
3 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; version 2
8 * of the License.
9 */
10 #include <linux/moduleloader.h>
11 #include <asm/cacheflush.h>
12 #include <linux/netdevice.h>
13 #include <linux/filter.h>
14 #include <linux/if_vlan.h>
15 #include <linux/random.h>
16
17 /*
18 * Conventions :
19 * EAX : BPF A accumulator
20 * EBX : BPF X accumulator
21 * RDI : pointer to skb (first argument given to JIT function)
22 * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
23 * ECX,EDX,ESI : scratch registers
24 * r9d : skb->len - skb->data_len (headlen)
25 * r8 : skb->data
26 * -8(RBP) : saved RBX value
27 * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
28 */
29 int bpf_jit_enable __read_mostly;
30
31 /*
32 * assembly code in arch/x86/net/bpf_jit.S
33 */
34 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
35 extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
36 extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
37 extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
38 extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
39
40 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
41 {
42 if (len == 1)
43 *ptr = bytes;
44 else if (len == 2)
45 *(u16 *)ptr = bytes;
46 else {
47 *(u32 *)ptr = bytes;
48 barrier();
49 }
50 return ptr + len;
51 }
52
/*
 * Emission helpers.  They all append to, and advance, a cursor that must be
 * visible at the expansion site under the name 'prog'.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); } while (0)

/* Emit one to four opcode bytes; b1 is packed lowest, so it is stored first. */
#define EMIT1(b1) \
	EMIT(b1, 1)
#define EMIT2(b1, b2) \
	EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) \
	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) \
	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

/* Emit a one-byte opcode followed by a 32-bit immediate/displacement. */
#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4);} while (0)

/* Zero the BPF accumulator / index register. */
#define CLEAR_A() EMIT2(0x31, 0xc0)	/* xor %eax,%eax */
#define CLEAR_X() EMIT2(0x31, 0xdb)	/* xor %ebx,%ebx */
63
/* True when @value fits in a sign-extended 8-bit immediate (-128..127). */
static inline bool is_imm8(int value)
{
	return !(value < -128 || value > 127);
}
68
/* True when a jump displacement of @offset fits in a signed 8-bit field. */
static inline bool is_near(int offset)
{
	if (offset < -128)
		return false;
	if (offset > 127)
		return false;
	return true;
}
73
/*
 * Emit an unconditional jump of 'offset' bytes relative to the end of the
 * jump itself.  A zero offset emits nothing; short displacements use the
 * 2-byte form, everything else the 5-byte form.
 */
#define EMIT_JMP(offset)						\
do {									\
	if (!(offset)) {						\
		/* target is the next instruction: nothing to emit */	\
	} else if (is_near(offset)) {					\
		EMIT2(0xeb, offset);		/* jmp .+off8 */	\
	} else {							\
		EMIT1_off32(0xe9, offset);	/* jmp .+off32 */	\
	}								\
} while (0)
83
/*
 * Short-form (rel8) opcodes of the x86 conditional jumps used by the JIT.
 * The far (rel32) form of each is a 0x0f prefix followed by opcode + 0x10.
 */
#define X86_JB		0x72
#define X86_JAE		0x73
#define X86_JE		0x74
#define X86_JNE		0x75
#define X86_JBE		0x76
#define X86_JA		0x77

/*
 * Emit conditional jump 'op' (one of the X86_Jxx values above) with the
 * given displacement: 2 bytes when the displacement is near, 6 otherwise.
 */
#define EMIT_COND_JMP(op, offset)					\
do {									\
	if (!is_near(offset)) {						\
		EMIT2(0x0f, (op) + 0x10);				\
		EMIT(offset, 4);		/* jxx .+off32 */	\
	} else {							\
		EMIT2(op, offset);		/* jxx .+off8 */	\
	}								\
} while (0)
103
/*
 * Expands to a switch case for BPF conditional jump CODE: records the x86
 * opcode to use when the condition holds (TOP) and when it does not (FOP)
 * in the locals t_op / f_op, then continues at the shared 'cond_branch'
 * label of the enclosing switch.
 */
#define COND_SEL(CODE, TOP, FOP)	\
	case CODE:			\
		t_op = (TOP);		\
		f_op = (FOP);		\
		goto cond_branch
109
110
/* Feature bits recorded while translating a filter. */
#define SEEN_DATAREF	(1 << 0)	/* might call external helpers */
#define SEEN_XREG	(1 << 1)	/* ebx is used */
#define SEEN_MEM	(1 << 2)	/* use mem[] for temporary storage */
114
115 static inline void bpf_flush_icache(void *start, void *end)
116 {
117 mm_segment_t old_fs = get_fs();
118
119 set_fs(KERNEL_DS);
120 smp_wmb();
121 flush_icache_range((unsigned long)start, (unsigned long)end);
122 set_fs(old_fs);
123 }
124
/*
 * Pick the load helper variant for constant offset K:
 *   K >= 0                -> func##_positive_offset
 *   SKF_LL_OFF <= K < 0   -> func##_negative_offset
 *   K < SKF_LL_OFF        -> func (the generic entry point)
 *
 * K is parenthesized before the cast so that an expression argument is
 * converted as a whole rather than only its first operand.
 */
#define CHOOSE_LOAD_FUNC(K, func) \
	((int)(K) < 0 ? ((int)(K) >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
127
128 /* Helper to find the offset of pkt_type in sk_buff
129 * We want to make sure its still a 3bit field starting at a byte boundary.
130 */
131 #define PKT_TYPE_MAX 7
132 static int pkt_type_offset(void)
133 {
134 struct sk_buff skb_probe = {
135 .pkt_type = ~0,
136 };
137 char *ct = (char *)&skb_probe;
138 unsigned int off;
139
140 for (off = 0; off < sizeof(struct sk_buff); off++) {
141 if (ct[off] == PKT_TYPE_MAX)
142 return off;
143 }
144 pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
145 return -1;
146 }
147
148 struct bpf_binary_header {
149 unsigned int pages;
150 /* Note : for security reasons, bpf code will follow a randomly
151 * sized amount of int3 instructions
152 */
153 u8 image[];
154 };
155
156 static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
157 u8 **image_ptr)
158 {
159 unsigned int sz, hole;
160 struct bpf_binary_header *header;
161
162 /* Most of BPF filters are really small,
163 * but if some of them fill a page, allow at least
164 * 128 extra bytes to insert a random section of int3
165 */
166 sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE);
167 header = module_alloc(sz);
168 if (!header)
169 return NULL;
170
171 memset(header, 0xcc, sz); /* fill whole space with int3 instructions */
172
173 header->pages = sz / PAGE_SIZE;
174 hole = sz - (proglen + sizeof(*header));
175
176 /* insert a random number of int3 instructions before BPF code */
177 *image_ptr = &header->image[prandom_u32() % hole];
178 return header;
179 }
180
181 struct jit_context {
182 unsigned int cleanup_addr; /* epilogue code offset */
183 int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
184 u8 seen;
185 };
186
187 static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
188 int oldproglen, struct jit_context *ctx)
189 {
190 const struct sock_filter *filter = bpf_prog->insns;
191 int flen = bpf_prog->len;
192 u8 temp[64];
193 u8 *prog;
194 int ilen, i, proglen;
195 int t_offset, f_offset;
196 u8 t_op, f_op, seen = 0;
197 u8 *func;
198 unsigned int cleanup_addr = ctx->cleanup_addr;
199 u8 seen_or_pass0 = ctx->seen;
200
201 /* no prologue/epilogue for trivial filters (RET something) */
202 proglen = 0;
203 prog = temp;
204
205 if (seen_or_pass0) {
206 EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
207 EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
208 /* note : must save %rbx in case bpf_error is hit */
209 if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
210 EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
211 if (seen_or_pass0 & SEEN_XREG)
212 CLEAR_X(); /* make sure we dont leek kernel memory */
213
214 /*
215 * If this filter needs to access skb data,
216 * loads r9 and r8 with :
217 * r9 = skb->len - skb->data_len
218 * r8 = skb->data
219 */
220 if (seen_or_pass0 & SEEN_DATAREF) {
221 if (offsetof(struct sk_buff, len) <= 127)
222 /* mov off8(%rdi),%r9d */
223 EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
224 else {
225 /* mov off32(%rdi),%r9d */
226 EMIT3(0x44, 0x8b, 0x8f);
227 EMIT(offsetof(struct sk_buff, len), 4);
228 }
229 if (is_imm8(offsetof(struct sk_buff, data_len)))
230 /* sub off8(%rdi),%r9d */
231 EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
232 else {
233 EMIT3(0x44, 0x2b, 0x8f);
234 EMIT(offsetof(struct sk_buff, data_len), 4);
235 }
236
237 if (is_imm8(offsetof(struct sk_buff, data)))
238 /* mov off8(%rdi),%r8 */
239 EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
240 else {
241 /* mov off32(%rdi),%r8 */
242 EMIT3(0x4c, 0x8b, 0x87);
243 EMIT(offsetof(struct sk_buff, data), 4);
244 }
245 }
246 }
247
248 switch (filter[0].code) {
249 case BPF_S_RET_K:
250 case BPF_S_LD_W_LEN:
251 case BPF_S_ANC_PROTOCOL:
252 case BPF_S_ANC_IFINDEX:
253 case BPF_S_ANC_MARK:
254 case BPF_S_ANC_RXHASH:
255 case BPF_S_ANC_CPU:
256 case BPF_S_ANC_VLAN_TAG:
257 case BPF_S_ANC_VLAN_TAG_PRESENT:
258 case BPF_S_ANC_QUEUE:
259 case BPF_S_ANC_PKTTYPE:
260 case BPF_S_LD_W_ABS:
261 case BPF_S_LD_H_ABS:
262 case BPF_S_LD_B_ABS:
263 /* first instruction sets A register (or is RET 'constant') */
264 break;
265 default:
266 /* make sure we dont leak kernel information to user */
267 CLEAR_A(); /* A = 0 */
268 }
269
270 for (i = 0; i < flen; i++) {
271 unsigned int K = filter[i].k;
272
273 switch (filter[i].code) {
274 case BPF_S_ALU_ADD_X: /* A += X; */
275 seen |= SEEN_XREG;
276 EMIT2(0x01, 0xd8); /* add %ebx,%eax */
277 break;
278 case BPF_S_ALU_ADD_K: /* A += K; */
279 if (!K)
280 break;
281 if (is_imm8(K))
282 EMIT3(0x83, 0xc0, K); /* add imm8,%eax */
283 else
284 EMIT1_off32(0x05, K); /* add imm32,%eax */
285 break;
286 case BPF_S_ALU_SUB_X: /* A -= X; */
287 seen |= SEEN_XREG;
288 EMIT2(0x29, 0xd8); /* sub %ebx,%eax */
289 break;
290 case BPF_S_ALU_SUB_K: /* A -= K */
291 if (!K)
292 break;
293 if (is_imm8(K))
294 EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
295 else
296 EMIT1_off32(0x2d, K); /* sub imm32,%eax */
297 break;
298 case BPF_S_ALU_MUL_X: /* A *= X; */
299 seen |= SEEN_XREG;
300 EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */
301 break;
302 case BPF_S_ALU_MUL_K: /* A *= K */
303 if (is_imm8(K))
304 EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
305 else {
306 EMIT2(0x69, 0xc0); /* imul imm32,%eax */
307 EMIT(K, 4);
308 }
309 break;
310 case BPF_S_ALU_DIV_X: /* A /= X; */
311 seen |= SEEN_XREG;
312 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
313 if (ctx->pc_ret0 > 0) {
314 /* addrs[pc_ret0 - 1] is start address of target
315 * (addrs[i] - 4) is the address following this jmp
316 * ("xor %edx,%edx; div %ebx" being 4 bytes long)
317 */
318 EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
319 (addrs[i] - 4));
320 } else {
321 EMIT_COND_JMP(X86_JNE, 2 + 5);
322 CLEAR_A();
323 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
324 }
325 EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
326 break;
327 case BPF_S_ALU_MOD_X: /* A %= X; */
328 seen |= SEEN_XREG;
329 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
330 if (ctx->pc_ret0 > 0) {
331 /* addrs[pc_ret0 - 1] is start address of target
332 * (addrs[i] - 6) is the address following this jmp
333 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
334 */
335 EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
336 (addrs[i] - 6));
337 } else {
338 EMIT_COND_JMP(X86_JNE, 2 + 5);
339 CLEAR_A();
340 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
341 }
342 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
343 EMIT2(0xf7, 0xf3); /* div %ebx */
344 EMIT2(0x89, 0xd0); /* mov %edx,%eax */
345 break;
346 case BPF_S_ALU_MOD_K: /* A %= K; */
347 if (K == 1) {
348 CLEAR_A();
349 break;
350 }
351 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
352 EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
353 EMIT2(0xf7, 0xf1); /* div %ecx */
354 EMIT2(0x89, 0xd0); /* mov %edx,%eax */
355 break;
356 case BPF_S_ALU_DIV_K: /* A /= K */
357 if (K == 1)
358 break;
359 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
360 EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
361 EMIT2(0xf7, 0xf1); /* div %ecx */
362 break;
363 case BPF_S_ALU_AND_X:
364 seen |= SEEN_XREG;
365 EMIT2(0x21, 0xd8); /* and %ebx,%eax */
366 break;
367 case BPF_S_ALU_AND_K:
368 if (K >= 0xFFFFFF00) {
369 EMIT2(0x24, K & 0xFF); /* and imm8,%al */
370 } else if (K >= 0xFFFF0000) {
371 EMIT2(0x66, 0x25); /* and imm16,%ax */
372 EMIT(K, 2);
373 } else {
374 EMIT1_off32(0x25, K); /* and imm32,%eax */
375 }
376 break;
377 case BPF_S_ALU_OR_X:
378 seen |= SEEN_XREG;
379 EMIT2(0x09, 0xd8); /* or %ebx,%eax */
380 break;
381 case BPF_S_ALU_OR_K:
382 if (is_imm8(K))
383 EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
384 else
385 EMIT1_off32(0x0d, K); /* or imm32,%eax */
386 break;
387 case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
388 case BPF_S_ALU_XOR_X:
389 seen |= SEEN_XREG;
390 EMIT2(0x31, 0xd8); /* xor %ebx,%eax */
391 break;
392 case BPF_S_ALU_XOR_K: /* A ^= K; */
393 if (K == 0)
394 break;
395 if (is_imm8(K))
396 EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */
397 else
398 EMIT1_off32(0x35, K); /* xor imm32,%eax */
399 break;
400 case BPF_S_ALU_LSH_X: /* A <<= X; */
401 seen |= SEEN_XREG;
402 EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */
403 break;
404 case BPF_S_ALU_LSH_K:
405 if (K == 0)
406 break;
407 else if (K == 1)
408 EMIT2(0xd1, 0xe0); /* shl %eax */
409 else
410 EMIT3(0xc1, 0xe0, K);
411 break;
412 case BPF_S_ALU_RSH_X: /* A >>= X; */
413 seen |= SEEN_XREG;
414 EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */
415 break;
416 case BPF_S_ALU_RSH_K: /* A >>= K; */
417 if (K == 0)
418 break;
419 else if (K == 1)
420 EMIT2(0xd1, 0xe8); /* shr %eax */
421 else
422 EMIT3(0xc1, 0xe8, K);
423 break;
424 case BPF_S_ALU_NEG:
425 EMIT2(0xf7, 0xd8); /* neg %eax */
426 break;
427 case BPF_S_RET_K:
428 if (!K) {
429 if (ctx->pc_ret0 == -1)
430 ctx->pc_ret0 = i;
431 CLEAR_A();
432 } else {
433 EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
434 }
435 /* fallinto */
436 case BPF_S_RET_A:
437 if (seen_or_pass0) {
438 if (i != flen - 1) {
439 EMIT_JMP(cleanup_addr - addrs[i]);
440 break;
441 }
442 if (seen_or_pass0 & SEEN_XREG)
443 EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
444 EMIT1(0xc9); /* leaveq */
445 }
446 EMIT1(0xc3); /* ret */
447 break;
448 case BPF_S_MISC_TAX: /* X = A */
449 seen |= SEEN_XREG;
450 EMIT2(0x89, 0xc3); /* mov %eax,%ebx */
451 break;
452 case BPF_S_MISC_TXA: /* A = X */
453 seen |= SEEN_XREG;
454 EMIT2(0x89, 0xd8); /* mov %ebx,%eax */
455 break;
456 case BPF_S_LD_IMM: /* A = K */
457 if (!K)
458 CLEAR_A();
459 else
460 EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
461 break;
462 case BPF_S_LDX_IMM: /* X = K */
463 seen |= SEEN_XREG;
464 if (!K)
465 CLEAR_X();
466 else
467 EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
468 break;
469 case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
470 seen |= SEEN_MEM;
471 EMIT3(0x8b, 0x45, 0xf0 - K*4);
472 break;
473 case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
474 seen |= SEEN_XREG | SEEN_MEM;
475 EMIT3(0x8b, 0x5d, 0xf0 - K*4);
476 break;
477 case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
478 seen |= SEEN_MEM;
479 EMIT3(0x89, 0x45, 0xf0 - K*4);
480 break;
481 case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
482 seen |= SEEN_XREG | SEEN_MEM;
483 EMIT3(0x89, 0x5d, 0xf0 - K*4);
484 break;
485 case BPF_S_LD_W_LEN: /* A = skb->len; */
486 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
487 if (is_imm8(offsetof(struct sk_buff, len)))
488 /* mov off8(%rdi),%eax */
489 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
490 else {
491 EMIT2(0x8b, 0x87);
492 EMIT(offsetof(struct sk_buff, len), 4);
493 }
494 break;
495 case BPF_S_LDX_W_LEN: /* X = skb->len; */
496 seen |= SEEN_XREG;
497 if (is_imm8(offsetof(struct sk_buff, len)))
498 /* mov off8(%rdi),%ebx */
499 EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
500 else {
501 EMIT2(0x8b, 0x9f);
502 EMIT(offsetof(struct sk_buff, len), 4);
503 }
504 break;
505 case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
506 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
507 if (is_imm8(offsetof(struct sk_buff, protocol))) {
508 /* movzwl off8(%rdi),%eax */
509 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
510 } else {
511 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
512 EMIT(offsetof(struct sk_buff, protocol), 4);
513 }
514 EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */
515 break;
516 case BPF_S_ANC_IFINDEX:
517 if (is_imm8(offsetof(struct sk_buff, dev))) {
518 /* movq off8(%rdi),%rax */
519 EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
520 } else {
521 EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
522 EMIT(offsetof(struct sk_buff, dev), 4);
523 }
524 EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */
525 EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
526 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
527 EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */
528 EMIT(offsetof(struct net_device, ifindex), 4);
529 break;
530 case BPF_S_ANC_MARK:
531 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
532 if (is_imm8(offsetof(struct sk_buff, mark))) {
533 /* mov off8(%rdi),%eax */
534 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
535 } else {
536 EMIT2(0x8b, 0x87);
537 EMIT(offsetof(struct sk_buff, mark), 4);
538 }
539 break;
540 case BPF_S_ANC_RXHASH:
541 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
542 if (is_imm8(offsetof(struct sk_buff, hash))) {
543 /* mov off8(%rdi),%eax */
544 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
545 } else {
546 EMIT2(0x8b, 0x87);
547 EMIT(offsetof(struct sk_buff, hash), 4);
548 }
549 break;
550 case BPF_S_ANC_QUEUE:
551 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
552 if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
553 /* movzwl off8(%rdi),%eax */
554 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
555 } else {
556 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
557 EMIT(offsetof(struct sk_buff, queue_mapping), 4);
558 }
559 break;
560 case BPF_S_ANC_CPU:
561 #ifdef CONFIG_SMP
562 EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
563 EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
564 #else
565 CLEAR_A();
566 #endif
567 break;
568 case BPF_S_ANC_VLAN_TAG:
569 case BPF_S_ANC_VLAN_TAG_PRESENT:
570 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
571 if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
572 /* movzwl off8(%rdi),%eax */
573 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
574 } else {
575 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
576 EMIT(offsetof(struct sk_buff, vlan_tci), 4);
577 }
578 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
579 if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
580 EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */
581 } else {
582 EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */
583 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
584 }
585 break;
586 case BPF_S_ANC_PKTTYPE:
587 {
588 int off = pkt_type_offset();
589
590 if (off < 0)
591 return -EINVAL;
592 if (is_imm8(off)) {
593 /* movzbl off8(%rdi),%eax */
594 EMIT4(0x0f, 0xb6, 0x47, off);
595 } else {
596 /* movbl off32(%rdi),%eax */
597 EMIT3(0x0f, 0xb6, 0x87);
598 EMIT(off, 4);
599 }
600 EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */
601 break;
602 }
603 case BPF_S_LD_W_ABS:
604 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
605 common_load: seen |= SEEN_DATAREF;
606 t_offset = func - (image + addrs[i]);
607 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
608 EMIT1_off32(0xe8, t_offset); /* call */
609 break;
610 case BPF_S_LD_H_ABS:
611 func = CHOOSE_LOAD_FUNC(K, sk_load_half);
612 goto common_load;
613 case BPF_S_LD_B_ABS:
614 func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
615 goto common_load;
616 case BPF_S_LDX_B_MSH:
617 func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
618 seen |= SEEN_DATAREF | SEEN_XREG;
619 t_offset = func - (image + addrs[i]);
620 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
621 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
622 break;
623 case BPF_S_LD_W_IND:
624 func = sk_load_word;
625 common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
626 t_offset = func - (image + addrs[i]);
627 if (K) {
628 if (is_imm8(K)) {
629 EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
630 } else {
631 EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
632 EMIT(K, 4);
633 }
634 } else {
635 EMIT2(0x89,0xde); /* mov %ebx,%esi */
636 }
637 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
638 break;
639 case BPF_S_LD_H_IND:
640 func = sk_load_half;
641 goto common_load_ind;
642 case BPF_S_LD_B_IND:
643 func = sk_load_byte;
644 goto common_load_ind;
645 case BPF_S_JMP_JA:
646 t_offset = addrs[i + K] - addrs[i];
647 EMIT_JMP(t_offset);
648 break;
649 COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
650 COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
651 COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
652 COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
653 COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
654 COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
655 COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
656 COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
657
658 cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
659 t_offset = addrs[i + filter[i].jt] - addrs[i];
660
661 /* same targets, can avoid doing the test :) */
662 if (filter[i].jt == filter[i].jf) {
663 EMIT_JMP(t_offset);
664 break;
665 }
666
667 switch (filter[i].code) {
668 case BPF_S_JMP_JGT_X:
669 case BPF_S_JMP_JGE_X:
670 case BPF_S_JMP_JEQ_X:
671 seen |= SEEN_XREG;
672 EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
673 break;
674 case BPF_S_JMP_JSET_X:
675 seen |= SEEN_XREG;
676 EMIT2(0x85, 0xd8); /* test %ebx,%eax */
677 break;
678 case BPF_S_JMP_JEQ_K:
679 if (K == 0) {
680 EMIT2(0x85, 0xc0); /* test %eax,%eax */
681 break;
682 }
683 case BPF_S_JMP_JGT_K:
684 case BPF_S_JMP_JGE_K:
685 if (K <= 127)
686 EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
687 else
688 EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
689 break;
690 case BPF_S_JMP_JSET_K:
691 if (K <= 0xFF)
692 EMIT2(0xa8, K); /* test imm8,%al */
693 else if (!(K & 0xFFFF00FF))
694 EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
695 else if (K <= 0xFFFF) {
696 EMIT2(0x66, 0xa9); /* test imm16,%ax */
697 EMIT(K, 2);
698 } else {
699 EMIT1_off32(0xa9, K); /* test imm32,%eax */
700 }
701 break;
702 }
703 if (filter[i].jt != 0) {
704 if (filter[i].jf && f_offset)
705 t_offset += is_near(f_offset) ? 2 : 5;
706 EMIT_COND_JMP(t_op, t_offset);
707 if (filter[i].jf)
708 EMIT_JMP(f_offset);
709 break;
710 }
711 EMIT_COND_JMP(f_op, f_offset);
712 break;
713 default:
714 /* hmm, too complex filter, give up with jit compiler */
715 return -EINVAL;
716 }
717 ilen = prog - temp;
718 if (image) {
719 if (unlikely(proglen + ilen > oldproglen)) {
720 pr_err("bpb_jit_compile fatal error\n");
721 return -EFAULT;
722 }
723 memcpy(image + proglen, temp, ilen);
724 }
725 proglen += ilen;
726 addrs[i] = proglen;
727 prog = temp;
728 }
729 /* last bpf instruction is always a RET :
730 * use it to give the cleanup instruction(s) addr
731 */
732 ctx->cleanup_addr = proglen - 1; /* ret */
733 if (seen_or_pass0)
734 ctx->cleanup_addr -= 1; /* leaveq */
735 if (seen_or_pass0 & SEEN_XREG)
736 ctx->cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
737
738 ctx->seen = seen;
739
740 return proglen;
741 }
742
743 void bpf_jit_compile(struct sk_filter *prog)
744 {
745 struct bpf_binary_header *header = NULL;
746 int proglen, oldproglen = 0;
747 struct jit_context ctx = {};
748 u8 *image = NULL;
749 int *addrs;
750 int pass;
751 int i;
752
753 if (!bpf_jit_enable)
754 return;
755
756 if (!prog || !prog->len)
757 return;
758
759 addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
760 if (!addrs)
761 return;
762
763 /* Before first pass, make a rough estimation of addrs[]
764 * each bpf instruction is translated to less than 64 bytes
765 */
766 for (proglen = 0, i = 0; i < prog->len; i++) {
767 proglen += 64;
768 addrs[i] = proglen;
769 }
770 ctx.cleanup_addr = proglen;
771 ctx.seen = SEEN_XREG | SEEN_DATAREF | SEEN_MEM;
772 ctx.pc_ret0 = -1;
773
774 for (pass = 0; pass < 10; pass++) {
775 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
776 if (proglen <= 0) {
777 image = NULL;
778 if (header)
779 module_free(NULL, header);
780 goto out;
781 }
782 if (image) {
783 if (proglen != oldproglen)
784 pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
785 proglen, oldproglen);
786 break;
787 }
788 if (proglen == oldproglen) {
789 header = bpf_alloc_binary(proglen, &image);
790 if (!header)
791 goto out;
792 }
793 oldproglen = proglen;
794 }
795
796 if (bpf_jit_enable > 1)
797 bpf_jit_dump(prog->len, proglen, 0, image);
798
799 if (image) {
800 bpf_flush_icache(header, image + proglen);
801 set_memory_ro((unsigned long)header, header->pages);
802 prog->bpf_func = (void *)image;
803 prog->jited = 1;
804 }
805 out:
806 kfree(addrs);
807 }
808
809 static void bpf_jit_free_deferred(struct work_struct *work)
810 {
811 struct sk_filter *fp = container_of(work, struct sk_filter, work);
812 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
813 struct bpf_binary_header *header = (void *)addr;
814
815 set_memory_rw(addr, header->pages);
816 module_free(NULL, header);
817 kfree(fp);
818 }
819
820 void bpf_jit_free(struct sk_filter *fp)
821 {
822 if (fp->jited) {
823 INIT_WORK(&fp->work, bpf_jit_free_deferred);
824 schedule_work(&fp->work);
825 } else {
826 kfree(fp);
827 }
828 }
This page took 0.06511 seconds and 5 git commands to generate.