/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>

/**
 * sk_filter - run a packet through a socket filter
 * @sk: sock associated with &sk_buff
 * @skb: buffer to filter
 *
 * Run the filter code and then trim skb->data to the size returned by
 * SK_RUN_FILTER. If pkt_len is 0 we toss the packet. If skb->len is
 * smaller than pkt_len we keep the whole skb->data. This is the socket
 * level wrapper to SK_RUN_FILTER. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);

		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter);
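
/*
 * A minimal caller sketch (illustrative only, with assumed names; the
 * real callers are the individual protocols' receive paths): run the
 * socket's filter before queueing, drop the skb on a non-zero return,
 * and otherwise proceed with the possibly trimmed skb:
 *
 *	err = sk_filter(sk, skb);
 *	if (err) {
 *		kfree_skb(skb);
 *		return err;
 *	}
 *	... enqueue skb to sk's receive queue; skb->len may have been
 *	trimmed to the filter's return value by pskb_trim() above ...
 */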

static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}

static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return raw_smp_processor_id();
}

/* note that this only generates 32-bit random numbers */
static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return prandom_u32();
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
				      PKT_TYPE_OFFSET());
		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		insn++;
		*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
#endif
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
		BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);

		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_tci));
		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
					      ~VLAN_TAG_PRESENT);
		} else {
			/* A >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
			/* A &= 1 */
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
		}
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(__get_random_u32);
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}
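
/*
 * For illustration (a sketch derived from the cases above): the classic
 * ancillary load
 *
 *	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_CPU)
 *
 * is expanded by convert_bpf_extensions() into an ordinary helper call
 * in the internal instruction set:
 *
 *	BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX)
 *	BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A)
 *	BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X)
 *	BPF_EMIT_CALL(__get_raw_cpu_id)
 *
 * i.e. the "magic" SKF_AD_OFF offset disappears after conversion.
 */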

/**
 * bpf_convert_filter - convert filter program
 * @prog: the user passed filter program
 * @len: the length of the user passed filter program
 * @new_prog: buffer where converted program will be stored
 * @new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF instructions to 'bpf_insn' style
 * internal BPF instructions. Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass does the remapping:
 *   new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 *
 * User BPF's register A is mapped to our BPF register 6, user BPF
 * register X is mapped to BPF register 7; frame pointer is always
 * register 10; Context 'void *ctx' is stored in register 1, that is,
 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
 * ctx == 'struct seccomp_data *'.
 */
int bpf_convert_filter(struct sock_filter *prog, int len,
		       struct bpf_insn *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i;
	struct bpf_insn *new_insn;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = new_prog;
	fp = prog;

	if (new_insn)
		*new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	new_insn++;

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_X;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K, RET_A are remapped into 2 insns. */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
						BPF_K : BPF_X, BPF_REG_0,
						BPF_REG_A, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}
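
/*
 * As a concrete illustration (a sketch following the mapping rules
 * above): the one-instruction classic program
 *
 *	BPF_STMT(BPF_RET | BPF_K, 0xffffffff)
 *
 * becomes the CTX-saving prologue plus the two-insn RET_K expansion:
 *
 *	BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1)
 *	BPF_MOV32_RAW(BPF_K, BPF_REG_0, BPF_REG_A, 0xffffffff)
 *	BPF_EXIT_INSN()
 */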

/* Security:
 *
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
 *
 * As we don't want to clear the mem[] array for each packet going through
 * sk_run_filter(), we check that a filter loaded by a user never tries to
 * read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
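
/*
 * For example (an illustrative sketch): this program loads mem[0]
 * without ever storing to it, so check_load_and_stores() flags the
 * read of an uninitialized cell and returns -EINVAL:
 *
 *	struct sock_filter bad[] = {
 *		BPF_STMT(BPF_LD | BPF_MEM, 0),
 *		BPF_STMT(BPF_RET | BPF_A, 0),
 *	};
 */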

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

/**
 * bpf_check_classic - verify socket filter code
 * @filter: filter to verify
 * @flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
{
	bool anc_found;
	int pc;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}
EXPORT_SYMBOL(bpf_check_classic);
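
/*
 * A minimal program that passes all of the above checks (sketch):
 * a single allowed RET instruction returning a maximal packet length,
 * i.e. "accept everything":
 *
 *	struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *
 *	err = bpf_check_classic(accept_all, ARRAY_SIZE(accept_all));
 */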

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;
	fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	bpf_release_orig_filter(prog);
	bpf_prog_free(prog);
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 * sk_filter_release_rcu - Release a socket filter by rcu_head
 * @rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 * sk_filter_release - release a socket filter
 * @fp: filter to remove
 *
 * Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* Try to charge the socket memory if there is space available.
 * Return true on success.
 */
bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_inc(&fp->refcnt);
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by bpf_prog_realloc().
		 */
		goto out_err_free;

	bpf_prog_select_runtime(fp);

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = false;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 * bpf_prog_create - create an unattached filter
 * @pfp: the unattached filter that is created
 * @fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
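
/*
 * Kernel-internal usage sketch (assumed caller; in-tree users are e.g.
 * drivers that need a private, socket-independent filter): build a
 * classic program, have bpf_prog_create() verify and prepare it, and
 * release it again with bpf_prog_destroy():
 *
 *	static struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_LEN, 0),
 *		BPF_STMT(BPF_RET | BPF_A, 0),
 *	};
 *	static struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(insns),
 *		.filter	= insns,
 *	};
 *	struct bpf_prog *prog;
 *	int err;
 *
 *	err = bpf_prog_create(&prog, &fprog);
 *	...
 *	bpf_prog_destroy(prog);
 */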

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

/**
 * sk_attach_filter - attach a socket filter
 * @fprog: the filter program
 * @sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = bpf_classic_proglen(fprog);
	unsigned int bpf_fsize = bpf_prog_size(fprog->len);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	prog = bpf_prog_alloc(bpf_fsize, 0);
	if (!prog)
		return -ENOMEM;

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		kfree(prog);
		return -EFAULT;
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		kfree(prog);
		return -ENOMEM;
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	prog = bpf_prepare_filter(prog);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp) {
		__bpf_prog_release(prog);
		return -ENOMEM;
	}
	fp->prog = prog;

	atomic_set(&fp->refcnt, 0);

	if (!sk_filter_charge(sk, fp)) {
		__sk_filter_release(fp);
		return -ENOMEM;
	}

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
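
/*
 * From user space this path is reached via setsockopt(2); a minimal
 * sketch of the corresponding caller (raw classic BPF, "accept all",
 * where 0x06 encodes BPF_RET | BPF_K):
 *
 *	struct sock_filter code[] = {
 *		{ 0x06, 0, 0, 0xffffffff },
 *	};
 *	struct sock_fprog bpf = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
 *	...
 *	setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, NULL, 0);
 */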

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);

int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that has been originally attached,
	 * so no conversion/decode needed anymore.
	 */
	fprog = filter->prog->orig_prog;

	ret = fprog->len;
	if (!len)
		/* User space only queries the number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API requests that the number
	 * of filter blocks be returned.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}
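
/*
 * Matching user-space sketch for the retrieval path above (assuming the
 * SO_GET_FILTER socket option): query the number of filter blocks with
 * a zero optlen first, then fetch the original, unconverted program:
 *
 *	struct sock_filter *code;
 *	socklen_t nblocks = 0;
 *
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &nblocks);
 *	code = calloc(nblocks, sizeof(*code));
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, code, &nblocks);
 */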