/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
/* global registry of map implementations, populated at boot via
 * bpf_register_map_type()
 */
static LIST_HEAD(bpf_map_types);
20 static struct bpf_map
*find_and_alloc_map(union bpf_attr
*attr
)
22 struct bpf_map_type_list
*tl
;
25 list_for_each_entry(tl
, &bpf_map_types
, list_node
) {
26 if (tl
->type
== attr
->map_type
) {
27 map
= tl
->ops
->map_alloc(attr
);
31 map
->map_type
= attr
->map_type
;
35 return ERR_PTR(-EINVAL
);
38 /* boot time registration of different map implementations */
39 void bpf_register_map_type(struct bpf_map_type_list
*tl
)
41 list_add(&tl
->list_node
, &bpf_map_types
);
44 /* called from workqueue */
45 static void bpf_map_free_deferred(struct work_struct
*work
)
47 struct bpf_map
*map
= container_of(work
, struct bpf_map
, work
);
49 /* implementation dependent freeing */
50 map
->ops
->map_free(map
);
53 /* decrement map refcnt and schedule it for freeing via workqueue
54 * (unrelying map implementation ops->map_free() might sleep)
56 void bpf_map_put(struct bpf_map
*map
)
58 if (atomic_dec_and_test(&map
->refcnt
)) {
59 INIT_WORK(&map
->work
, bpf_map_free_deferred
);
60 schedule_work(&map
->work
);
64 static int bpf_map_release(struct inode
*inode
, struct file
*filp
)
66 struct bpf_map
*map
= filp
->private_data
;
72 static const struct file_operations bpf_map_fops
= {
73 .release
= bpf_map_release
,
/* helper macro to check that unused fields 'union bpf_attr' are zero:
 * scans everything past CMD##_LAST_FIELD up to the end of the union and
 * evaluates to true (error) if any byte is non-zero.  Requires a local
 * 'union bpf_attr *attr' in scope.
 */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
84 #define BPF_MAP_CREATE_LAST_FIELD max_entries
85 /* called via syscall */
86 static int map_create(union bpf_attr
*attr
)
91 err
= CHECK_ATTR(BPF_MAP_CREATE
);
95 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
96 map
= find_and_alloc_map(attr
);
100 atomic_set(&map
->refcnt
, 1);
102 err
= anon_inode_getfd("bpf-map", &bpf_map_fops
, map
, O_RDWR
| O_CLOEXEC
);
105 /* failed to allocate fd */
111 map
->ops
->map_free(map
);
115 /* if error is returned, fd is released.
116 * On success caller should complete fd access with matching fdput()
118 struct bpf_map
*bpf_map_get(struct fd f
)
123 return ERR_PTR(-EBADF
);
125 if (f
.file
->f_op
!= &bpf_map_fops
) {
127 return ERR_PTR(-EINVAL
);
130 map
= f
.file
->private_data
;
135 /* helper to convert user pointers passed inside __aligned_u64 fields */
136 static void __user
*u64_to_ptr(__u64 val
)
138 return (void __user
*) (unsigned long) val
;
141 /* last field in 'union bpf_attr' used by this command */
142 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
144 static int map_lookup_elem(union bpf_attr
*attr
)
146 void __user
*ukey
= u64_to_ptr(attr
->key
);
147 void __user
*uvalue
= u64_to_ptr(attr
->value
);
148 int ufd
= attr
->map_fd
;
149 struct fd f
= fdget(ufd
);
154 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM
))
157 map
= bpf_map_get(f
);
162 key
= kmalloc(map
->key_size
, GFP_USER
);
167 if (copy_from_user(key
, ukey
, map
->key_size
) != 0)
172 value
= map
->ops
->map_lookup_elem(map
, key
);
177 if (copy_to_user(uvalue
, value
, map
->value_size
) != 0)
191 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
193 static int map_update_elem(union bpf_attr
*attr
)
195 void __user
*ukey
= u64_to_ptr(attr
->key
);
196 void __user
*uvalue
= u64_to_ptr(attr
->value
);
197 int ufd
= attr
->map_fd
;
198 struct fd f
= fdget(ufd
);
203 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM
))
206 map
= bpf_map_get(f
);
211 key
= kmalloc(map
->key_size
, GFP_USER
);
216 if (copy_from_user(key
, ukey
, map
->key_size
) != 0)
220 value
= kmalloc(map
->value_size
, GFP_USER
);
225 if (copy_from_user(value
, uvalue
, map
->value_size
) != 0)
228 /* eBPF program that use maps are running under rcu_read_lock(),
229 * therefore all map accessors rely on this fact, so do the same here
232 err
= map
->ops
->map_update_elem(map
, key
, value
);
244 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
246 static int map_delete_elem(union bpf_attr
*attr
)
248 void __user
*ukey
= u64_to_ptr(attr
->key
);
249 int ufd
= attr
->map_fd
;
250 struct fd f
= fdget(ufd
);
255 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM
))
258 map
= bpf_map_get(f
);
263 key
= kmalloc(map
->key_size
, GFP_USER
);
268 if (copy_from_user(key
, ukey
, map
->key_size
) != 0)
272 err
= map
->ops
->map_delete_elem(map
, key
);
282 /* last field in 'union bpf_attr' used by this command */
283 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
285 static int map_get_next_key(union bpf_attr
*attr
)
287 void __user
*ukey
= u64_to_ptr(attr
->key
);
288 void __user
*unext_key
= u64_to_ptr(attr
->next_key
);
289 int ufd
= attr
->map_fd
;
290 struct fd f
= fdget(ufd
);
292 void *key
, *next_key
;
295 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY
))
298 map
= bpf_map_get(f
);
303 key
= kmalloc(map
->key_size
, GFP_USER
);
308 if (copy_from_user(key
, ukey
, map
->key_size
) != 0)
312 next_key
= kmalloc(map
->key_size
, GFP_USER
);
317 err
= map
->ops
->map_get_next_key(map
, key
, next_key
);
323 if (copy_to_user(unext_key
, next_key
, map
->key_size
) != 0)
/* bpf(2) syscall entry point: validates the user-supplied attr union and
 * dispatches to the per-command handlers above.
 * NOTE(review): this definition continues past the end of this chunk (the
 * switch and closing brace are not visible here) — code left byte-identical.
 */
337 SYSCALL_DEFINE3(bpf
, int, cmd
, union bpf_attr __user
*, uattr
, unsigned int, size
)
/* zero-initialize so short user copies leave trailing fields zeroed */
339 union bpf_attr attr
= {};
342 /* the syscall is limited to root temporarily. This restriction will be
343 * lifted when security audit is clean. Note that eBPF+tracing must have
344 * this restriction, since it may pass kernel data to user space
346 if (!capable(CAP_SYS_ADMIN
))
/* basic sanity check on the user pointer before sizing checks */
349 if (!access_ok(VERIFY_READ
, uattr
, 1))
352 if (size
> PAGE_SIZE
) /* silly large */
355 /* If we're handed a bigger struct than we know of,
356 * ensure all the unknown bits are 0 - i.e. new
357 * user-space does not rely on any kernel feature
358 * extensions we dont know about yet.
360 if (size
> sizeof(attr
)) {
361 unsigned char __user
*addr
;
362 unsigned char __user
*end
;
/* scan the tail [uattr + sizeof(attr), uattr + size) byte by byte */
365 addr
= (void __user
*)uattr
+ sizeof(attr
);
366 end
= (void __user
*)uattr
+ size
;
368 for (; addr
< end
; addr
++) {
369 err
= get_user(val
, addr
);
378 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
379 if (copy_from_user(&attr
, uattr
, size
) != 0)
/* dispatch on cmd to the map handlers defined above */
384 err
= map_create(&attr
);
386 case BPF_MAP_LOOKUP_ELEM
:
387 err
= map_lookup_elem(&attr
);
389 case BPF_MAP_UPDATE_ELEM
:
390 err
= map_update_elem(&attr
);
392 case BPF_MAP_DELETE_ELEM
:
393 err
= map_delete_elem(&attr
);
395 case BPF_MAP_GET_NEXT_KEY
:
396 err
= map_get_next_key(&attr
);