/* Extracted from Linux kernel commit 99c55f7d ("bpf: introduce BPF syscall and maps") */
1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
2 | * | |
3 | * This program is free software; you can redistribute it and/or | |
4 | * modify it under the terms of version 2 of the GNU General Public | |
5 | * License as published by the Free Software Foundation. | |
6 | * | |
7 | * This program is distributed in the hope that it will be useful, but | |
8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10 | * General Public License for more details. | |
11 | */ | |
12 | #include <linux/bpf.h> | |
13 | #include <linux/syscalls.h> | |
14 | #include <linux/slab.h> | |
15 | #include <linux/anon_inodes.h> | |
16 | ||
/* Global registry of available map implementations; entries are added at
 * boot via bpf_register_map_type() and searched by find_and_alloc_map().
 */
static LIST_HEAD(bpf_map_types);
19 | static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) | |
20 | { | |
21 | struct bpf_map_type_list *tl; | |
22 | struct bpf_map *map; | |
23 | ||
24 | list_for_each_entry(tl, &bpf_map_types, list_node) { | |
25 | if (tl->type == attr->map_type) { | |
26 | map = tl->ops->map_alloc(attr); | |
27 | if (IS_ERR(map)) | |
28 | return map; | |
29 | map->ops = tl->ops; | |
30 | map->map_type = attr->map_type; | |
31 | return map; | |
32 | } | |
33 | } | |
34 | return ERR_PTR(-EINVAL); | |
35 | } | |
36 | ||
/* boot time registration of different map implementations.
 * NOTE(review): no locking is taken here — presumably safe only because all
 * callers run during early init, before the syscall is reachable; confirm.
 */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}
42 | ||
/* called from workqueue; runs in process context so the implementation's
 * ->map_free() callback is allowed to sleep.
 */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	/* implementation dependent freeing */
	map->ops->map_free(map);
}
51 | ||
/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep, and this
 * function may be called from atomic context, hence the deferral)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* last reference dropped: free asynchronously */
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}
62 | ||
/* ->release handler for a map fd: drop the reference taken at creation
 * time by map_create(); the map itself is freed once refcnt hits zero.
 */
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	bpf_map_put(map);
	return 0;
}
70 | ||
/* File operations backing a map fd.  Only ->release is needed: all other
 * interaction with the map goes through the bpf() syscall, not read/write.
 */
static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};
74 | ||
/* helper macro to check that unused fields 'union bpf_attr' are zero.
 *
 * Evaluates to true (an error) when any byte AFTER the command's last
 * meaningful field is non-zero.  memchr_inv() scans the tail of the union
 * — from just past CMD##_LAST_FIELD to the end of the union — and returns
 * non-NULL if it finds a byte != 0.  Each command defines its own
 * CMD##_LAST_FIELD below; the macro relies on a local 'attr' pointer
 * being in scope at the expansion site.
 */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* BPF_MAP_CREATE consumes attr fields up to and including max_entries */
#define BPF_MAP_CREATE_LAST_FIELD max_entries
84 | /* called via syscall */ | |
85 | static int map_create(union bpf_attr *attr) | |
86 | { | |
87 | struct bpf_map *map; | |
88 | int err; | |
89 | ||
90 | err = CHECK_ATTR(BPF_MAP_CREATE); | |
91 | if (err) | |
92 | return -EINVAL; | |
93 | ||
94 | /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ | |
95 | map = find_and_alloc_map(attr); | |
96 | if (IS_ERR(map)) | |
97 | return PTR_ERR(map); | |
98 | ||
99 | atomic_set(&map->refcnt, 1); | |
100 | ||
101 | err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); | |
102 | ||
103 | if (err < 0) | |
104 | /* failed to allocate fd */ | |
105 | goto free_map; | |
106 | ||
107 | return err; | |
108 | ||
109 | free_map: | |
110 | map->ops->map_free(map); | |
111 | return err; | |
112 | } | |
113 | ||
/* Entry point for the bpf(2) syscall: copy 'attr' from user space in a
 * size-extensible way, then dispatch on 'cmd'.
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	/* the syscall is limited to root temporarily. This restriction will be
	 * lifted when security audit is clean. Note that eBPF+tracing must have
	 * this restriction, since it may pass kernel data to user space
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* cheap sanity check on the user pointer before the byte-wise scan */
	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we dont know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		/* every trailing byte beyond sizeof(attr) must be zero */
		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		/* trailing bytes verified zero: only copy what we understand */
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr);
	 * attr was zero-initialized, so a short copy leaves the rest zeroed
	 */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}