Commit | Line | Data |
---|---|---|
28fbcfa0 AS |
1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
2 | * | |
3 | * This program is free software; you can redistribute it and/or | |
4 | * modify it under the terms of version 2 of the GNU General Public | |
5 | * License as published by the Free Software Foundation. | |
6 | * | |
7 | * This program is distributed in the hope that it will be useful, but | |
8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10 | * General Public License for more details. | |
11 | */ | |
12 | #include <linux/bpf.h> | |
13 | #include <linux/err.h> | |
14 | #include <linux/vmalloc.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/mm.h> | |
04fd61ab | 17 | #include <linux/filter.h> |
0cdf5640 | 18 | #include <linux/perf_event.h> |
28fbcfa0 | 19 | |
a10423b8 AS |
20 | static void bpf_array_free_percpu(struct bpf_array *array) |
21 | { | |
22 | int i; | |
23 | ||
24 | for (i = 0; i < array->map.max_entries; i++) | |
25 | free_percpu(array->pptrs[i]); | |
26 | } | |
27 | ||
28 | static int bpf_array_alloc_percpu(struct bpf_array *array) | |
29 | { | |
30 | void __percpu *ptr; | |
31 | int i; | |
32 | ||
33 | for (i = 0; i < array->map.max_entries; i++) { | |
34 | ptr = __alloc_percpu_gfp(array->elem_size, 8, | |
35 | GFP_USER | __GFP_NOWARN); | |
36 | if (!ptr) { | |
37 | bpf_array_free_percpu(array); | |
38 | return -ENOMEM; | |
39 | } | |
40 | array->pptrs[i] = ptr; | |
41 | } | |
42 | ||
43 | return 0; | |
44 | } | |
45 | ||
28fbcfa0 AS |
46 | /* Called from syscall */ |
47 | static struct bpf_map *array_map_alloc(union bpf_attr *attr) | |
48 | { | |
a10423b8 | 49 | bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; |
28fbcfa0 | 50 | struct bpf_array *array; |
a10423b8 AS |
51 | u64 array_size; |
52 | u32 elem_size; | |
28fbcfa0 AS |
53 | |
54 | /* check sanity of attributes */ | |
55 | if (attr->max_entries == 0 || attr->key_size != 4 || | |
823707b6 | 56 | attr->value_size == 0 || attr->map_flags) |
28fbcfa0 AS |
57 | return ERR_PTR(-EINVAL); |
58 | ||
01b3f521 AS |
59 | if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) |
60 | /* if value_size is bigger, the user space won't be able to | |
61 | * access the elements. | |
62 | */ | |
63 | return ERR_PTR(-E2BIG); | |
64 | ||
28fbcfa0 AS |
65 | elem_size = round_up(attr->value_size, 8); |
66 | ||
a10423b8 AS |
67 | array_size = sizeof(*array); |
68 | if (percpu) | |
69 | array_size += (u64) attr->max_entries * sizeof(void *); | |
70 | else | |
71 | array_size += (u64) attr->max_entries * elem_size; | |
72 | ||
73 | /* make sure there is no u32 overflow later in round_up() */ | |
74 | if (array_size >= U32_MAX - PAGE_SIZE) | |
daaf427c AS |
75 | return ERR_PTR(-ENOMEM); |
76 | ||
daaf427c | 77 | |
28fbcfa0 | 78 | /* allocate all map elements and zero-initialize them */ |
daaf427c | 79 | array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); |
28fbcfa0 | 80 | if (!array) { |
daaf427c | 81 | array = vzalloc(array_size); |
28fbcfa0 AS |
82 | if (!array) |
83 | return ERR_PTR(-ENOMEM); | |
84 | } | |
85 | ||
86 | /* copy mandatory map attributes */ | |
a10423b8 | 87 | array->map.map_type = attr->map_type; |
28fbcfa0 AS |
88 | array->map.key_size = attr->key_size; |
89 | array->map.value_size = attr->value_size; | |
90 | array->map.max_entries = attr->max_entries; | |
28fbcfa0 AS |
91 | array->elem_size = elem_size; |
92 | ||
a10423b8 AS |
93 | if (!percpu) |
94 | goto out; | |
95 | ||
96 | array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); | |
97 | ||
98 | if (array_size >= U32_MAX - PAGE_SIZE || | |
99 | elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { | |
100 | kvfree(array); | |
101 | return ERR_PTR(-ENOMEM); | |
102 | } | |
103 | out: | |
104 | array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; | |
105 | ||
28fbcfa0 | 106 | return &array->map; |
28fbcfa0 AS |
107 | } |
108 | ||
109 | /* Called from syscall or from eBPF program */ | |
110 | static void *array_map_lookup_elem(struct bpf_map *map, void *key) | |
111 | { | |
112 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
113 | u32 index = *(u32 *)key; | |
114 | ||
a10423b8 | 115 | if (unlikely(index >= array->map.max_entries)) |
28fbcfa0 AS |
116 | return NULL; |
117 | ||
118 | return array->value + array->elem_size * index; | |
119 | } | |
120 | ||
a10423b8 AS |
121 | /* Called from eBPF program */ |
122 | static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) | |
123 | { | |
124 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
125 | u32 index = *(u32 *)key; | |
126 | ||
127 | if (unlikely(index >= array->map.max_entries)) | |
128 | return NULL; | |
129 | ||
130 | return this_cpu_ptr(array->pptrs[index]); | |
131 | } | |
132 | ||
15a07b33 AS |
133 | int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) |
134 | { | |
135 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
136 | u32 index = *(u32 *)key; | |
137 | void __percpu *pptr; | |
138 | int cpu, off = 0; | |
139 | u32 size; | |
140 | ||
141 | if (unlikely(index >= array->map.max_entries)) | |
142 | return -ENOENT; | |
143 | ||
144 | /* per_cpu areas are zero-filled and bpf programs can only | |
145 | * access 'value_size' of them, so copying rounded areas | |
146 | * will not leak any kernel data | |
147 | */ | |
148 | size = round_up(map->value_size, 8); | |
149 | rcu_read_lock(); | |
150 | pptr = array->pptrs[index]; | |
151 | for_each_possible_cpu(cpu) { | |
152 | bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); | |
153 | off += size; | |
154 | } | |
155 | rcu_read_unlock(); | |
156 | return 0; | |
157 | } | |
158 | ||
28fbcfa0 AS |
159 | /* Called from syscall */ |
160 | static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | |
161 | { | |
162 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
163 | u32 index = *(u32 *)key; | |
164 | u32 *next = (u32 *)next_key; | |
165 | ||
166 | if (index >= array->map.max_entries) { | |
167 | *next = 0; | |
168 | return 0; | |
169 | } | |
170 | ||
171 | if (index == array->map.max_entries - 1) | |
172 | return -ENOENT; | |
173 | ||
174 | *next = index + 1; | |
175 | return 0; | |
176 | } | |
177 | ||
178 | /* Called from syscall or from eBPF program */ | |
179 | static int array_map_update_elem(struct bpf_map *map, void *key, void *value, | |
180 | u64 map_flags) | |
181 | { | |
182 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
183 | u32 index = *(u32 *)key; | |
184 | ||
a10423b8 | 185 | if (unlikely(map_flags > BPF_EXIST)) |
28fbcfa0 AS |
186 | /* unknown flags */ |
187 | return -EINVAL; | |
188 | ||
a10423b8 | 189 | if (unlikely(index >= array->map.max_entries)) |
28fbcfa0 AS |
190 | /* all elements were pre-allocated, cannot insert a new one */ |
191 | return -E2BIG; | |
192 | ||
a10423b8 | 193 | if (unlikely(map_flags == BPF_NOEXIST)) |
daaf427c | 194 | /* all elements already exist */ |
28fbcfa0 AS |
195 | return -EEXIST; |
196 | ||
a10423b8 AS |
197 | if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) |
198 | memcpy(this_cpu_ptr(array->pptrs[index]), | |
199 | value, map->value_size); | |
200 | else | |
201 | memcpy(array->value + array->elem_size * index, | |
202 | value, map->value_size); | |
28fbcfa0 AS |
203 | return 0; |
204 | } | |
205 | ||
15a07b33 AS |
206 | int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, |
207 | u64 map_flags) | |
208 | { | |
209 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
210 | u32 index = *(u32 *)key; | |
211 | void __percpu *pptr; | |
212 | int cpu, off = 0; | |
213 | u32 size; | |
214 | ||
215 | if (unlikely(map_flags > BPF_EXIST)) | |
216 | /* unknown flags */ | |
217 | return -EINVAL; | |
218 | ||
219 | if (unlikely(index >= array->map.max_entries)) | |
220 | /* all elements were pre-allocated, cannot insert a new one */ | |
221 | return -E2BIG; | |
222 | ||
223 | if (unlikely(map_flags == BPF_NOEXIST)) | |
224 | /* all elements already exist */ | |
225 | return -EEXIST; | |
226 | ||
227 | /* the user space will provide round_up(value_size, 8) bytes that | |
228 | * will be copied into per-cpu area. bpf programs can only access | |
229 | * value_size of it. During lookup the same extra bytes will be | |
230 | * returned or zeros which were zero-filled by percpu_alloc, | |
231 | * so no kernel data leaks possible | |
232 | */ | |
233 | size = round_up(map->value_size, 8); | |
234 | rcu_read_lock(); | |
235 | pptr = array->pptrs[index]; | |
236 | for_each_possible_cpu(cpu) { | |
237 | bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); | |
238 | off += size; | |
239 | } | |
240 | rcu_read_unlock(); | |
241 | return 0; | |
242 | } | |
243 | ||
28fbcfa0 AS |
244 | /* Called from syscall or from eBPF program */ |
245 | static int array_map_delete_elem(struct bpf_map *map, void *key) | |
246 | { | |
247 | return -EINVAL; | |
248 | } | |
249 | ||
250 | /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ | |
251 | static void array_map_free(struct bpf_map *map) | |
252 | { | |
253 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
254 | ||
255 | /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, | |
256 | * so the programs (can be more than one that used this map) were | |
257 | * disconnected from events. Wait for outstanding programs to complete | |
258 | * and free the array | |
259 | */ | |
260 | synchronize_rcu(); | |
261 | ||
a10423b8 AS |
262 | if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) |
263 | bpf_array_free_percpu(array); | |
264 | ||
28fbcfa0 AS |
265 | kvfree(array); |
266 | } | |
267 | ||
a2c83fff | 268 | static const struct bpf_map_ops array_ops = { |
28fbcfa0 AS |
269 | .map_alloc = array_map_alloc, |
270 | .map_free = array_map_free, | |
271 | .map_get_next_key = array_map_get_next_key, | |
272 | .map_lookup_elem = array_map_lookup_elem, | |
273 | .map_update_elem = array_map_update_elem, | |
274 | .map_delete_elem = array_map_delete_elem, | |
275 | }; | |
276 | ||
a2c83fff | 277 | static struct bpf_map_type_list array_type __read_mostly = { |
28fbcfa0 AS |
278 | .ops = &array_ops, |
279 | .type = BPF_MAP_TYPE_ARRAY, | |
280 | }; | |
281 | ||
a10423b8 AS |
282 | static const struct bpf_map_ops percpu_array_ops = { |
283 | .map_alloc = array_map_alloc, | |
284 | .map_free = array_map_free, | |
285 | .map_get_next_key = array_map_get_next_key, | |
286 | .map_lookup_elem = percpu_array_map_lookup_elem, | |
287 | .map_update_elem = array_map_update_elem, | |
288 | .map_delete_elem = array_map_delete_elem, | |
289 | }; | |
290 | ||
291 | static struct bpf_map_type_list percpu_array_type __read_mostly = { | |
292 | .ops = &percpu_array_ops, | |
293 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
294 | }; | |
295 | ||
28fbcfa0 AS |
296 | static int __init register_array_map(void) |
297 | { | |
a2c83fff | 298 | bpf_register_map_type(&array_type); |
a10423b8 | 299 | bpf_register_map_type(&percpu_array_type); |
28fbcfa0 AS |
300 | return 0; |
301 | } | |
302 | late_initcall(register_array_map); | |
04fd61ab | 303 | |
2a36f0b9 | 304 | static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) |
04fd61ab | 305 | { |
2a36f0b9 | 306 | /* only file descriptors can be stored in this type of map */ |
04fd61ab AS |
307 | if (attr->value_size != sizeof(u32)) |
308 | return ERR_PTR(-EINVAL); | |
309 | return array_map_alloc(attr); | |
310 | } | |
311 | ||
2a36f0b9 | 312 | static void fd_array_map_free(struct bpf_map *map) |
04fd61ab AS |
313 | { |
314 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
315 | int i; | |
316 | ||
317 | synchronize_rcu(); | |
318 | ||
319 | /* make sure it's empty */ | |
320 | for (i = 0; i < array->map.max_entries; i++) | |
2a36f0b9 | 321 | BUG_ON(array->ptrs[i] != NULL); |
04fd61ab AS |
322 | kvfree(array); |
323 | } | |
324 | ||
2a36f0b9 | 325 | static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) |
04fd61ab AS |
326 | { |
327 | return NULL; | |
328 | } | |
329 | ||
330 | /* only called from syscall */ | |
2a36f0b9 WN |
331 | static int fd_array_map_update_elem(struct bpf_map *map, void *key, |
332 | void *value, u64 map_flags) | |
04fd61ab AS |
333 | { |
334 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
2a36f0b9 | 335 | void *new_ptr, *old_ptr; |
04fd61ab AS |
336 | u32 index = *(u32 *)key, ufd; |
337 | ||
338 | if (map_flags != BPF_ANY) | |
339 | return -EINVAL; | |
340 | ||
341 | if (index >= array->map.max_entries) | |
342 | return -E2BIG; | |
343 | ||
344 | ufd = *(u32 *)value; | |
2a36f0b9 WN |
345 | new_ptr = map->ops->map_fd_get_ptr(map, ufd); |
346 | if (IS_ERR(new_ptr)) | |
347 | return PTR_ERR(new_ptr); | |
04fd61ab | 348 | |
2a36f0b9 WN |
349 | old_ptr = xchg(array->ptrs + index, new_ptr); |
350 | if (old_ptr) | |
351 | map->ops->map_fd_put_ptr(old_ptr); | |
04fd61ab AS |
352 | |
353 | return 0; | |
354 | } | |
355 | ||
2a36f0b9 | 356 | static int fd_array_map_delete_elem(struct bpf_map *map, void *key) |
04fd61ab AS |
357 | { |
358 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
2a36f0b9 | 359 | void *old_ptr; |
04fd61ab AS |
360 | u32 index = *(u32 *)key; |
361 | ||
362 | if (index >= array->map.max_entries) | |
363 | return -E2BIG; | |
364 | ||
2a36f0b9 WN |
365 | old_ptr = xchg(array->ptrs + index, NULL); |
366 | if (old_ptr) { | |
367 | map->ops->map_fd_put_ptr(old_ptr); | |
04fd61ab AS |
368 | return 0; |
369 | } else { | |
370 | return -ENOENT; | |
371 | } | |
372 | } | |
373 | ||
2a36f0b9 WN |
374 | static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) |
375 | { | |
376 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
377 | struct bpf_prog *prog = bpf_prog_get(fd); | |
378 | if (IS_ERR(prog)) | |
379 | return prog; | |
380 | ||
381 | if (!bpf_prog_array_compatible(array, prog)) { | |
382 | bpf_prog_put(prog); | |
383 | return ERR_PTR(-EINVAL); | |
384 | } | |
385 | return prog; | |
386 | } | |
387 | ||
388 | static void prog_fd_array_put_ptr(void *ptr) | |
389 | { | |
390 | struct bpf_prog *prog = ptr; | |
391 | ||
392 | bpf_prog_put_rcu(prog); | |
393 | } | |
394 | ||
04fd61ab | 395 | /* decrement refcnt of all bpf_progs that are stored in this map */ |
2a36f0b9 | 396 | void bpf_fd_array_map_clear(struct bpf_map *map) |
04fd61ab AS |
397 | { |
398 | struct bpf_array *array = container_of(map, struct bpf_array, map); | |
399 | int i; | |
400 | ||
401 | for (i = 0; i < array->map.max_entries; i++) | |
2a36f0b9 | 402 | fd_array_map_delete_elem(map, &i); |
04fd61ab AS |
403 | } |
404 | ||
405 | static const struct bpf_map_ops prog_array_ops = { | |
2a36f0b9 WN |
406 | .map_alloc = fd_array_map_alloc, |
407 | .map_free = fd_array_map_free, | |
04fd61ab | 408 | .map_get_next_key = array_map_get_next_key, |
2a36f0b9 WN |
409 | .map_lookup_elem = fd_array_map_lookup_elem, |
410 | .map_update_elem = fd_array_map_update_elem, | |
411 | .map_delete_elem = fd_array_map_delete_elem, | |
412 | .map_fd_get_ptr = prog_fd_array_get_ptr, | |
413 | .map_fd_put_ptr = prog_fd_array_put_ptr, | |
04fd61ab AS |
414 | }; |
415 | ||
416 | static struct bpf_map_type_list prog_array_type __read_mostly = { | |
417 | .ops = &prog_array_ops, | |
418 | .type = BPF_MAP_TYPE_PROG_ARRAY, | |
419 | }; | |
420 | ||
421 | static int __init register_prog_array_map(void) | |
422 | { | |
423 | bpf_register_map_type(&prog_array_type); | |
424 | return 0; | |
425 | } | |
426 | late_initcall(register_prog_array_map); | |
ea317b26 KX |
427 | |
428 | static void perf_event_array_map_free(struct bpf_map *map) | |
429 | { | |
430 | bpf_fd_array_map_clear(map); | |
431 | fd_array_map_free(map); | |
432 | } | |
433 | ||
434 | static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) | |
435 | { | |
436 | struct perf_event *event; | |
437 | const struct perf_event_attr *attr; | |
e03e7ee3 | 438 | struct file *file; |
ea317b26 | 439 | |
e03e7ee3 AS |
440 | file = perf_event_get(fd); |
441 | if (IS_ERR(file)) | |
442 | return file; | |
443 | ||
444 | event = file->private_data; | |
ea317b26 KX |
445 | |
446 | attr = perf_event_attrs(event); | |
447 | if (IS_ERR(attr)) | |
62544ce8 | 448 | goto err; |
ea317b26 | 449 | |
62544ce8 AS |
450 | if (attr->inherit) |
451 | goto err; | |
452 | ||
453 | if (attr->type == PERF_TYPE_RAW) | |
e03e7ee3 | 454 | return file; |
62544ce8 AS |
455 | |
456 | if (attr->type == PERF_TYPE_HARDWARE) | |
e03e7ee3 | 457 | return file; |
62544ce8 AS |
458 | |
459 | if (attr->type == PERF_TYPE_SOFTWARE && | |
460 | attr->config == PERF_COUNT_SW_BPF_OUTPUT) | |
e03e7ee3 | 461 | return file; |
62544ce8 | 462 | err: |
e03e7ee3 | 463 | fput(file); |
62544ce8 | 464 | return ERR_PTR(-EINVAL); |
ea317b26 KX |
465 | } |
466 | ||
467 | static void perf_event_fd_array_put_ptr(void *ptr) | |
468 | { | |
e03e7ee3 | 469 | fput((struct file *)ptr); |
ea317b26 KX |
470 | } |
471 | ||
472 | static const struct bpf_map_ops perf_event_array_ops = { | |
473 | .map_alloc = fd_array_map_alloc, | |
474 | .map_free = perf_event_array_map_free, | |
475 | .map_get_next_key = array_map_get_next_key, | |
476 | .map_lookup_elem = fd_array_map_lookup_elem, | |
477 | .map_update_elem = fd_array_map_update_elem, | |
478 | .map_delete_elem = fd_array_map_delete_elem, | |
479 | .map_fd_get_ptr = perf_event_fd_array_get_ptr, | |
480 | .map_fd_put_ptr = perf_event_fd_array_put_ptr, | |
481 | }; | |
482 | ||
483 | static struct bpf_map_type_list perf_event_array_type __read_mostly = { | |
484 | .ops = &perf_event_array_ops, | |
485 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, | |
486 | }; | |
487 | ||
488 | static int __init register_perf_event_array_map(void) | |
489 | { | |
490 | bpf_register_map_type(&perf_event_array_type); | |
491 | return 0; | |
492 | } | |
493 | late_initcall(register_perf_event_array_map); |