mempool: Introduce COW_ZERO poison value
[librseq.git] / src / rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <fcntl.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded to the next power of 2), reserving a given
 * virtual address space size per CPU, for a given maximum number of
 * CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */
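
/*
 * Usage sketch (illustrative, not part of the allocator itself):
 * create a per-CPU pool of counters, allocate one item, touch its
 * CPU 0 copy, then tear everything down. The rseq_percpu_ptr()
 * accessor and the rseq_mempool_percpu_free() wrapper are assumed to
 * be provided by <rseq/mempool.h>; error handling is kept minimal.
 */
static __attribute__((unused))
int example_mempool_usage(void)
{
	struct rseq_mempool_attr *attr;
	struct rseq_mempool *pool;
	uintptr_t __rseq_percpu *counter;
	int ret = -1;

	attr = rseq_mempool_attr_create();
	if (!attr)
		return -1;
	/* One stride per CPU; 0 requests auto-detection of max_nr_cpus. */
	if (rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0))
		goto end_attr;
	if (rseq_mempool_attr_set_robust(attr))
		goto end_attr;
	pool = rseq_mempool_create("example-counters", sizeof(uintptr_t), attr);
	if (!pool)
		goto end_attr;
	counter = (uintptr_t __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
	if (!counter)
		goto end_pool;
	/* Each CPU has its own zero-initialized copy; touch the CPU 0 one. */
	(*rseq_percpu_ptr(counter, 0))++;
	rseq_mempool_percpu_free(counter);
	ret = 0;
end_pool:
	if (rseq_mempool_destroy(pool))
		ret = -1;
end_attr:
	rseq_mempool_attr_destroy(attr);
	return ret;
}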

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

#define POOL_HEADER_NR_PAGES	2

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_COW_INIT_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_COW_INIT_POISON_VALUE	0x55555555UL
#endif

#define DEFAULT_COW_ZERO_POISON_VALUE	0x0

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;

	enum rseq_mempool_populate_policy populate_policy;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */

	/*
	 * Memory layout of a mempool range:
	 * - Canary header page (for destroy-after-fork detection),
	 * - Header page (contains struct rseq_mempool_range at the
	 *   very end),
	 * - Base of the per-cpu data, starting with CPU 0.
	 *   Aliases with free-list for non-robust populate-all pools.
	 * - CPU 1,
	 * ...
	 * - CPU max_nr_cpus - 1
	 * - init values (unpopulated for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL).
	 *   Aliases with free-list for non-robust populate-none pools.
	 * - free list (for robust pools).
	 *
	 * The free list aliases the CPU 0 memory area for non-robust
	 * populate-all pools. It aliases with init values for
	 * non-robust populate-none pools. It is located immediately
	 * after the init values for robust pools.
	 */
	void *header;
	void *base;
	/*
	 * The init values contain malloc_init/zmalloc values.
	 * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL.
	 */
	void *init;
	size_t next_unused;

	/* Pool range mmap/munmap */
	void *mmap_addr;
	size_t mmap_len;

	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range,
		uintptr_t item_offset)
{
	if (!range->init)
		return NULL;
	return range->init + item_offset;
}

static
void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool,
		struct free_list_node *node)
{
	void __rseq_percpu *p = (void __rseq_percpu *) node;

	if (pool->attr.robust_set) {
		/* Skip cpus. */
		p -= pool->attr.max_nr_cpus * pool->attr.stride;
		/* Skip init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p -= pool->attr.stride;

	} else {
		/* Populate none free list is in init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p -= pool->attr.max_nr_cpus * pool->attr.stride;
	}
	return p;
}

static
struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool,
		void __rseq_percpu *p)
{
	if (pool->attr.robust_set) {
		/* Skip cpus. */
		p += pool->attr.max_nr_cpus * pool->attr.stride;
		/* Skip init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p += pool->attr.stride;

	} else {
		/* Populate none free list is in init values */
		if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			p += pool->attr.max_nr_cpus * pool->attr.stride;
	}
	return (struct free_list_node *) p;
}

static
intptr_t rseq_cmp_item(void *p, size_t item_len, intptr_t cmp_value, intptr_t *unexpected_value)
{
	size_t offset;
	intptr_t res = 0;

	for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) {
		intptr_t v = *((intptr_t *) (p + offset));

		if ((res = v - cmp_value) != 0) {
			if (unexpected_value)
				*unexpected_value = v;
			break;
		}
	}
	return res;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		bzero(init_p, pool->item_len);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the item is already zeroed, either because the
		 * init range update has propagated or because the
		 * content is already zeroed (e.g. zero page), don't
		 * write to the page. This eliminates useless COW over
		 * the zero page just for overwriting it with zeroes.
		 *
		 * This means zmalloc() in a populate-all policy pool
		 * does not trigger COW for CPUs which are not actively
		 * writing to the pool. This is however not the case
		 * for malloc_init() in populate-all pools if it
		 * populates non-zero content.
		 */
		if (!rseq_cmp_item(p, pool->item_len, 0, NULL))
			continue;
		bzero(p, pool->item_len);
	}
}

static
void rseq_percpu_init_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset,
		void *init_ptr, size_t init_len)
{
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		memcpy(init_p, init_ptr, init_len);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the update propagated through a shared mapping,
		 * or the item already has the correct content, skip
		 * writing it into the cpu item to eliminate useless
		 * COW of the page.
		 */
		if (!memcmp(init_ptr, p, init_len))
			continue;
		memcpy(p, init_ptr, init_len);
	}
}

static
void rseq_poison_item(void *p, size_t item_len, uintptr_t poison)
{
	size_t offset;

	for (offset = 0; offset < item_len; offset += sizeof(uintptr_t))
		*((uintptr_t *) (p + offset)) = poison;
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	char *init_p = NULL;
	int i;

	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		rseq_poison_item(init_p, pool->item_len, poison);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);

		/*
		 * If the update propagated through a shared mapping,
		 * or the item already has the correct content, skip
		 * writing it into the cpu item to eliminate useless
		 * COW of the page.
		 *
		 * It is recommended to use zero as poison value for
		 * populate-all pools to eliminate COW due to writing
		 * poison to unused CPU memory.
		 */
		if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0)
			continue;
		rseq_poison_item(p, pool->item_len, poison);
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset,
		void *p, size_t item_len, uintptr_t poison)
{
	intptr_t unexpected_value;

	if (rseq_cmp_item(p, item_len, poison, &unexpected_value) == 0)
		return;

	fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
		__func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
	abort();
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	char *init_p;
	int i;

	if (!pool->attr.robust_set)
		return;
	init_p = __rseq_pool_range_init_ptr(range, item_offset);
	if (init_p)
		rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison);
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison);
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif

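#ifdef HAVE_LIBNUMA
/*
 * Sketch (illustrative): rseq_mempool_range_init_numa() has the same
 * (addr, len, cpu) shape as the init callback taken by
 * rseq_mempool_attr_set_init(), so it can be wired up to move each
 * CPU's range to that CPU's NUMA node when ranges are created. The
 * MPOL_MF_MOVE flag and the callback names below are assumptions for
 * the example, not something mandated by this file.
 */
static __attribute__((unused))
int example_numa_init_cb(void *priv __attribute__((unused)),
		void *addr, size_t len, int cpu)
{
	/* Global pools invoke the callback with cpu == -1: nothing to move. */
	if (cpu < 0)
		return 0;
	return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
}

static __attribute__((unused))
int example_attach_numa_init(struct rseq_mempool_attr *attr)
{
	return rseq_mempool_attr_set_init(attr, example_numa_init_cb, NULL);
}
#endif
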
static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr)
{
	struct rseq_mempool_range *range;
	void *addr = (void *) _addr;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool, bool mapping_accessible)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set || !mapping_accessible)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool, bool mapping_accessible)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set || !mapping_accessible)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
	range->alloc_bitmap = NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range,
		bool mapping_accessible)
{
	destroy_alloc_bitmap(pool, range);
	if (!mapping_accessible) {
		/*
		 * Only the header pages are populated in the child
		 * process.
		 */
		return munmap(range->header, POOL_HEADER_NR_PAGES * rseq_get_page_len());
	}
	return munmap(range->mmap_addr, range->mmap_len);
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = mmap(NULL, page_count << page_order, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (ptr == MAP_FAILED) {
		ptr = NULL;
		goto alloc_error;
	}

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (munmap(ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (munmap(extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

static
int rseq_memfd_create_init(const char *poolname, size_t init_len)
{
	int fd;
	char buf[249];		/* Limit is 249 bytes. */
	const char *name;

	if (poolname) {
		snprintf(buf, sizeof(buf), "%s:rseq-mempool", poolname);
		name = buf;
	} else {
		name = "<anonymous>:rseq-mempool";
	}

	fd = memfd_create(name, MFD_CLOEXEC);
	if (fd < 0) {
		perror("memfd_create");
		goto end;
	}
	if (ftruncate(fd, (off_t) init_len)) {
		if (close(fd))
			perror("close");
		fd = -1;
		goto end;
	}
end:
	return fd;
}

static
void rseq_memfd_close(int fd)
{
	if (fd < 0)
		return;
	if (close(fd))
		perror("close");
}

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;
	size_t range_len;	/* Range len excludes header. */
	size_t header_len;
	int memfd = -1;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	header_len = POOL_HEADER_NR_PAGES * page_size;
	range_len = pool->attr.stride * pool->attr.max_nr_cpus;
	if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
		range_len += pool->attr.stride;	/* init values */
	if (pool->attr.robust_set)
		range_len += pool->attr.stride;	/* free list */
	base = aligned_mmap_anonymous(page_size, range_len,
			pool->attr.stride, &header, header_len);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->header = header;
	range->base = base;
	range->mmap_addr = header;
	range->mmap_len = header_len + range_len;

	if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) {
		range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus);
		/* Populate init values pages from memfd */
		memfd = rseq_memfd_create_init(pool->name, pool->attr.stride);
		if (memfd < 0)
			goto error_alloc;
		if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED, memfd, 0) != (void *) range->init)
			goto error_alloc;
		assert(pool->attr.type == MEMPOOL_TYPE_PERCPU);
		/*
		 * Map per-cpu memory as private COW mappings of init values.
		 */
		{
			int cpu;

			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				void *p = base + (pool->attr.stride * cpu);
				size_t len = pool->attr.stride;

				if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
						memfd, 0) != (void *) p)
					goto error_alloc;
			}
		}
		/*
		 * The init values shared mapping should not be shared
		 * with child processes across fork. Prevent the whole
		 * mapping from being used across fork.
		 */
		if (madvise(base, range_len, MADV_DONTFORK))
			goto error_alloc;

		/*
		 * Write 0x1 in the first byte of the first header
		 * page, which will be WIPEONFORK (and thus cleared) in
		 * child processes. Used to find out if pool destroy is
		 * called from a child process after fork.
		 */
		*((char *) header) = 0x1;
		if (madvise(header, page_size, MADV_WIPEONFORK))
			goto error_alloc;

		/*
		 * The second header page contains the struct
		 * rseq_mempool_range, which is needed by pool destroy.
		 * Leave this anonymous page populated (COW) in child
		 * processes.
		 */
		rseq_memfd_close(memfd);
		memfd = -1;
	}

	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	rseq_memfd_close(memfd);
	(void) rseq_mempool_range_destroy(pool, range, true);
	return NULL;
}

static
bool pool_mappings_accessible(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	size_t page_size;
	char *addr;

	if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
		return true;
	range = pool->range_list;
	if (!range)
		return true;
	page_size = rseq_get_page_len();
	/*
	 * The first header page is one page before the page containing
	 * the range structure.
	 */
	addr = (char *) ((uintptr_t) range & ~(page_size - 1)) - page_size;
	/*
	 * Look for the 0x1 first-byte marker in the first header page.
	 */
	if (*addr != 0x1)
		return false;
	return true;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	bool mapping_accessible;
	int ret = 0;

	if (!pool)
		return 0;

	/*
	 * Validate that the pool mappings are accessible before doing
	 * free list/poison validation and unmapping ranges. This
	 * allows calling pool destroy in a child process after a fork
	 * for populate-none pools to free pool resources.
	 */
	mapping_accessible = pool_mappings_accessible(pool);

	check_free_list(pool, mapping_accessible);
	check_pool_poison(pool, mapping_accessible);

	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		if (rseq_mempool_range_destroy(pool, range, mapping_accessible))
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	free(pool);
end:
	return ret;
}

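/*
 * Sketch (illustrative): because the data mappings of populate-none
 * pools are madvise(MADV_DONTFORK), a child process can still call
 * rseq_mempool_destroy() on an inherited pool; the WIPEONFORK marker
 * above makes destroy only unmap the header pages in that case. The
 * fork() structure below is an assumption for the example.
 */
static __attribute__((unused))
void example_destroy_pool_in_child(struct rseq_mempool *pool)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* Child: data mappings are gone, destroy still releases resources. */
		if (rseq_mempool_destroy(pool))
			perror("rseq_mempool_destroy");
		_exit(0);
	}
}
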
struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Override populate policy for global type. */
		if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE)
			attr.populate_policy = RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL;
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		if (attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
			attr.poison = DEFAULT_COW_INIT_POISON_VALUE;
		else
			attr.poison = DEFAULT_COW_ZERO_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
		bool zeroed, void *init_ptr, size_t init_len)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	if (init_len > pool->item_len) {
		errno = EINVAL;
		return NULL;
	}
	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		void *range_base, *ptr;

		ptr = __rseq_free_list_to_percpu_ptr(pool, node);
		range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1)));
		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) (ptr - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = __rseq_free_list_to_percpu_ptr(pool, node);
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (addr) {
		if (zeroed)
			rseq_percpu_zero_item(pool, range, item_offset);
		else if (init_ptr) {
			rseq_percpu_init_item(pool, range, item_offset,
					init_ptr, init_len);
		}
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
		void *init_ptr, size_t len)
{
	return __rseq_percpu_malloc(pool, false, init_ptr, len);
}

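/*
 * Sketch (illustrative): allocating an item whose per-CPU copies all
 * start from the same template content. The example structure and the
 * assumption that the pool's item_len is at least
 * sizeof(struct example_counter) are hypothetical.
 */
struct example_counter {
	uintptr_t value;
	uintptr_t generation;
};

static __attribute__((unused))
void __rseq_percpu *example_alloc_from_template(struct rseq_mempool *pool)
{
	struct example_counter init_value = {
		.value = 0,
		.generation = 1,
	};

	/* Every CPU's copy (and the init values area, if any) gets this content. */
	return rseq_mempool_percpu_malloc_init(pool, &init_value, sizeof(init_value));
}
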
/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	item = __rseq_percpu_to_free_list_ptr(pool, _ptr);
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for either CPU 0 (populate all) or init data (populate
	 * none).
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
}

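/*
 * Sketch (illustrative): building a pool set from pools of increasing
 * item sizes so that callers can allocate variable-sized per-CPU
 * objects. The sizes, the pool name and the caller-provided attribute
 * (typically configured with rseq_mempool_attr_set_percpu()) are
 * example assumptions.
 */
static __attribute__((unused))
struct rseq_mempool_set *example_create_pool_set(const struct rseq_mempool_attr *attr)
{
	size_t sizes[] = { 16, 64, 256 };
	struct rseq_mempool_set *set;
	size_t i;

	set = rseq_mempool_set_create();
	if (!set)
		return NULL;
	for (i = 0; i < RSEQ_ARRAY_SIZE(sizes); i++) {
		struct rseq_mempool *pool;

		pool = rseq_mempool_create("example-set-pool", sizes[i], attr);
		if (!pool)
			goto error;
		/* On success, ownership of the pool is handed over to the set. */
		if (rseq_mempool_set_add_pool(set, pool)) {
			rseq_mempool_destroy(pool);
			goto error;
		}
	}
	return set;
error:
	rseq_mempool_set_destroy(set);
	return NULL;
}
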
struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr,
		enum rseq_mempool_populate_policy policy)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->populate_policy = policy;
	return 0;
}

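/*
 * Sketch (illustrative): a robust populate-all pool configured with
 * the zero poison value, so that freeing items does not trigger COW of
 * pages which would otherwise stay untouched. The attribute values and
 * pool name are example assumptions.
 */
static __attribute__((unused))
struct rseq_mempool *example_create_zero_poison_pool(size_t item_len)
{
	struct rseq_mempool_attr *attr;
	struct rseq_mempool *pool = NULL;

	attr = rseq_mempool_attr_create();
	if (!attr)
		return NULL;
	if (rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0))
		goto end;
	if (rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL))
		goto end;
	if (rseq_mempool_attr_set_robust(attr))
		goto end;
	/* Freed items are filled with zeroes instead of the COW_INIT pattern. */
	if (rseq_mempool_attr_set_poison(attr, DEFAULT_COW_ZERO_POISON_VALUE))
		goto end;
	pool = rseq_mempool_create("example-zero-poison", item_len, attr);
end:
	rseq_mempool_attr_destroy(attr);
	return pool;
}
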
int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}