// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded up to the next power of 2), reserving a given
 * virtual address space size per CPU, for a given maximum number of
 * CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: where TLS provides Thread-Local Storage, the
 * per-CPU memory allocator provides CPU-Local Storage.
 */

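/*
 * Minimal usage sketch (illustrative only, error handling omitted; it
 * assumes the public wrappers declared in <rseq/mempool.h>, e.g.
 * rseq_percpu_ptr() and rseq_mempool_percpu_free(), are available to
 * the caller):
 *
 *	struct counter { uint64_t count; };
 *
 *	struct rseq_mempool *pool;
 *	struct counter __rseq_percpu *c;
 *
 *	pool = rseq_mempool_create("counters", sizeof(struct counter), NULL);
 *	c = rseq_mempool_percpu_zmalloc(pool);
 *	rseq_percpu_ptr(c, cpu)->count++;	(cpu: index of the target CPU)
 *	rseq_mempool_percpu_free(c);
 *	rseq_mempool_destroy(pool);
 */
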
#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

#if RSEQ_BITS_PER_LONG == 64
# define DEFAULT_POISON_VALUE	0x5555555555555555ULL
#else
# define DEFAULT_POISON_VALUE	0x55555555UL
#endif

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;

	unsigned long max_nr_ranges;

	bool poison_set;
	uintptr_t poison;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};

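/*
 * Pool set usage sketch (illustrative only, error handling omitted; it
 * assumes the rseq_mempool_percpu_free() wrapper from <rseq/mempool.h>):
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	void __rseq_percpu *p;
 *
 *	(void) rseq_mempool_set_add_pool(set,
 *			rseq_mempool_create("pool-16", 16, NULL));
 *	(void) rseq_mempool_set_add_pool(set,
 *			rseq_mempool_create("pool-128", 128, NULL));
 *
 *	p = rseq_mempool_set_percpu_zmalloc(set, 40);
 *	(served by "pool-128", the smallest pool with item_len >= 40)
 *	rseq_mempool_percpu_free(p);
 *	(void) rseq_mempool_set_destroy(set);
 */
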
static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

static
void rseq_percpu_init_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset,
		void *init_ptr, size_t init_len)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memcpy(p, init_ptr, init_len);
	}
}

static
void rseq_percpu_poison_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t))
			*((uintptr_t *) (p + offset)) = poison;
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range, uintptr_t item_offset)
{
	uintptr_t poison = pool->attr.poison;
	int i;

	if (!pool->attr.robust_set)
		return;
	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		size_t offset;

		for (offset = 0; offset < pool->item_len; offset += sizeof(uintptr_t)) {
			uintptr_t v;

			/* Skip poison check for free-list pointer. */
			if (i == 0 && offset == 0)
				continue;
			v = *((uintptr_t *) (p + offset));
			if (v != poison) {
				fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
					__func__, (unsigned long) v, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
				abort();
			}
		}
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif

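/*
 * rseq_mempool_range_init_numa() is meant to be hooked up as a range
 * init callback through rseq_mempool_attr_set_init(). A possible
 * adapter (sketch only; MPOL_MF_MOVE comes from <numaif.h> and this
 * path requires HAVE_LIBNUMA):
 *
 *	static int init_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		return rseq_mempool_range_init_numa(addr, len, cpu, MPOL_MF_MOVE);
 *	}
 *
 *	...
 *	(void) rseq_mempool_attr_set_init(attr, init_numa, NULL);
 */
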
static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_range_poison(const struct rseq_mempool *pool,
		const struct rseq_mempool_range *range)
{
	size_t item_offset;

	for (item_offset = 0; item_offset < range->next_unused;
			item_offset += pool->item_len)
		rseq_percpu_check_poison_item(pool, range, item_offset);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_pool_poison(const struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;
	for (range = pool->range_list; range; range = range->next)
		check_range_poison(pool, range);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
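/*
 * For instance (illustrative numbers, assuming 4 kB pages): with
 * page_size = 4 kB, pre_header_len = 4 kB, len = 4 MB and
 * alignment = 1 MB, minimum_page_count is 1025 pages and page_count is
 * 1280 pages. The 1280-page mapping is trimmed at the front so that
 * (ptr + pre_header_len) lands on a 1 MB boundary, then trimmed at the
 * tail back down to 1025 pages, leaving exactly pre_header_len + len
 * bytes mapped.
 */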
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->attr.max_nr_ranges &&
			pool->nr_ranges >= pool->attr.max_nr_ranges) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	check_pool_poison(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (attr.robust_set && !attr.poison_set) {
		attr.poison_set = true;
		attr.poison = DEFAULT_POISON_VALUE;
	}
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
		bool zeroed, void *init_ptr, size_t init_len)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	if (init_len > pool->item_len) {
		errno = EINVAL;
		return NULL;
	}
	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		rseq_percpu_check_poison_item(pool, range, item_offset);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (addr) {
		if (zeroed)
			rseq_percpu_zero_item(pool, range, item_offset);
		else if (init_ptr) {
			rseq_percpu_init_item(pool, range, item_offset,
					init_ptr, init_len);
		}
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true, NULL, 0);
}

void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
		void *init_ptr, size_t len)
{
	return __rseq_percpu_malloc(pool, false, init_ptr, len);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	if (pool->attr.poison_set)
		rseq_percpu_poison_item(pool, range, item_offset);
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	/*
	 * Setting the next pointer will overwrite the first uintptr_t
	 * poison for CPU 0.
	 */
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
		void *init_ptr, size_t len)
{
	/* Pass zeroed=false so @init_ptr is used, matching rseq_mempool_percpu_malloc_init(). */
	return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
		unsigned long max_nr_ranges)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->max_nr_ranges = max_nr_ranges;
	return 0;
}

int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
		uintptr_t poison)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->poison_set = true;
	attr->poison = poison;
	return 0;
}

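/*
 * Attribute usage sketch (illustrative only, error handling omitted;
 * the attribute object is copied by rseq_mempool_create(), so it can
 * be destroyed right after pool creation):
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *
 *	(void) rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	(void) rseq_mempool_attr_set_robust(attr);
 *	(void) rseq_mempool_attr_set_max_nr_ranges(attr, 4);
 *	pool = rseq_mempool_create("robust-pool", sizeof(uint64_t), attr);
 *	rseq_mempool_attr_destroy(attr);
 */
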
int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}