Mempool: default as global
[librseq.git] / src / rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include "smp.h"

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */
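
/*
 * Usage sketch (illustrative only, error handling elided;
 * rseq_percpu_ptr() and rseq_mempool_percpu_free() are assumed to be
 * provided by <rseq/mempool.h>, they are not defined in this file):
 *
 *	struct counter { uint64_t count; };
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *	struct counter __rseq_percpu *c;
 *
 *	attr = rseq_mempool_attr_create();
 *	// Stride 0 and max_nr_cpus 0 select the default stride and an
 *	// auto-detected possible CPU count.
 *	rseq_mempool_attr_set_percpu(attr, 0, 0);
 *	pool = rseq_mempool_create("counters", sizeof(struct counter), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	c = (struct counter __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
 *	rseq_percpu_ptr(c, 0)->count++;		// access the CPU 0 copy
 *	rseq_mempool_percpu_free(c);
 *	rseq_mempool_destroy(pool);
 */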

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

/*
 * Skip pool index 0 to ensure allocated entries at index 0 do not match
 * a NULL pointer.
 */
#define FIRST_POOL		1

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;
	struct rseq_mempool *pool;	/* Backward ref. to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Linked-list of ranges. */
	struct rseq_mempool_range *ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
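
/*
 * Pool set usage sketch (illustrative only, error handling elided):
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *	struct rseq_mempool *pool16 = rseq_mempool_create("p16", 16, NULL);
 *	struct rseq_mempool *pool64 = rseq_mempool_create("p64", 64, NULL);
 *
 *	rseq_mempool_set_add_pool(set, pool16);
 *	rseq_mempool_set_add_pool(set, pool64);
 *
 *	// Served from pool16: first pool whose item_len >= 12.
 *	void __rseq_percpu *a = rseq_mempool_set_percpu_zmalloc(set, 12);
 *	// Served from pool64.
 *	void __rseq_percpu *b = rseq_mempool_set_percpu_malloc(set, 40);
 *	...
 *	rseq_mempool_set_destroy(set);	// also destroys the pools it owns
 */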

static
void *__rseq_pool_percpu_ptr(struct rseq_mempool *pool, int cpu,
		uintptr_t item_offset, size_t stride)
{
	/* TODO: Implement multi-ranges support. */
	return pool->ranges->base + (stride * cpu) + item_offset;
}
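
/*
 * Address layout of a single range, as computed by
 * __rseq_pool_percpu_ptr():
 *
 *	base + 0 * stride	items of CPU 0
 *	base + 1 * stride	items of CPU 1
 *	...
 *	base + (max_nr_cpus - 1) * stride
 *
 * Each allocated item occupies the same item_offset within every
 * per-CPU copy, so base + cpu * stride + item_offset addresses the
 * copy of the item belonging to @cpu.
 */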

static
void rseq_percpu_zero_item(struct rseq_mempool *pool, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_percpu_ptr(pool, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

//TODO: this will need to be reimplemented for ranges,
//which cannot use __rseq_pool_percpu_ptr.
#if 0	//#ifdef HAVE_LIBNUMA
static
int rseq_mempool_range_init_numa(struct rseq_mempool *pool, struct rseq_mempool_range *range, int numa_flags)
{
	unsigned long nr_pages, page_len;
	long ret;
	int cpu;

	if (!numa_flags)
		return 0;
	page_len = rseq_get_page_len();
	nr_pages = pool->attr.stride >> rseq_get_count_order_ulong(page_len);
	for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {

		int status[MOVE_PAGES_BATCH_SIZE];
		int nodes[MOVE_PAGES_BATCH_SIZE];
		void *pages[MOVE_PAGES_BATCH_SIZE];

		nodes[0] = numa_node_of_cpu(cpu);
		for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
			nodes[k] = nodes[0];
		}

		for (unsigned long page = 0; page < nr_pages;) {

			size_t max_k = RSEQ_ARRAY_SIZE(pages);
			size_t left = nr_pages - page;

			if (left < max_k) {
				max_k = left;
			}

			for (size_t k = 0; k < max_k; ++k, ++page) {
				pages[k] = __rseq_pool_percpu_ptr(pool, cpu, page * page_len);
				status[k] = -EPERM;
			}

			ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

			if (ret < 0)
				return ret;

			if (ret > 0) {
				fprintf(stderr, "%lu pages were not migrated\n", ret);
				for (size_t k = 0; k < max_k; ++k) {
					if (status[k] < 0)
						fprintf(stderr,
							"Error while moving page %p to numa node %d: %u\n",
							pages[k], nodes[k], -status[k]);
				}
			}
		}
	}
	return 0;
}

int rseq_mempool_init_numa(struct rseq_mempool *pool, int numa_flags)
{
	struct rseq_mempool_range *range;
	int ret;

	if (!numa_flags)
		return 0;
	for (range = pool->ranges; range; range = range->next) {
		ret = rseq_mempool_range_init_numa(pool, range, numa_flags);
		if (ret)
			return ret;
	}
	return 0;
}
#else
int rseq_mempool_init_numa(struct rseq_mempool *pool __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	return 0;
}
#endif

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->ranges; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->ranges; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/*
	 * The range struct lives in the header page, located one page
	 * before the aligned mapping.
	 */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(len) || !is_pow2(alignment)) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}
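
/*
 * Worked example for aligned_mmap_anonymous() above (illustrative
 * numbers, assuming 4 kB pages): with len = 4 MB, alignment = 4 MB and
 * pre_header_len = 4 kB, minimum_page_count is 1025 pages and
 * page_count is 2048 pages. If mmap returns an unaligned address, the
 * pages preceding (next 4 MB boundary - 4 kB) are unmapped first, then
 * the pages following the 4 MB area, leaving exactly 1025 mapped
 * pages: one pre-header page immediately followed by a 4 MB area
 * aligned on a 4 MB boundary.
 */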

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	/* Iteration safe against removal. */
	for (range = pool->ranges; range && (next_range = range->next, 1); range = next_range) {
		if (rseq_mempool_range_destroy(pool, range))
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->ranges = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = get_possible_cpus_array_len();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	//TODO: implement multi-range support.
	pool->ranges = rseq_mempool_range_create(pool);
	if (!pool->ranges)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->ranges->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - pool->ranges->base);
		addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
		goto end;
	}
	if (pool->ranges->next_unused + pool->item_len > pool->attr.stride) {
		errno = ENOMEM;
		addr = NULL;
		goto end;
	}
	item_offset = pool->ranges->next_unused;
	addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
	pool->ranges->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->ranges->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}
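
/*
 * Custom mapping hooks sketch (illustrative only; prefault_mmap_func
 * and prefault_munmap_func are hypothetical application callbacks, not
 * part of this file). The hooks receive the opaque mmap_priv pointer
 * registered below and must follow the same contract as
 * default_mmap_func()/default_munmap_func() above: return NULL on
 * mapping failure, and 0 on successful unmap.
 *
 *	static void *prefault_mmap_func(void *priv __attribute__((unused)), size_t len)
 *	{
 *		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				MAP_ANONYMOUS | MAP_PRIVATE | MAP_POPULATE, -1, 0);
 *		return p == MAP_FAILED ? NULL : p;
 *	}
 *
 *	static int prefault_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
 *	{
 *		return munmap(ptr, len);
 *	}
 *
 *	// ...
 *	rseq_mempool_attr_set_mmap(attr, prefault_mmap_func,
 *			prefault_munmap_func, NULL);
 */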

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}
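
/*
 * Global pool sketch (illustrative only): MEMPOOL_TYPE_GLOBAL is the
 * default type when no attribute is given. rseq_mempool_create()
 * forces max_nr_cpus to 1 for this type, so the pool holds a single
 * copy of each item rather than one copy per CPU.
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	struct rseq_mempool *pool;
 *
 *	rseq_mempool_attr_set_global(attr, 0);	// stride 0: use default stride
 *	pool = rseq_mempool_create("global-objs", 64, attr);
 *	rseq_mempool_attr_destroy(attr);	// the pool keeps its own copy
 */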