// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded up to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */
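
/*
 * Minimal usage sketch (illustrative only, not compiled as part of
 * this file). It assumes the public API declared in <rseq/mempool.h>,
 * including the rseq_percpu_ptr() accessor and the
 * rseq_mempool_percpu_free() wrapper; "cpu" stands for a CPU number
 * obtained by the caller (e.g. from rseq_current_cpu_raw()).
 *
 *	struct counter { intptr_t count; };
 *	struct rseq_mempool *pool;
 *	struct counter __rseq_percpu *c;
 *
 *	pool = rseq_mempool_create("counters", sizeof(struct counter), NULL);
 *	if (!pool)
 *		abort();
 *	c = (struct counter __rseq_percpu *) rseq_mempool_percpu_zmalloc(pool);
 *	if (!c)
 *		abort();
 *	rseq_percpu_ptr(c, cpu)->count++;	(access the copy owned by @cpu)
 *	rseq_mempool_percpu_free(c);
 *	(void) rseq_mempool_destroy(pool);
 */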

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

/*
 * Skip pool index 0 to ensure allocated entries at index 0 do not match
 * a NULL pointer.
 */
#define FIRST_POOL		1

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	void (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;
	struct rseq_mempool *pool;	/* Backward ref. to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};

struct rseq_mempool {
	/* Linked-list of ranges. */
	struct rseq_mempool_range *ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
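
/*
 * Example of the indexing scheme above (illustrative numbers): a
 * request of len=24 bytes has a count order of 5, so entries[5]
 * (item_len=32) is tried first; if that slot is NULL or exhausted,
 * the next populated entry with a large enough item_len is used.
 */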

static
void *__rseq_pool_range_percpu_ptr(struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}
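
/*
 * Example of the address computation above (illustrative numbers):
 * with stride = 0x100000 (1 MB reserved per CPU) and an item at
 * offset 0x40, the copy owned by CPU 3 lives at
 * range->base + 3 * 0x100000 + 0x40.
 */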

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->ranges; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->ranges; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
			node;
			prev = node,
			node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
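
/*
 * Worked example of the trimming scheme below (illustrative numbers,
 * assuming 4 kB pages): for len = 4 MB, alignment = 4 MB and
 * pre_header_len = 4 kB, page_count covers
 * 4 kB + 4 MB + 4 MB - 4 kB = 8 MB of mapping. The unaligned head is
 * then unmapped so that ptr + pre_header_len falls on a 4 MB boundary,
 * and the unused tail is unmapped so that exactly
 * len + pre_header_len bytes remain mapped.
 */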
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		int cpu;

		for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
			pool->attr.init_func(pool->attr.init_priv,
				base + (pool->attr.stride * cpu),
				pool->attr.stride, cpu);
		}
	}
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	/* Iteration safe against removal. */
	for (range = pool->ranges; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->ranges = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	//TODO: implement multi-range support.
	pool->ranges = rseq_mempool_range_create(pool);
	if (!pool->ranges)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->ranges->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - pool->ranges->base);
		addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
		goto end;
	}
	if (pool->ranges->next_unused + pool->item_len > pool->attr.stride) {
		errno = ENOMEM;
		addr = NULL;
		goto end;
	}
	item_offset = pool->ranges->next_unused;
	addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
	pool->ranges->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, pool->ranges, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
{
	unsigned long *bitmap = pool->ranges->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

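/*
 * Note on the address arithmetic in librseq_mempool_percpu_free()
 * below: ranges are mapped with @alignment equal to the pool stride
 * (see rseq_mempool_range_create()), so masking the freed pointer
 * with ~(stride - 1) recovers the CPU 0 range base, and the range
 * header sits RANGE_HEADER_OFFSET bytes before that base. For
 * example (illustrative numbers), with stride = 0x100000, freeing
 * _ptr = base + 0x240 yields range_base = base and
 * item_offset = 0x240.
 */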
void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* First smallest present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		void (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}
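
/*
 * Usage sketch for the attribute setters above (illustrative only,
 * not compiled as part of this file); a stride of 0 or
 * RSEQ_MEMPOOL_STRIDE selects the default, and max_nr_cpus = 0
 * requests auto-detection in rseq_mempool_create():
 *
 *	struct rseq_mempool_attr *attr;
 *	struct rseq_mempool *pool;
 *
 *	attr = rseq_mempool_attr_create();
 *	if (!attr)
 *		abort();
 *	if (rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0))
 *		abort();
 *	if (rseq_mempool_attr_set_robust(attr))
 *		abort();
 *	pool = rseq_mempool_create("example", sizeof(intptr_t), attr);
 *	rseq_mempool_attr_destroy(attr);	(attributes are copied into the pool)
 *	if (!pool)
 *		abort();
 */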

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}