mempool: Introduce support for ranges linked list
librseq.git: src/rseq-mempool.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <rseq/mempool.h>
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <rseq/compiler.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_LIBNUMA
# include <numa.h>
# include <numaif.h>
#endif

#include "rseq-utils.h"
#include <rseq/rseq.h>

/*
 * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator.
 *
 * The rseq per-CPU memory allocator allows the application to request
 * memory pools of CPU-Local memory, each containing objects of a
 * given size (rounded to the next power of 2), reserving a given
 * virtual address size per CPU, for a given maximum number of CPUs.
 *
 * The per-CPU memory allocator is analogous to TLS (Thread-Local
 * Storage) memory: TLS is Thread-Local Storage, whereas the per-CPU
 * memory allocator provides CPU-Local Storage.
 */
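
/*
 * Example usage (a minimal sketch; error handling elided, not compiled
 * as part of this file). All functions below are defined in this file;
 * rseq_percpu_ptr() and RSEQ_MEMPOOL_STRIDE are assumed to be provided
 * by the public <rseq/mempool.h> header.
 *
 *	struct counter { long value; };
 *
 *	struct rseq_mempool_attr *attr = rseq_mempool_attr_create();
 *	rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
 *	struct rseq_mempool *pool = rseq_mempool_create("counters",
 *			sizeof(struct counter), attr);
 *	rseq_mempool_attr_destroy(attr);
 *
 *	struct counter __rseq_percpu *c = rseq_mempool_percpu_zmalloc(pool);
 *	rseq_percpu_ptr(c, cpu)->value++;	// CPU-local copy for "cpu"
 *
 *	librseq_mempool_percpu_free(c, RSEQ_MEMPOOL_STRIDE);
 *	rseq_mempool_destroy(pool);
 */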

#define POOL_SET_NR_ENTRIES	RSEQ_BITS_PER_LONG

/*
 * Smallest allocation should hold enough space for a free list pointer.
 */
#if RSEQ_BITS_PER_LONG == 64
# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
#else
# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
#endif

#define BIT_PER_ULONG		(8 * sizeof(unsigned long))

#define MOVE_PAGES_BATCH_SIZE	4096

#define RANGE_HEADER_OFFSET	sizeof(struct rseq_mempool_range)

//TODO: make this configurable
#define MEMPOOL_MAX_NR_RANGES	1

struct free_list_node;

struct free_list_node {
	struct free_list_node *next;
};

enum mempool_type {
	MEMPOOL_TYPE_GLOBAL = 0,	/* Default */
	MEMPOOL_TYPE_PERCPU = 1,
};

struct rseq_mempool_attr {
	bool mmap_set;
	void *(*mmap_func)(void *priv, size_t len);
	int (*munmap_func)(void *priv, void *ptr, size_t len);
	void *mmap_priv;

	bool init_set;
	int (*init_func)(void *priv, void *addr, size_t len, int cpu);
	void *init_priv;

	bool robust_set;

	enum mempool_type type;
	size_t stride;
	int max_nr_cpus;
};

struct rseq_mempool_range;

struct rseq_mempool_range {
	struct rseq_mempool_range *next;	/* Linked list of ranges. */
	struct rseq_mempool *pool;		/* Backward reference to container pool. */
	void *header;
	void *base;
	size_t next_unused;
	/* Track alloc/free. */
	unsigned long *alloc_bitmap;
};
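
/*
 * Layout of a range mapping (illustrative sketch, derived from
 * rseq_mempool_range_create() and aligned_mmap_anonymous() below):
 *
 *	range->header: one page, with this struct rseq_mempool_range
 *	               stored at its end (range->base - RANGE_HEADER_OFFSET).
 *	range->base:   aligned on attr.stride; CPU c's copy of an item at
 *	               item_offset lives at base + c * attr.stride + item_offset,
 *	               for c in [0, attr.max_nr_cpus).
 *
 * Because base is stride-aligned, the allocation and free paths can
 * recover the range from a CPU 0 item address by masking the address
 * with ~(stride - 1) and subtracting RANGE_HEADER_OFFSET.
 */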

struct rseq_mempool {
	/* Head of ranges linked-list. */
	struct rseq_mempool_range *range_list;
	unsigned long nr_ranges;

	size_t item_len;
	int item_order;

	/*
	 * The free list chains freed items on the CPU 0 address range.
	 * We should rethink this decision if false sharing between
	 * malloc/free from other CPUs and data accesses from CPU 0
	 * becomes an issue. This is a NULL-terminated singly-linked
	 * list.
	 */
	struct free_list_node *free_list_head;

	/* This lock protects allocation/free within the pool. */
	pthread_mutex_t lock;

	struct rseq_mempool_attr attr;
	char *name;
};

/*
 * Pool set entries are indexed by item_len rounded to the next power of
 * 2. A pool set can contain NULL pool entries, in which case the next
 * large enough entry will be used for allocation.
 */
struct rseq_mempool_set {
	/* This lock protects add vs malloc/zmalloc within the pool set. */
	pthread_mutex_t lock;
	struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
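
/*
 * Worked example of pool set indexing (sketch): a request for len = 24
 * bytes has rseq_get_count_order_ulong(24) = 5, so the lookup starts at
 * entries[5] (32-byte items). If entries[5] is NULL, the next populated
 * entry whose item_len is >= 24 is used (e.g. entries[6], 64-byte
 * items); if the chosen pool is exhausted (ENOMEM), the lookup is
 * retried starting one order higher. See __rseq_mempool_set_malloc().
 */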

static
void *__rseq_pool_range_percpu_ptr(struct rseq_mempool_range *range, int cpu,
		uintptr_t item_offset, size_t stride)
{
	return range->base + (stride * cpu) + item_offset;
}
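
/*
 * Addressing sketch: with stride S, the CPU c copy of the item at
 * item_offset lives at range->base + c * S + item_offset. The public
 * rseq_percpu_ptr() accessor in <rseq/mempool.h> is assumed to perform
 * the equivalent computation on user-held pointers.
 */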

static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
		struct rseq_mempool_range *range, uintptr_t item_offset)
{
	int i;

	for (i = 0; i < pool->attr.max_nr_cpus; i++) {
		char *p = __rseq_pool_range_percpu_ptr(range, i,
				item_offset, pool->attr.stride);
		memset(p, 0, pool->item_len);
	}
}

#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
	unsigned long nr_pages, page_len;
	int status[MOVE_PAGES_BATCH_SIZE];
	int nodes[MOVE_PAGES_BATCH_SIZE];
	void *pages[MOVE_PAGES_BATCH_SIZE];
	long ret;

	if (!numa_flags) {
		errno = EINVAL;
		return -1;
	}
	page_len = rseq_get_page_len();
	nr_pages = len >> rseq_get_count_order_ulong(page_len);

	nodes[0] = numa_node_of_cpu(cpu);
	if (nodes[0] < 0)
		return -1;

	for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) {
		nodes[k] = nodes[0];
	}

	for (unsigned long page = 0; page < nr_pages;) {

		size_t max_k = RSEQ_ARRAY_SIZE(pages);
		size_t left = nr_pages - page;

		if (left < max_k) {
			max_k = left;
		}

		for (size_t k = 0; k < max_k; ++k, ++page) {
			pages[k] = addr + (page * page_len);
			status[k] = -EPERM;
		}

		ret = move_pages(0, max_k, pages, nodes, status, numa_flags);

		if (ret < 0)
			return ret;

		if (ret > 0) {
			fprintf(stderr, "%ld pages were not migrated\n", ret);
			for (size_t k = 0; k < max_k; ++k) {
				if (status[k] < 0)
					fprintf(stderr,
						"Error while moving page %p to numa node %d: %d\n",
						pages[k], nodes[k], -status[k]);
			}
		}
	}
	return 0;
}
#else
int rseq_mempool_range_init_numa(void *addr __attribute__((unused)),
		size_t len __attribute__((unused)),
		int cpu __attribute__((unused)),
		int numa_flags __attribute__((unused)))
{
	errno = ENOSYS;
	return -1;
}
#endif
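
/*
 * Wiring sketch (an assumption about typical usage, not the only way):
 * rseq_mempool_range_init_numa() can be called from the init callback
 * registered with rseq_mempool_attr_set_init(), so that each per-CPU
 * range is moved to the NUMA node of its CPU as the range is created.
 * MPOL_MF_MOVE comes from <numaif.h>.
 *
 *	static int init_numa(void *priv __attribute__((unused)),
 *			void *addr, size_t len, int cpu)
 *	{
 *		if (cpu < 0)
 *			return 0;	// global (non per-CPU) init call
 *		return rseq_mempool_range_init_numa(addr, len, cpu,
 *				MPOL_MF_MOVE);
 *	}
 *
 *	rseq_mempool_attr_set_init(attr, init_numa, NULL);
 */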

static
void *default_mmap_func(void *priv __attribute__((unused)), size_t len)
{
	void *base;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return NULL;
	return base;
}

static
int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len)
{
	return munmap(ptr, len);
}

static
int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	size_t count;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/*
	 * Not being able to create the validation bitmap is an error
	 * that needs to be reported.
	 */
	range->alloc_bitmap = calloc(count, sizeof(unsigned long));
	if (!range->alloc_bitmap)
		return -1;
	return 0;
}
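
/*
 * Sizing example (sketch): with attr.stride = 1 MB and 64-byte items
 * (item_order = 6), a range tracks 1 MB >> 6 = 16384 items, so the
 * bitmap needs (16384 + BIT_PER_ULONG - 1) / BIT_PER_ULONG = 256
 * unsigned longs on a 64-bit build (512 on 32-bit).
 */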

static
const char *get_pool_name(const struct rseq_mempool *pool)
{
	return pool->name ? : "<anonymous>";
}

static
bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
{
	struct rseq_mempool_range *range;

	for (range = pool->range_list; range; range = range->next) {
		if (addr >= range->base && addr < range->base + range->next_unused)
			return true;
	}
	return false;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void check_free_list(const struct rseq_mempool *pool)
{
	size_t total_item = 0, total_never_allocated = 0, total_freed = 0,
		max_list_traversal = 0, traversal_iteration = 0;
	struct rseq_mempool_range *range;

	if (!pool->attr.robust_set)
		return;

	for (range = pool->range_list; range; range = range->next) {
		total_item += pool->attr.stride >> pool->item_order;
		total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
	}
	max_list_traversal = total_item - total_never_allocated;

	for (struct free_list_node *node = pool->free_list_head, *prev = NULL;
	     node;
	     prev = node,
	     node = node->next) {

		void *node_addr = node;

		if (traversal_iteration >= max_list_traversal) {
			fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
				__func__, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		/* Node is out of range. */
		if (!addr_in_pool(pool, node_addr)) {
			if (prev)
				fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
			else
				fprintf(stderr, "%s: Corrupted free-list node [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
					__func__, node, get_pool_name(pool), pool, __builtin_return_address(0));
			abort();
		}

		traversal_iteration++;
		total_freed++;
	}

	if (total_never_allocated + total_freed != total_item) {
		fprintf(stderr, "%s: Corrupted free-list in pool \"%s\" (%p); total-item: %zu total-never-used: %zu total-freed: %zu, caller %p.\n",
			__func__, get_pool_name(pool), pool, total_item, total_never_allocated, total_freed, __builtin_return_address(0));
		abort();
	}
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t count, total_leaks = 0;

	if (!bitmap)
		return;

	count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG;

	/* Assert that all items in the pool were freed. */
	for (size_t k = 0; k < count; ++k)
		total_leaks += rseq_hweight_ulong(bitmap[k]);
	if (total_leaks) {
		fprintf(stderr, "%s: Pool \"%s\" (%p) has %zu leaked items on destroy, caller: %p.\n",
			__func__, get_pool_name(pool), pool, total_leaks, (void *) __builtin_return_address(0));
		abort();
	}

	free(bitmap);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
		struct rseq_mempool_range *range)
{
	destroy_alloc_bitmap(pool, range);
	/* range is a header located one page before the aligned mapping. */
	return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
			(pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
}

/*
 * Allocate a memory mapping aligned on @alignment, with an optional
 * @pre_header before the mapping.
 */
static
void *aligned_mmap_anonymous(struct rseq_mempool *pool,
		size_t page_size, size_t len, size_t alignment,
		void **pre_header, size_t pre_header_len)
{
	size_t minimum_page_count, page_count, extra, total_allocate = 0;
	int page_order;
	void *ptr;

	if (len < page_size || alignment < page_size ||
			!is_pow2(alignment) || (len & (alignment - 1))) {
		errno = EINVAL;
		return NULL;
	}
	page_order = rseq_get_count_order_ulong(page_size);
	if (page_order < 0) {
		errno = EINVAL;
		return NULL;
	}
	if (pre_header_len && (pre_header_len & (page_size - 1))) {
		errno = EINVAL;
		return NULL;
	}

	minimum_page_count = (pre_header_len + len) >> page_order;
	page_count = (pre_header_len + len + alignment - page_size) >> page_order;

	assert(page_count >= minimum_page_count);

	ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order);
	if (!ptr)
		goto alloc_error;

	total_allocate = page_count << page_order;

	if (!(((uintptr_t) ptr + pre_header_len) & (alignment - 1))) {
		/* Pointer is already aligned. ptr points to pre_header. */
		goto out;
	}

	/* Unmap extra before. */
	extra = offset_align((uintptr_t) ptr + pre_header_len, alignment);
	assert(!(extra & (page_size - 1)));
	if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) {
		perror("munmap");
		abort();
	}
	total_allocate -= extra;
	ptr += extra;	/* ptr points to pre_header */
	page_count -= extra >> page_order;
out:
	assert(page_count >= minimum_page_count);

	if (page_count > minimum_page_count) {
		void *extra_ptr;

		/* Unmap extra after. */
		extra_ptr = ptr + (minimum_page_count << page_order);
		extra = (page_count - minimum_page_count) << page_order;
		if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) {
			perror("munmap");
			abort();
		}
		total_allocate -= extra;
	}

	assert(!(((uintptr_t) ptr + pre_header_len) & (alignment - 1)));
	assert(total_allocate == len + pre_header_len);

alloc_error:
	if (ptr) {
		if (pre_header)
			*pre_header = ptr;
		ptr += pre_header_len;
	}
	return ptr;
}
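
/*
 * Worked example (sketch, assuming 4 kB pages): a per-CPU pool with
 * stride = 1 MB and max_nr_cpus = 4 calls this function with
 * len = 4 MB, alignment = 1 MB and pre_header_len = 4096. Then
 * page_order = 12, minimum_page_count = (4096 + 4 MB) >> 12 = 1025, and
 * page_count = (4096 + 4 MB + 1 MB - 4096) >> 12 = 1280 pages are
 * mapped. The slack before the first stride-aligned address (minus the
 * pre-header page) and after the 1025 required pages is munmap'd,
 * leaving exactly pre_header_len + len bytes mapped, with the
 * pre-header page immediately preceding the aligned base.
 */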

static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range;
	unsigned long page_size;
	void *header;
	void *base;

	if (pool->nr_ranges >= MEMPOOL_MAX_NR_RANGES) {
		errno = ENOMEM;
		return NULL;
	}
	page_size = rseq_get_page_len();

	base = aligned_mmap_anonymous(pool, page_size,
			pool->attr.stride * pool->attr.max_nr_cpus,
			pool->attr.stride,
			&header, page_size);
	if (!base)
		return NULL;
	range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
	range->pool = pool;
	range->base = base;
	range->header = header;
	if (pool->attr.robust_set) {
		if (create_alloc_bitmap(pool, range))
			goto error_alloc;
	}
	if (pool->attr.init_set) {
		switch (pool->attr.type) {
		case MEMPOOL_TYPE_GLOBAL:
			if (pool->attr.init_func(pool->attr.init_priv,
					base, pool->attr.stride, -1)) {
				goto error_alloc;
			}
			break;
		case MEMPOOL_TYPE_PERCPU:
		{
			int cpu;
			for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
				if (pool->attr.init_func(pool->attr.init_priv,
						base + (pool->attr.stride * cpu),
						pool->attr.stride, cpu)) {
					goto error_alloc;
				}
			}
			break;
		}
		default:
			abort();
		}
	}
	pool->nr_ranges++;
	return range;

error_alloc:
	(void) rseq_mempool_range_destroy(pool, range);
	return NULL;
}

int rseq_mempool_destroy(struct rseq_mempool *pool)
{
	struct rseq_mempool_range *range, *next_range;
	int ret = 0;

	if (!pool)
		return 0;
	check_free_list(pool);
	/* Iteration safe against removal. */
	for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
		ret = rseq_mempool_range_destroy(pool, range);
		if (ret)
			goto end;
		/* Update list head to keep list coherent in case of partial failure. */
		pool->range_list = next_range;
	}
	pthread_mutex_destroy(&pool->lock);
	free(pool->name);
	memset(pool, 0, sizeof(*pool));
end:
	return ret;
}

struct rseq_mempool *rseq_mempool_create(const char *pool_name,
		size_t item_len, const struct rseq_mempool_attr *_attr)
{
	struct rseq_mempool *pool;
	struct rseq_mempool_attr attr = {};
	int order;

	/* Make sure each item is large enough to contain free list pointers. */
	if (item_len < sizeof(void *))
		item_len = sizeof(void *);

	/* Align item_len on next power of two. */
	order = rseq_get_count_order_ulong(item_len);
	if (order < 0) {
		errno = EINVAL;
		return NULL;
	}
	item_len = 1UL << order;

	if (_attr)
		memcpy(&attr, _attr, sizeof(attr));
	if (!attr.mmap_set) {
		attr.mmap_func = default_mmap_func;
		attr.munmap_func = default_munmap_func;
		attr.mmap_priv = NULL;
	}

	switch (attr.type) {
	case MEMPOOL_TYPE_PERCPU:
		if (attr.max_nr_cpus < 0) {
			errno = EINVAL;
			return NULL;
		}
		if (attr.max_nr_cpus == 0) {
			/* Auto-detect */
			attr.max_nr_cpus = rseq_get_max_nr_cpus();
			if (attr.max_nr_cpus == 0) {
				errno = EINVAL;
				return NULL;
			}
		}
		break;
	case MEMPOOL_TYPE_GLOBAL:
		/* Use a 1-cpu pool for global mempool type. */
		attr.max_nr_cpus = 1;
		break;
	}
	if (!attr.stride)
		attr.stride = RSEQ_MEMPOOL_STRIDE;	/* Use default */
	if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
			!is_pow2(attr.stride)) {
		errno = EINVAL;
		return NULL;
	}

	pool = calloc(1, sizeof(struct rseq_mempool));
	if (!pool)
		return NULL;

	memcpy(&pool->attr, &attr, sizeof(attr));
	pthread_mutex_init(&pool->lock, NULL);
	pool->item_len = item_len;
	pool->item_order = order;

	pool->range_list = rseq_mempool_range_create(pool);
	if (!pool->range_list)
		goto error_alloc;

	if (pool_name) {
		pool->name = strdup(pool_name);
		if (!pool->name)
			goto error_alloc;
	}
	return pool;

error_alloc:
	rseq_mempool_destroy(pool);
	errno = ENOMEM;
	return NULL;
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is already set. */
	if (bitmap[k] & mask) {
		fprintf(stderr, "%s: Allocator corruption detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] |= mask;
}

static
void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
{
	struct rseq_mempool_range *range;
	struct free_list_node *node;
	uintptr_t item_offset;
	void __rseq_percpu *addr;

	pthread_mutex_lock(&pool->lock);
	/* Get first entry from free list. */
	node = pool->free_list_head;
	if (node != NULL) {
		uintptr_t ptr = (uintptr_t) node;
		void *range_base = (void *) (ptr & (~(pool->attr.stride - 1)));

		range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
		/* Remove node from free list (update head). */
		pool->free_list_head = node->next;
		item_offset = (uintptr_t) ((void *) node - range_base);
		addr = (void __rseq_percpu *) node;
		goto end;
	}
	/*
	 * If the most recent range (first in list) does not have any
	 * room left, create a new range and prepend it to the list
	 * head.
	 */
	range = pool->range_list;
	if (range->next_unused + pool->item_len > pool->attr.stride) {
		range = rseq_mempool_range_create(pool);
		if (!range) {
			errno = ENOMEM;
			addr = NULL;
			goto end;
		}
		/* Add range to head of list. */
		range->next = pool->range_list;
		pool->range_list = range;
	}
	/* First range in list has room left. */
	item_offset = range->next_unused;
	addr = (void __rseq_percpu *) (range->base + item_offset);
	range->next_unused += pool->item_len;
end:
	if (addr)
		set_alloc_slot(pool, range, item_offset);
	pthread_mutex_unlock(&pool->lock);
	if (zeroed && addr)
		rseq_percpu_zero_item(pool, range, item_offset);
	return addr;
}

void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, false);
}

void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
	return __rseq_percpu_malloc(pool, true);
}

/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
	unsigned long *bitmap = range->alloc_bitmap;
	size_t item_index = item_offset >> pool->item_order;
	unsigned long mask;
	size_t k;

	if (!bitmap)
		return;

	k = item_index / BIT_PER_ULONG;
	mask = 1ULL << (item_index % BIT_PER_ULONG);

	/* Print error if bit is not set. */
	if (!(bitmap[k] & mask)) {
		fprintf(stderr, "%s: Double-free detected for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
			__func__, get_pool_name(pool), pool, item_offset,
			(void *) __builtin_return_address(0));
		abort();
	}
	bitmap[k] &= ~mask;
}

void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
{
	uintptr_t ptr = (uintptr_t) _ptr;
	void *range_base = (void *) (ptr & (~(stride - 1)));
	struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
	struct rseq_mempool *pool = range->pool;
	uintptr_t item_offset = ptr & (stride - 1);
	struct free_list_node *head, *item;

	pthread_mutex_lock(&pool->lock);
	clear_alloc_slot(pool, range, item_offset);
	/* Add ptr to head of free list */
	head = pool->free_list_head;
	/* Free-list is in CPU 0 range. */
	item = (struct free_list_node *) ptr;
	item->next = head;
	pool->free_list_head = item;
	pthread_mutex_unlock(&pool->lock);
}

struct rseq_mempool_set *rseq_mempool_set_create(void)
{
	struct rseq_mempool_set *pool_set;

	pool_set = calloc(1, sizeof(struct rseq_mempool_set));
	if (!pool_set)
		return NULL;
	pthread_mutex_init(&pool_set->lock, NULL);
	return pool_set;
}

int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set)
{
	int order, ret;

	for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) {
		struct rseq_mempool *pool = pool_set->entries[order];

		if (!pool)
			continue;
		ret = rseq_mempool_destroy(pool);
		if (ret)
			return ret;
		pool_set->entries[order] = NULL;
	}
	pthread_mutex_destroy(&pool_set->lock);
	free(pool_set);
	return 0;
}

/* Ownership of pool is handed over to pool set on success. */
int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool)
{
	size_t item_order = pool->item_order;
	int ret = 0;

	pthread_mutex_lock(&pool_set->lock);
	if (pool_set->entries[item_order]) {
		errno = EBUSY;
		ret = -1;
		goto end;
	}
	pool_set->entries[pool->item_order] = pool;
end:
	pthread_mutex_unlock(&pool_set->lock);
	return ret;
}

static
void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
{
	int order, min_order = POOL_SET_MIN_ENTRY;
	struct rseq_mempool *pool;
	void __rseq_percpu *addr;

	order = rseq_get_count_order_ulong(len);
	if (order > POOL_SET_MIN_ENTRY)
		min_order = order;
again:
	pthread_mutex_lock(&pool_set->lock);
	/* Find the first (smallest) present pool where @len fits. */
	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
		pool = pool_set->entries[order];

		if (!pool)
			continue;
		if (pool->item_len >= len)
			goto found;
	}
	pool = NULL;
found:
	pthread_mutex_unlock(&pool_set->lock);
	if (pool) {
		addr = __rseq_percpu_malloc(pool, zeroed);
		if (addr == NULL && errno == ENOMEM) {
			/*
			 * If the allocation failed, try again with a
			 * larger pool.
			 */
			min_order = order + 1;
			goto again;
		}
	} else {
		/* Not found. */
		errno = ENOMEM;
		addr = NULL;
	}
	return addr;
}

void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, false);
}

void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
	return __rseq_mempool_set_malloc(pool_set, len, true);
}
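
/*
 * Pool set usage (a minimal sketch; error handling elided). The pools
 * below use the default global type; RSEQ_MEMPOOL_STRIDE is assumed to
 * be provided by <rseq/mempool.h>.
 *
 *	struct rseq_mempool_set *set = rseq_mempool_set_create();
 *
 *	rseq_mempool_set_add_pool(set,
 *			rseq_mempool_create("set-32", 32, NULL));
 *	rseq_mempool_set_add_pool(set,
 *			rseq_mempool_create("set-128", 128, NULL));
 *
 *	void __rseq_percpu *p = rseq_mempool_set_percpu_zmalloc(set, 100);
 *	// served by the 128-byte pool (smallest item_len >= 100)
 *
 *	librseq_mempool_percpu_free(p, RSEQ_MEMPOOL_STRIDE);
 *	rseq_mempool_set_destroy(set);
 */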

struct rseq_mempool_attr *rseq_mempool_attr_create(void)
{
	return calloc(1, sizeof(struct rseq_mempool_attr));
}

void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr)
{
	free(attr);
}

int rseq_mempool_attr_set_mmap(struct rseq_mempool_attr *attr,
		void *(*mmap_func)(void *priv, size_t len),
		int (*munmap_func)(void *priv, void *ptr, size_t len),
		void *mmap_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->mmap_set = true;
	attr->mmap_func = mmap_func;
	attr->munmap_func = munmap_func;
	attr->mmap_priv = mmap_priv;
	return 0;
}

int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
		int (*init_func)(void *priv, void *addr, size_t len, int cpu),
		void *init_priv)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->init_set = true;
	attr->init_func = init_func;
	attr->init_priv = init_priv;
	return 0;
}

int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->robust_set = true;
	return 0;
}

int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr,
		size_t stride, int max_nr_cpus)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_PERCPU;
	attr->stride = stride;
	attr->max_nr_cpus = max_nr_cpus;
	return 0;
}

int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr,
		size_t stride)
{
	if (!attr) {
		errno = EINVAL;
		return -1;
	}
	attr->type = MEMPOOL_TYPE_GLOBAL;
	attr->stride = stride;
	attr->max_nr_cpus = 0;
	return 0;
}

int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
	if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {
		errno = EINVAL;
		return -1;
	}
	return mempool->attr.max_nr_cpus;
}