X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=src%2Frseq-mempool.c;h=c5b46178cdf7b8efcab00fbef7d4b499086a62ec;hb=805d0043db4c6d645a783c1a994d2d43a8e946e1;hp=4d914ba37dd5fce4791539e90df8e22193cde04f;hpb=f510ddc54559adda6000ec59a80011f90fb8c60d;p=librseq.git diff --git a/src/rseq-mempool.c b/src/rseq-mempool.c index 4d914ba..c5b4617 100644 --- a/src/rseq-mempool.c +++ b/src/rseq-mempool.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: MIT // SPDX-FileCopyrightText: 2024 Mathieu Desnoyers +// SPDX-FileCopyrightText: 2024 Olivier Dion #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #ifdef HAVE_LIBNUMA # include @@ -20,6 +22,7 @@ #endif #include "rseq-utils.h" +#include /* * rseq-mempool.c: rseq CPU-Local Storage (CLS) memory allocator. @@ -36,6 +39,8 @@ #define POOL_SET_NR_ENTRIES RSEQ_BITS_PER_LONG +#define POOL_HEADER_NR_PAGES 2 + /* * Smallest allocation should hold enough space for a free list pointer. */ @@ -45,17 +50,23 @@ # define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */ #endif -/* - * Skip pool index 0 to ensure allocated entries at index 0 do not match - * a NULL pointer. - */ -#define FIRST_POOL 1 - #define BIT_PER_ULONG (8 * sizeof(unsigned long)) #define MOVE_PAGES_BATCH_SIZE 4096 -#define RANGE_HEADER_OFFSET sizeof(struct rseq_percpu_pool_range) +#define RANGE_HEADER_OFFSET sizeof(struct rseq_mempool_range) + +#if RSEQ_BITS_PER_LONG == 64 +# define DEFAULT_COW_INIT_POISON_VALUE 0x5555555555555555ULL +#else +# define DEFAULT_COW_INIT_POISON_VALUE 0x55555555UL +#endif + +/* + * Define the default COW_ZERO poison value as zero to prevent useless + * COW page allocation when writing poison values when freeing items. + */ +#define DEFAULT_COW_ZERO_POISON_VALUE 0x0 struct free_list_node; @@ -63,49 +74,104 @@ struct free_list_node { struct free_list_node *next; }; -struct rseq_pool_attr { - bool mmap_set; - void *(*mmap_func)(void *priv, size_t len); - int (*munmap_func)(void *priv, void *ptr, size_t len); - void *mmap_priv; +enum mempool_type { + MEMPOOL_TYPE_GLOBAL = 0, /* Default */ + MEMPOOL_TYPE_PERCPU = 1, +}; + +struct rseq_mempool_attr { + bool init_set; + int (*init_func)(void *priv, void *addr, size_t len, int cpu); + void *init_priv; bool robust_set; + + enum mempool_type type; + size_t stride; + int max_nr_cpus; + + unsigned long max_nr_ranges; + + bool poison_set; + uintptr_t poison; + + enum rseq_mempool_populate_policy populate_policy; }; -struct rseq_percpu_pool_range; +struct rseq_mempool_range; + +struct rseq_mempool_range { + struct rseq_mempool_range *next; /* Linked list of ranges. */ + struct rseq_mempool *pool; /* Backward reference to container pool. */ -struct rseq_percpu_pool_range { - struct rseq_percpu_pool_range *next; - struct rseq_percpu_pool *pool; /* Backward ref. to container pool. */ + /* + * Memory layout of a mempool range: + * - Canary header page (for detection of destroy-after-fork of + * COW_INIT pool), + * - Header page (contains struct rseq_mempool_range at the + * very end), + * - Base of the per-cpu data, starting with CPU 0. + * Aliases with free-list for non-robust COW_ZERO pool. + * - CPU 1, + * ... + * - CPU max_nr_cpus - 1 + * - init values (only allocated for COW_INIT pool). + * Aliases with free-list for non-robust COW_INIT pool. + * - free list (for robust pool). + * + * The free list aliases the CPU 0 memory area for non-robust + * COW_ZERO pools. It aliases with init values for non-robust + * COW_INIT pools. It is located immediately after the init + * values for robust pools. 
+ */ void *header; void *base; + /* + * The init values contains malloc_init/zmalloc values. + * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_COW_ZERO. + */ + void *init; size_t next_unused; + + /* Pool range mmap/munmap */ + void *mmap_addr; + size_t mmap_len; + /* Track alloc/free. */ unsigned long *alloc_bitmap; }; -struct rseq_percpu_pool { - /* Linked-list of ranges. */ - struct rseq_percpu_pool_range *ranges; +struct rseq_mempool { + /* Head of ranges linked-list. */ + struct rseq_mempool_range *range_list; + unsigned long nr_ranges; size_t item_len; - size_t percpu_stride; int item_order; - int max_nr_cpus; /* - * The free list chains freed items on the CPU 0 address range. - * We should rethink this decision if false sharing between - * malloc/free from other CPUs and data accesses from CPU 0 - * becomes an issue. This is a NULL-terminated singly-linked - * list. + * COW_INIT non-robust pools: + * The free list chains freed items on the init + * values address range. + * + * COW_ZERO non-robust pools: + * The free list chains freed items on the CPU 0 + * address range. We should rethink this + * decision if false sharing between malloc/free + * from other CPUs and data accesses from CPU 0 + * becomes an issue. + * + * Robust pools: The free list chains freed items in the + * address range dedicated for the free list. + * + * This is a NULL-terminated singly-linked list. */ struct free_list_node *free_list_head; /* This lock protects allocation/free within the pool. */ pthread_mutex_t lock; - struct rseq_pool_attr attr; + struct rseq_mempool_attr attr; char *name; }; @@ -114,136 +180,300 @@ struct rseq_percpu_pool { * 2. A pool set can contain NULL pool entries, in which case the next * large enough entry will be used for allocation. */ -struct rseq_percpu_pool_set { +struct rseq_mempool_set { /* This lock protects add vs malloc/zmalloc within the pool set. */ pthread_mutex_t lock; - struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES]; + struct rseq_mempool *entries[POOL_SET_NR_ENTRIES]; }; static -void *__rseq_pool_percpu_ptr(struct rseq_percpu_pool *pool, int cpu, +const char *get_pool_name(const struct rseq_mempool *pool) +{ + return pool->name ? : ""; +} + +static +void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu, uintptr_t item_offset, size_t stride) { - /* TODO: Implement multi-ranges support. */ - return pool->ranges->base + (stride * cpu) + item_offset; + return range->base + (stride * cpu) + item_offset; } static -void rseq_percpu_zero_item(struct rseq_percpu_pool *pool, uintptr_t item_offset) +void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range, + uintptr_t item_offset) { - int i; + if (!range->init) + return NULL; + return range->init + item_offset; +} + +static +void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool, + struct free_list_node *node) +{ + void __rseq_percpu *p = (void __rseq_percpu *) node; - for (i = 0; i < pool->max_nr_cpus; i++) { - char *p = __rseq_pool_percpu_ptr(pool, i, - item_offset, pool->percpu_stride); - memset(p, 0, pool->item_len); + if (pool->attr.robust_set) { + /* Skip cpus. 
*/ + p -= pool->attr.max_nr_cpus * pool->attr.stride; + /* Skip init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p -= pool->attr.stride; + + } else { + /* COW_INIT free list is in init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p -= pool->attr.max_nr_cpus * pool->attr.stride; } + return p; } -//TODO: this will need to be reimplemented for ranges, -//which cannot use __rseq_pool_percpu_ptr. -#if 0 //#ifdef HAVE_LIBNUMA static -int rseq_percpu_pool_range_init_numa(struct rseq_percpu_pool *pool, struct rseq_percpu_pool_range *range, int numa_flags) +struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool, + void __rseq_percpu *p) { - unsigned long nr_pages, page_len; - long ret; - int cpu; + if (pool->attr.robust_set) { + /* Skip cpus. */ + p += pool->attr.max_nr_cpus * pool->attr.stride; + /* Skip init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p += pool->attr.stride; - if (!numa_flags) - return 0; - page_len = rseq_get_page_len(); - nr_pages = pool->percpu_stride >> rseq_get_count_order_ulong(page_len); - for (cpu = 0; cpu < pool->max_nr_cpus; cpu++) { + } else { + /* COW_INIT free list is in init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + p += pool->attr.max_nr_cpus * pool->attr.stride; + } + return (struct free_list_node *) p; +} - int status[MOVE_PAGES_BATCH_SIZE]; - int nodes[MOVE_PAGES_BATCH_SIZE]; - void *pages[MOVE_PAGES_BATCH_SIZE]; +static +intptr_t rseq_cmp_item(void *p, size_t item_len, intptr_t cmp_value, intptr_t *unexpected_value) +{ + size_t offset; + intptr_t res = 0; - nodes[0] = numa_node_of_cpu(cpu); - for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) { - nodes[k] = nodes[0]; - } + for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) { + intptr_t v = *((intptr_t *) (p + offset)); - for (unsigned long page = 0; page < nr_pages;) { + if ((res = v - cmp_value) != 0) { + if (unexpected_value) + *unexpected_value = v; + break; + } + } + return res; +} - size_t max_k = RSEQ_ARRAY_SIZE(pages); - size_t left = nr_pages - page; +static +void rseq_percpu_zero_item(struct rseq_mempool *pool, + struct rseq_mempool_range *range, uintptr_t item_offset) +{ + char *init_p = NULL; + int i; - if (left < max_k) { - max_k = left; - } + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + bzero(init_p, pool->item_len); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + + /* + * If item is already zeroed, either because the + * init range update has propagated or because the + * content is already zeroed (e.g. zero page), don't + * write to the page. This eliminates useless COW over + * the zero page just for overwriting it with zeroes. + * + * This means zmalloc() in COW_ZERO policy pool do + * not trigger COW for CPUs which are not actively + * writing to the pool. This is however not the case for + * malloc_init() in populate-all pools if it populates + * non-zero content. 
+ */ + if (!rseq_cmp_item(p, pool->item_len, 0, NULL)) + continue; + bzero(p, pool->item_len); + } +} - for (size_t k = 0; k < max_k; ++k, ++page) { - pages[k] = __rseq_pool_percpu_ptr(pool, cpu, page * page_len); - status[k] = -EPERM; - } +static +void rseq_percpu_init_item(struct rseq_mempool *pool, + struct rseq_mempool_range *range, uintptr_t item_offset, + void *init_ptr, size_t init_len) +{ + char *init_p = NULL; + int i; - ret = move_pages(0, max_k, pages, nodes, status, numa_flags); + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + memcpy(init_p, init_ptr, init_len); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + + /* + * If the update propagated through a shared mapping, + * or the item already has the correct content, skip + * writing it into the cpu item to eliminate useless + * COW of the page. + */ + if (!memcmp(init_ptr, p, init_len)) + continue; + memcpy(p, init_ptr, init_len); + } +} - if (ret < 0) - return ret; +static +void rseq_poison_item(void *p, size_t item_len, uintptr_t poison) +{ + size_t offset; - if (ret > 0) { - fprintf(stderr, "%lu pages were not migrated\n", ret); - for (size_t k = 0; k < max_k; ++k) { - if (status[k] < 0) - fprintf(stderr, - "Error while moving page %p to numa node %d: %u\n", - pages[k], nodes[k], -status[k]); - } - } - } - } - return 0; + for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) + *((uintptr_t *) (p + offset)) = poison; } -int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool, int numa_flags) +static +void rseq_percpu_poison_item(struct rseq_mempool *pool, + struct rseq_mempool_range *range, uintptr_t item_offset) { - struct rseq_percpu_pool_range *range; - int ret; + uintptr_t poison = pool->attr.poison; + char *init_p = NULL; + int i; - if (!numa_flags) - return 0; - for (range = pool->ranges; range; range = range->next) { - ret = rseq_percpu_pool_range_init_numa(pool, range, numa_flags); - if (ret) - return ret; + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + rseq_poison_item(init_p, pool->item_len, poison); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + + /* + * If the update propagated through a shared mapping, + * or the item already has the correct content, skip + * writing it into the cpu item to eliminate useless + * COW of the page. + * + * It is recommended to use zero as poison value for + * COW_ZERO pools to eliminate COW due to writing + * poison to CPU memory still backed by the zero page. + */ + if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0) + continue; + rseq_poison_item(p, pool->item_len, poison); } - return 0; } -#else -int rseq_percpu_pool_init_numa(struct rseq_percpu_pool *pool __attribute__((unused)), - int numa_flags __attribute__((unused))) + +/* Always inline for __builtin_return_address(0). 
*/ +static inline __attribute__((always_inline)) +void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset, + void *p, size_t item_len, uintptr_t poison) { - return 0; + intptr_t unexpected_value; + + if (rseq_cmp_item(p, item_len, poison, &unexpected_value) == 0) + return; + + fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n", + __func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0)); + abort(); } -#endif -static -void *default_mmap_func(void *priv __attribute__((unused)), size_t len) +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void rseq_percpu_check_poison_item(const struct rseq_mempool *pool, + const struct rseq_mempool_range *range, uintptr_t item_offset) { - void *base; + uintptr_t poison = pool->attr.poison; + char *init_p; + int i; - base = mmap(NULL, len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (base == MAP_FAILED) - return NULL; - return base; + if (!pool->attr.robust_set) + return; + init_p = __rseq_pool_range_init_ptr(range, item_offset); + if (init_p) + rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison); + for (i = 0; i < pool->attr.max_nr_cpus; i++) { + char *p = __rseq_pool_range_percpu_ptr(range, i, + item_offset, pool->attr.stride); + rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison); + } } -static -int default_munmap_func(void *priv __attribute__((unused)), void *ptr, size_t len) +#ifdef HAVE_LIBNUMA +int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags) { - return munmap(ptr, len); + unsigned long nr_pages, page_len; + int status[MOVE_PAGES_BATCH_SIZE]; + int nodes[MOVE_PAGES_BATCH_SIZE]; + void *pages[MOVE_PAGES_BATCH_SIZE]; + long ret; + + if (!numa_flags) { + errno = EINVAL; + return -1; + } + page_len = rseq_get_page_len(); + nr_pages = len >> rseq_get_count_order_ulong(page_len); + + nodes[0] = numa_node_of_cpu(cpu); + if (nodes[0] < 0) + return -1; + + for (size_t k = 1; k < RSEQ_ARRAY_SIZE(nodes); ++k) { + nodes[k] = nodes[0]; + } + + for (unsigned long page = 0; page < nr_pages;) { + + size_t max_k = RSEQ_ARRAY_SIZE(pages); + size_t left = nr_pages - page; + + if (left < max_k) { + max_k = left; + } + + for (size_t k = 0; k < max_k; ++k, ++page) { + pages[k] = addr + (page * page_len); + status[k] = -EPERM; + } + + ret = move_pages(0, max_k, pages, nodes, status, numa_flags); + + if (ret < 0) + return ret; + + if (ret > 0) { + fprintf(stderr, "%lu pages were not migrated\n", ret); + for (size_t k = 0; k < max_k; ++k) { + if (status[k] < 0) + fprintf(stderr, + "Error while moving page %p to numa node %d: %u\n", + pages[k], nodes[k], -status[k]); + } + } + } + return 0; } +#else +int rseq_mempool_range_init_numa(void *addr __attribute__((unused)), + size_t len __attribute__((unused)), + int cpu __attribute__((unused)), + int numa_flags __attribute__((unused))) +{ + errno = ENOSYS; + return -1; +} +#endif static -int create_alloc_bitmap(struct rseq_percpu_pool *pool, struct rseq_percpu_pool_range *range) +int create_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range) { size_t count; - count = ((pool->percpu_stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; + count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; /* * Not being able to create the validation bitmap is an error @@ -256,17 +486,12 
@@ int create_alloc_bitmap(struct rseq_percpu_pool *pool, struct rseq_percpu_pool_r } static -const char *get_pool_name(const struct rseq_percpu_pool *pool) -{ - return pool->name ? : ""; -} - -static -bool addr_in_pool(const struct rseq_percpu_pool *pool, void *addr) +bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr) { - struct rseq_percpu_pool_range *range; + struct rseq_mempool_range *range; + void *addr = (void *) _addr; - for (range = pool->ranges; range; range = range->next) { + for (range = pool->range_list; range; range = range->next) { if (addr >= range->base && addr < range->base + range->next_unused) return true; } @@ -275,18 +500,18 @@ bool addr_in_pool(const struct rseq_percpu_pool *pool, void *addr) /* Always inline for __builtin_return_address(0). */ static inline __attribute__((always_inline)) -void check_free_list(const struct rseq_percpu_pool *pool) +void check_free_list(const struct rseq_mempool *pool, bool mapping_accessible) { size_t total_item = 0, total_never_allocated = 0, total_freed = 0, max_list_traversal = 0, traversal_iteration = 0; - struct rseq_percpu_pool_range *range; + struct rseq_mempool_range *range; - if (!pool->attr.robust_set) + if (!pool->attr.robust_set || !mapping_accessible) return; - for (range = pool->ranges; range; range = range->next) { - total_item += pool->percpu_stride >> pool->item_order; - total_never_allocated += (pool->percpu_stride - range->next_unused) >> pool->item_order; + for (range = pool->range_list; range; range = range->next) { + total_item += pool->attr.stride >> pool->item_order; + total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order; } max_list_traversal = total_item - total_never_allocated; @@ -295,8 +520,6 @@ void check_free_list(const struct rseq_percpu_pool *pool) prev = node, node = node->next) { - void *node_addr = node; - if (traversal_iteration >= max_list_traversal) { fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n", __func__, get_pool_name(pool), pool, __builtin_return_address(0)); @@ -304,7 +527,7 @@ void check_free_list(const struct rseq_percpu_pool *pool) } /* Node is out of range. */ - if (!addr_in_pool(pool, node_addr)) { + if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) { if (prev) fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n", __func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0)); @@ -327,7 +550,31 @@ void check_free_list(const struct rseq_percpu_pool *pool) /* Always inline for __builtin_return_address(0). */ static inline __attribute__((always_inline)) -void destroy_alloc_bitmap(struct rseq_percpu_pool *pool, struct rseq_percpu_pool_range *range) +void check_range_poison(const struct rseq_mempool *pool, + const struct rseq_mempool_range *range) +{ + size_t item_offset; + + for (item_offset = 0; item_offset < range->next_unused; + item_offset += pool->item_len) + rseq_percpu_check_poison_item(pool, range, item_offset); +} + +/* Always inline for __builtin_return_address(0). */ +static inline __attribute__((always_inline)) +void check_pool_poison(const struct rseq_mempool *pool, bool mapping_accessible) +{ + struct rseq_mempool_range *range; + + if (!pool->attr.robust_set || !mapping_accessible) + return; + for (range = pool->range_list; range; range = range->next) + check_range_poison(pool, range); +} + +/* Always inline for __builtin_return_address(0). 
*/ +static inline __attribute__((always_inline)) +void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range) { unsigned long *bitmap = range->alloc_bitmap; size_t count, total_leaks = 0; @@ -335,7 +582,7 @@ void destroy_alloc_bitmap(struct rseq_percpu_pool *pool, struct rseq_percpu_pool if (!bitmap) return; - count = ((pool->percpu_stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; + count = ((pool->attr.stride >> pool->item_order) + BIT_PER_ULONG - 1) / BIT_PER_ULONG; /* Assert that all items in the pool were freed. */ for (size_t k = 0; k < count; ++k) @@ -347,17 +594,24 @@ void destroy_alloc_bitmap(struct rseq_percpu_pool *pool, struct rseq_percpu_pool } free(bitmap); + range->alloc_bitmap = NULL; } /* Always inline for __builtin_return_address(0). */ static inline __attribute__((always_inline)) -int rseq_percpu_pool_range_destroy(struct rseq_percpu_pool *pool, - struct rseq_percpu_pool_range *range) +int rseq_mempool_range_destroy(struct rseq_mempool *pool, + struct rseq_mempool_range *range, + bool mapping_accessible) { destroy_alloc_bitmap(pool, range); - /* range is a header located one page before the aligned mapping. */ - return pool->attr.munmap_func(pool->attr.mmap_priv, range->header, - (pool->percpu_stride * pool->max_nr_cpus) + rseq_get_page_len()); + if (!mapping_accessible) { + /* + * Only the header pages are populated in the child + * process. + */ + return munmap(range->header, POOL_HEADER_NR_PAGES * rseq_get_page_len()); + } + return munmap(range->mmap_addr, range->mmap_len); } /* @@ -365,8 +619,7 @@ int rseq_percpu_pool_range_destroy(struct rseq_percpu_pool *pool, * @pre_header before the mapping. */ static -void *aligned_mmap_anonymous(struct rseq_percpu_pool *pool, - size_t page_size, size_t len, size_t alignment, +void *aligned_mmap_anonymous(size_t page_size, size_t len, size_t alignment, void **pre_header, size_t pre_header_len) { size_t minimum_page_count, page_count, extra, total_allocate = 0; @@ -374,7 +627,7 @@ void *aligned_mmap_anonymous(struct rseq_percpu_pool *pool, void *ptr; if (len < page_size || alignment < page_size || - !is_pow2(len) || !is_pow2(alignment)) { + !is_pow2(alignment) || (len & (alignment - 1))) { errno = EINVAL; return NULL; } @@ -393,9 +646,12 @@ void *aligned_mmap_anonymous(struct rseq_percpu_pool *pool, assert(page_count >= minimum_page_count); - ptr = pool->attr.mmap_func(pool->attr.mmap_priv, page_count << page_order); - if (!ptr) + ptr = mmap(NULL, page_count << page_order, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr == MAP_FAILED) { + ptr = NULL; goto alloc_error; + } total_allocate = page_count << page_order; @@ -407,7 +663,7 @@ void *aligned_mmap_anonymous(struct rseq_percpu_pool *pool, /* Unmap extra before. */ extra = offset_align((uintptr_t) ptr + pre_header_len, alignment); assert(!(extra & (page_size - 1))); - if (pool->attr.munmap_func(pool->attr.mmap_priv, ptr, extra)) { + if (munmap(ptr, extra)) { perror("munmap"); abort(); } @@ -423,7 +679,7 @@ out: /* Unmap extra after. 
*/ extra_ptr = ptr + (minimum_page_count << page_order); extra = (page_count - minimum_page_count) << page_order; - if (pool->attr.munmap_func(pool->attr.mmap_priv, extra_ptr, extra)) { + if (munmap(extra_ptr, extra)) { perror("munmap"); abort(); } @@ -443,64 +699,233 @@ alloc_error: } static -struct rseq_percpu_pool_range *rseq_percpu_pool_range_create(struct rseq_percpu_pool *pool) +int rseq_memfd_create_init(const char *poolname, size_t init_len) { - struct rseq_percpu_pool_range *range; + int fd; + char buf[249]; /* Limit is 249 bytes. */ + const char *name; + + if (poolname) { + snprintf(buf, sizeof(buf), "%s:rseq-mempool", poolname); + name = buf; + } else { + name = ":rseq-mempool"; + } + + fd = memfd_create(name, MFD_CLOEXEC); + if (fd < 0) { + perror("memfd_create"); + goto end; + } + if (ftruncate(fd, (off_t) init_len)) { + if (close(fd)) + perror("close"); + fd = -1; + goto end; + } +end: + return fd; +} + +static +void rseq_memfd_close(int fd) +{ + if (fd < 0) + return; + if (close(fd)) + perror("close"); +} + +static +struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool) +{ + struct rseq_mempool_range *range; unsigned long page_size; void *header; void *base; + size_t range_len; /* Range len excludes header. */ + size_t header_len; + int memfd = -1; + if (pool->attr.max_nr_ranges && + pool->nr_ranges >= pool->attr.max_nr_ranges) { + errno = ENOMEM; + return NULL; + } page_size = rseq_get_page_len(); - base = aligned_mmap_anonymous(pool, page_size, - pool->percpu_stride * pool->max_nr_cpus, - pool->percpu_stride, - &header, page_size); + header_len = POOL_HEADER_NR_PAGES * page_size; + range_len = pool->attr.stride * pool->attr.max_nr_cpus; + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + range_len += pool->attr.stride; /* init values */ + if (pool->attr.robust_set) + range_len += pool->attr.stride; /* dedicated free list */ + base = aligned_mmap_anonymous(page_size, range_len, + pool->attr.stride, &header, header_len); if (!base) return NULL; - range = (struct rseq_percpu_pool_range *) (base - RANGE_HEADER_OFFSET); + range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET); range->pool = pool; - range->base = base; range->header = header; + range->base = base; + range->mmap_addr = header; + range->mmap_len = header_len + range_len; + + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) { + range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus); + /* Populate init values pages from memfd */ + memfd = rseq_memfd_create_init(pool->name, pool->attr.stride); + if (memfd < 0) + goto error_alloc; + if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, memfd, 0) != (void *) range->init) + goto error_alloc; + assert(pool->attr.type == MEMPOOL_TYPE_PERCPU); + /* + * Map per-cpu memory as private COW mappings of init values. + */ + { + int cpu; + + for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) { + void *p = base + (pool->attr.stride * cpu); + size_t len = pool->attr.stride; + + if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, + memfd, 0) != (void *) p) + goto error_alloc; + } + } + /* + * The init values shared mapping should not be shared + * with the children processes across fork. Prevent the + * whole mapping from being used across fork. + */ + if (madvise(base, range_len, MADV_DONTFORK)) + goto error_alloc; + + /* + * Write 0x1 in first byte of header first page, which + * will be WIPEONFORK (and thus cleared) in children + * processes. 
Used to find out if pool destroy is called + * from a child process after fork. + */ + *((char *) header) = 0x1; + if (madvise(header, page_size, MADV_WIPEONFORK)) + goto error_alloc; + + /* + * The second header page contains the struct + * rseq_mempool_range, which is needed by pool destroy. + * Leave this anonymous page populated (COW) in child + * processes. + */ + rseq_memfd_close(memfd); + memfd = -1; + } + if (pool->attr.robust_set) { if (create_alloc_bitmap(pool, range)) goto error_alloc; } + if (pool->attr.init_set) { + switch (pool->attr.type) { + case MEMPOOL_TYPE_GLOBAL: + if (pool->attr.init_func(pool->attr.init_priv, + base, pool->attr.stride, -1)) { + goto error_alloc; + } + break; + case MEMPOOL_TYPE_PERCPU: + { + int cpu; + for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) { + if (pool->attr.init_func(pool->attr.init_priv, + base + (pool->attr.stride * cpu), + pool->attr.stride, cpu)) { + goto error_alloc; + } + } + break; + } + default: + abort(); + } + } + pool->nr_ranges++; return range; error_alloc: - (void) rseq_percpu_pool_range_destroy(pool, range); + rseq_memfd_close(memfd); + (void) rseq_mempool_range_destroy(pool, range, true); return NULL; } -int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool) +static +bool pool_mappings_accessible(struct rseq_mempool *pool) +{ + struct rseq_mempool_range *range; + size_t page_size; + char *addr; + + if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_COW_INIT) + return true; + range = pool->range_list; + if (!range) + return true; + page_size = rseq_get_page_len(); + /* + * Header first page is one page before the page containing the + * range structure. + */ + addr = (char *) ((uintptr_t) range & ~(page_size - 1)) - page_size; + /* + * Look for 0x1 first byte marker in header first page. + */ + if (*addr != 0x1) + return false; + return true; +} + +int rseq_mempool_destroy(struct rseq_mempool *pool) { - struct rseq_percpu_pool_range *range, *next_range; + struct rseq_mempool_range *range, *next_range; + bool mapping_accessible; int ret = 0; if (!pool) return 0; - check_free_list(pool); + + /* + * Validate that the pool mappings are accessible before doing + * free list/poison validation and unmapping ranges. This allows + * calling pool destroy in child process after a fork for COW_INIT + * pools to free pool resources. + */ + mapping_accessible = pool_mappings_accessible(pool); + + check_free_list(pool, mapping_accessible); + check_pool_poison(pool, mapping_accessible); + /* Iteration safe against removal. */ - for (range = pool->ranges; range && (next_range = range->next, 1); range = next_range) { - if (rseq_percpu_pool_range_destroy(pool, range)) + for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) { + if (rseq_mempool_range_destroy(pool, range, mapping_accessible)) goto end; /* Update list head to keep list coherent in case of partial failure. 
*/ - pool->ranges = next_range; + pool->range_list = next_range; } pthread_mutex_destroy(&pool->lock); free(pool->name); - memset(pool, 0, sizeof(*pool)); + free(pool); end: return ret; } -struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name, - size_t item_len, size_t percpu_stride, int max_nr_cpus, - const struct rseq_pool_attr *_attr) +struct rseq_mempool *rseq_mempool_create(const char *pool_name, + size_t item_len, const struct rseq_mempool_attr *_attr) { - struct rseq_percpu_pool *pool; - struct rseq_pool_attr attr = {}; + struct rseq_mempool *pool; + struct rseq_mempool_attr attr = {}; int order; /* Make sure each item is large enough to contain free list pointers. */ @@ -515,38 +940,71 @@ struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name, } item_len = 1UL << order; - if (!percpu_stride) - percpu_stride = RSEQ_PERCPU_STRIDE; /* Use default */ + if (_attr) + memcpy(&attr, _attr, sizeof(attr)); - if (max_nr_cpus < 0 || item_len > percpu_stride || - percpu_stride < (size_t) rseq_get_page_len() || - !is_pow2(percpu_stride)) { + /* + * Validate that the pool populate policy requested is known. + */ + switch (attr.populate_policy) { + case RSEQ_MEMPOOL_POPULATE_COW_INIT: + break; + case RSEQ_MEMPOOL_POPULATE_COW_ZERO: + break; + default: errno = EINVAL; return NULL; } - if (_attr) - memcpy(&attr, _attr, sizeof(attr)); - if (!attr.mmap_set) { - attr.mmap_func = default_mmap_func; - attr.munmap_func = default_munmap_func; - attr.mmap_priv = NULL; + switch (attr.type) { + case MEMPOOL_TYPE_PERCPU: + if (attr.max_nr_cpus < 0) { + errno = EINVAL; + return NULL; + } + if (attr.max_nr_cpus == 0) { + /* Auto-detect */ + attr.max_nr_cpus = rseq_get_max_nr_cpus(); + if (attr.max_nr_cpus == 0) { + errno = EINVAL; + return NULL; + } + } + break; + case MEMPOOL_TYPE_GLOBAL: + /* Override populate policy for global type. */ + if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + attr.populate_policy = RSEQ_MEMPOOL_POPULATE_COW_ZERO; + /* Use a 1-cpu pool for global mempool type. */ + attr.max_nr_cpus = 1; + break; + } + if (!attr.stride) + attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */ + if (attr.robust_set && !attr.poison_set) { + attr.poison_set = true; + if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + attr.poison = DEFAULT_COW_INIT_POISON_VALUE; + else + attr.poison = DEFAULT_COW_ZERO_POISON_VALUE; + } + if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() || + !is_pow2(attr.stride)) { + errno = EINVAL; + return NULL; } - pool = calloc(1, sizeof(struct rseq_percpu_pool)); + pool = calloc(1, sizeof(struct rseq_mempool)); if (!pool) return NULL; memcpy(&pool->attr, &attr, sizeof(attr)); pthread_mutex_init(&pool->lock, NULL); - pool->percpu_stride = percpu_stride; - pool->max_nr_cpus = max_nr_cpus; pool->item_len = item_len; pool->item_order = order; - //TODO: implement multi-range support. - pool->ranges = rseq_percpu_pool_range_create(pool); - if (!pool->ranges) + pool->range_list = rseq_mempool_range_create(pool); + if (!pool->range_list) goto error_alloc; if (pool_name) { @@ -557,16 +1015,16 @@ struct rseq_percpu_pool *rseq_percpu_pool_create(const char *pool_name, return pool; error_alloc: - rseq_percpu_pool_destroy(pool); + rseq_mempool_destroy(pool); errno = ENOMEM; return NULL; } /* Always inline for __builtin_return_address(0). 
*/ static inline __attribute__((always_inline)) -void set_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset) +void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset) { - unsigned long *bitmap = pool->ranges->alloc_bitmap; + unsigned long *bitmap = range->alloc_bitmap; size_t item_index = item_offset >> pool->item_order; unsigned long mask; size_t k; @@ -587,54 +1045,91 @@ void set_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset) } static -void __rseq_percpu *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed) +void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, + bool zeroed, void *init_ptr, size_t init_len) { + struct rseq_mempool_range *range; struct free_list_node *node; uintptr_t item_offset; void __rseq_percpu *addr; + if (init_len > pool->item_len) { + errno = EINVAL; + return NULL; + } pthread_mutex_lock(&pool->lock); /* Get first entry from free list. */ node = pool->free_list_head; if (node != NULL) { + void *range_base, *ptr; + + ptr = __rseq_free_list_to_percpu_ptr(pool, node); + range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1))); + range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET); /* Remove node from free list (update head). */ pool->free_list_head = node->next; - item_offset = (uintptr_t) ((void *) node - pool->ranges->base); - addr = (void __rseq_percpu *) (pool->ranges->base + item_offset); + item_offset = (uintptr_t) (ptr - range_base); + rseq_percpu_check_poison_item(pool, range, item_offset); + addr = __rseq_free_list_to_percpu_ptr(pool, node); goto end; } - if (pool->ranges->next_unused + pool->item_len > pool->percpu_stride) { - errno = ENOMEM; - addr = NULL; - goto end; + /* + * If the most recent range (first in list) does not have any + * room left, create a new range and prepend it to the list + * head. + */ + range = pool->range_list; + if (range->next_unused + pool->item_len > pool->attr.stride) { + range = rseq_mempool_range_create(pool); + if (!range) { + errno = ENOMEM; + addr = NULL; + goto end; + } + /* Add range to head of list. */ + range->next = pool->range_list; + pool->range_list = range; } - item_offset = pool->ranges->next_unused; - addr = (void __rseq_percpu *) (pool->ranges->base + item_offset); - pool->ranges->next_unused += pool->item_len; + /* First range in list has room left. 
*/ + item_offset = range->next_unused; + addr = (void __rseq_percpu *) (range->base + item_offset); + range->next_unused += pool->item_len; end: if (addr) - set_alloc_slot(pool, item_offset); + set_alloc_slot(pool, range, item_offset); pthread_mutex_unlock(&pool->lock); - if (zeroed && addr) - rseq_percpu_zero_item(pool, item_offset); + if (addr) { + if (zeroed) + rseq_percpu_zero_item(pool, range, item_offset); + else if (init_ptr) { + rseq_percpu_init_item(pool, range, item_offset, + init_ptr, init_len); + } + } return addr; } -void __rseq_percpu *rseq_percpu_malloc(struct rseq_percpu_pool *pool) +void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool) { - return __rseq_percpu_malloc(pool, false); + return __rseq_percpu_malloc(pool, false, NULL, 0); } -void __rseq_percpu *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool) +void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool) { - return __rseq_percpu_malloc(pool, true); + return __rseq_percpu_malloc(pool, true, NULL, 0); +} + +void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool, + void *init_ptr, size_t len) +{ + return __rseq_percpu_malloc(pool, false, init_ptr, len); } /* Always inline for __builtin_return_address(0). */ static inline __attribute__((always_inline)) -void clear_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset) +void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset) { - unsigned long *bitmap = pool->ranges->alloc_bitmap; + unsigned long *bitmap = range->alloc_bitmap; size_t item_index = item_offset >> pool->item_order; unsigned long mask; size_t k; @@ -655,47 +1150,53 @@ void clear_alloc_slot(struct rseq_percpu_pool *pool, size_t item_offset) bitmap[k] &= ~mask; } -void __rseq_percpu_free(void __rseq_percpu *_ptr, size_t percpu_stride) +void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride) { uintptr_t ptr = (uintptr_t) _ptr; - void *range_base = (void *) (ptr & (~(percpu_stride - 1))); - struct rseq_percpu_pool_range *range = (struct rseq_percpu_pool_range *) (range_base - RANGE_HEADER_OFFSET); - struct rseq_percpu_pool *pool = range->pool; - uintptr_t item_offset = ptr & (percpu_stride - 1); + void *range_base = (void *) (ptr & (~(stride - 1))); + struct rseq_mempool_range *range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET); + struct rseq_mempool *pool = range->pool; + uintptr_t item_offset = ptr & (stride - 1); struct free_list_node *head, *item; pthread_mutex_lock(&pool->lock); - clear_alloc_slot(pool, item_offset); + clear_alloc_slot(pool, range, item_offset); /* Add ptr to head of free list */ head = pool->free_list_head; - /* Free-list is in CPU 0 range. */ - item = (struct free_list_node *) ptr; + if (pool->attr.poison_set) + rseq_percpu_poison_item(pool, range, item_offset); + item = __rseq_percpu_to_free_list_ptr(pool, _ptr); + /* + * Setting the next pointer will overwrite the first uintptr_t + * poison for either CPU 0 (COW_ZERO, non-robust), or init data + * (COW_INIT, non-robust). 
+ */ item->next = head; pool->free_list_head = item; pthread_mutex_unlock(&pool->lock); } -struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void) +struct rseq_mempool_set *rseq_mempool_set_create(void) { - struct rseq_percpu_pool_set *pool_set; + struct rseq_mempool_set *pool_set; - pool_set = calloc(1, sizeof(struct rseq_percpu_pool_set)); + pool_set = calloc(1, sizeof(struct rseq_mempool_set)); if (!pool_set) return NULL; pthread_mutex_init(&pool_set->lock, NULL); return pool_set; } -int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set) +int rseq_mempool_set_destroy(struct rseq_mempool_set *pool_set) { int order, ret; for (order = POOL_SET_MIN_ENTRY; order < POOL_SET_NR_ENTRIES; order++) { - struct rseq_percpu_pool *pool = pool_set->entries[order]; + struct rseq_mempool *pool = pool_set->entries[order]; if (!pool) continue; - ret = rseq_percpu_pool_destroy(pool); + ret = rseq_mempool_destroy(pool); if (ret) return ret; pool_set->entries[order] = NULL; @@ -706,7 +1207,7 @@ int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set) } /* Ownership of pool is handed over to pool set on success. */ -int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool) +int rseq_mempool_set_add_pool(struct rseq_mempool_set *pool_set, struct rseq_mempool *pool) { size_t item_order = pool->item_order; int ret = 0; @@ -724,10 +1225,11 @@ end: } static -void __rseq_percpu *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed) +void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, + void *init_ptr, size_t len, bool zeroed) { int order, min_order = POOL_SET_MIN_ENTRY; - struct rseq_percpu_pool *pool; + struct rseq_mempool *pool; void __rseq_percpu *addr; order = rseq_get_count_order_ulong(len); @@ -748,7 +1250,7 @@ again: found: pthread_mutex_unlock(&pool_set->lock); if (pool) { - addr = __rseq_percpu_malloc(pool, zeroed); + addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len); if (addr == NULL && errno == ENOMEM) { /* * If the allocation failed, try again with a @@ -765,43 +1267,48 @@ found: return addr; } -void __rseq_percpu *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len) +void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len) { - return __rseq_percpu_pool_set_malloc(pool_set, len, false); + return __rseq_mempool_set_malloc(pool_set, NULL, len, false); } -void __rseq_percpu *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len) +void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len) { - return __rseq_percpu_pool_set_malloc(pool_set, len, true); + return __rseq_mempool_set_malloc(pool_set, NULL, len, true); } -struct rseq_pool_attr *rseq_pool_attr_create(void) +void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set, + void *init_ptr, size_t len) { - return calloc(1, sizeof(struct rseq_pool_attr)); + return __rseq_mempool_set_malloc(pool_set, init_ptr, len, true); } -void rseq_pool_attr_destroy(struct rseq_pool_attr *attr) +struct rseq_mempool_attr *rseq_mempool_attr_create(void) +{ + return calloc(1, sizeof(struct rseq_mempool_attr)); +} + +void rseq_mempool_attr_destroy(struct rseq_mempool_attr *attr) { free(attr); } -int rseq_pool_attr_set_mmap(struct rseq_pool_attr *attr, - void *(*mmap_func)(void *priv, size_t len), - int (*munmap_func)(void *priv, void *ptr, size_t len), - 
void *mmap_priv) +int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr, + int (*init_func)(void *priv, void *addr, size_t len, int cpu), + void *init_priv) { if (!attr) { errno = EINVAL; return -1; } - attr->mmap_set = true; - attr->mmap_func = mmap_func; - attr->munmap_func = munmap_func; - attr->mmap_priv = mmap_priv; + attr->init_set = true; + attr->init_func = init_func; + attr->init_priv = init_priv; + attr->populate_policy = RSEQ_MEMPOOL_POPULATE_COW_INIT; return 0; } -int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr) +int rseq_mempool_attr_set_robust(struct rseq_mempool_attr *attr) { if (!attr) { errno = EINVAL; @@ -810,3 +1317,72 @@ int rseq_pool_attr_set_robust(struct rseq_pool_attr *attr) attr->robust_set = true; return 0; } + +int rseq_mempool_attr_set_percpu(struct rseq_mempool_attr *attr, + size_t stride, int max_nr_cpus) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->type = MEMPOOL_TYPE_PERCPU; + attr->stride = stride; + attr->max_nr_cpus = max_nr_cpus; + return 0; +} + +int rseq_mempool_attr_set_global(struct rseq_mempool_attr *attr, + size_t stride) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->type = MEMPOOL_TYPE_GLOBAL; + attr->stride = stride; + attr->max_nr_cpus = 0; + return 0; +} + +int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr, + unsigned long max_nr_ranges) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->max_nr_ranges = max_nr_ranges; + return 0; +} + +int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr, + uintptr_t poison) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->poison_set = true; + attr->poison = poison; + return 0; +} + +int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr, + enum rseq_mempool_populate_policy policy) +{ + if (!attr) { + errno = EINVAL; + return -1; + } + attr->populate_policy = policy; + return 0; +} + +int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool) +{ + if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) { + errno = EINVAL; + return -1; + } + return mempool->attr.max_nr_cpus; +}
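
Usage sketch (not part of the patch above): the example below exercises the renamed rseq_mempool API added by this commit. It is a minimal sketch under a few assumptions: the public header name is assumed to be <rseq/mempool.h> (the #include file names are stripped in this plain-text rendering), and the rseq_percpu_ptr() accessor plus the rseq_mempool_percpu_free() wrapper over librseq_mempool_percpu_free() with the default RSEQ_MEMPOOL_STRIDE are assumed to come from that header rather than from this file. Error handling is kept minimal.

	#include <stdio.h>
	#include <stdlib.h>
	#include <rseq/mempool.h>	/* Assumed public header for the rseq_mempool API. */

	struct counter {
		long count;
	};

	int main(void)
	{
		struct rseq_mempool_attr *attr;
		struct rseq_mempool *pool;
		struct counter __rseq_percpu *c;
		int nr_cpus, cpu;

		attr = rseq_mempool_attr_create();
		if (!attr)
			abort();
		/* Per-CPU pool, default stride; max_nr_cpus == 0 auto-detects the CPU count. */
		if (rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0))
			abort();
		/* Robust pool: free-list and poison validation on destroy. */
		if (rseq_mempool_attr_set_robust(attr))
			abort();
		/* Per-CPU ranges become private COW mappings of the shared init values memfd. */
		if (rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_COW_INIT))
			abort();
		pool = rseq_mempool_create("example-counters", sizeof(struct counter), attr);
		rseq_mempool_attr_destroy(attr);
		if (!pool)
			abort();

		c = rseq_mempool_percpu_zmalloc(pool);
		if (!c)
			abort();
		nr_cpus = rseq_mempool_get_max_nr_cpus(pool);
		for (cpu = 0; cpu < nr_cpus; cpu++)
			printf("cpu %d count %ld\n", cpu,
				rseq_percpu_ptr(c, cpu)->count);	/* Assumed header accessor. */

		rseq_mempool_percpu_free(c);	/* Assumed header wrapper over librseq_mempool_percpu_free(). */
		if (rseq_mempool_destroy(pool))
			perror("rseq_mempool_destroy");
		return 0;
	}

With the robust attribute set, rseq_mempool_destroy() validates the free list and checks the poison values of freed items before unmapping the ranges. With the COW_INIT populate policy, zeroing and poisoning are written to the shared init mapping first, and per-CPU pages whose content already matches are skipped, which avoids needless COW page allocation.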