#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
+#include <fcntl.h>
#ifdef HAVE_LIBNUMA
# include <numa.h>
# define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */
#endif
-/*
- * Skip pool index 0 to ensure allocated entries at index 0 do not match
- * a NULL pointer.
- */
-#define FIRST_POOL 1
-
#define BIT_PER_ULONG (8 * sizeof(unsigned long))
#define MOVE_PAGES_BATCH_SIZE 4096
#define RANGE_HEADER_OFFSET sizeof(struct rseq_mempool_range)
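+/*
+ * Default poison word used by robust pools when no poison value is
+ * specified: a repeated 0x55 byte pattern sized to the native word.
+ */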
+#if RSEQ_BITS_PER_LONG == 64
+# define DEFAULT_POISON_VALUE 0x5555555555555555ULL
+#else
+# define DEFAULT_POISON_VALUE 0x55555555UL
+#endif
+
struct free_list_node;
struct free_list_node {
void *mmap_priv;
bool init_set;
- void (*init_func)(void *priv, void *addr, size_t len, int cpu);
+ int (*init_func)(void *priv, void *addr, size_t len, int cpu);
void *init_priv;
bool robust_set;
enum mempool_type type;
size_t stride;
int max_nr_cpus;
+
+ unsigned long max_nr_ranges;
+
+ bool poison_set;
+ uintptr_t poison;
+
+ enum rseq_mempool_populate_policy populate_policy;
};
struct rseq_mempool_range;
struct rseq_mempool_range {
- struct rseq_mempool_range *next;
- struct rseq_mempool *pool; /* Backward ref. to container pool. */
+ struct rseq_mempool_range *next; /* Linked list of ranges. */
+ struct rseq_mempool *pool; /* Backward reference to container pool. */
+
+	/*
+	 * Memory layout of a mempool range:
+	 * - Header page (contains struct rseq_mempool_range at the very end),
+	 * - Base of the per-cpu data, starting with CPU 0.
+	 *   Aliases with the free list for non-robust populate-all pools.
+	 * - CPU 1,
+	 * ...
+	 * - CPU max_nr_cpus - 1
+	 * - init values (unpopulated for RSEQ_MEMPOOL_POPULATE_ALL).
+	 *   Aliases with the free list for non-robust populate-none pools.
+	 * - free list (for robust pools).
+	 *
+	 * The free list aliases the CPU 0 memory area for non-robust
+	 * populate-all pools. It aliases with the init values for
+	 * non-robust populate-none pools. It is located immediately
+	 * after the init values for robust pools.
+	 */
void *header;
void *base;
+ /*
+	 * The init values area contains the initial values used by
+	 * malloc_init/zmalloc. The pointer is NULL for
+	 * RSEQ_MEMPOOL_POPULATE_ALL.
+ */
+ void *init;
size_t next_unused;
+
+ /* Pool range mmap/munmap */
+ void *mmap_addr;
+ size_t mmap_len;
+
/* Track alloc/free. */
unsigned long *alloc_bitmap;
};
struct rseq_mempool {
- /* Linked-list of ranges. */
- struct rseq_mempool_range *ranges;
+ /* Head of ranges linked-list. */
+ struct rseq_mempool_range *range_list;
+ unsigned long nr_ranges;
size_t item_len;
int item_order;
struct rseq_mempool *entries[POOL_SET_NR_ENTRIES];
};
+/*
+ * This memfd is used to implement the user COW behavior for the page
+ * protection scheme. memfd is a sparse virtual file. Its layout (in
+ * offset from beginning of file) matches the process address space
+ * (pointers directly converted to file offsets).
+ */
+struct rseq_memfd {
+ pthread_mutex_t lock;
+ size_t reserved_size;
+ unsigned int refcount;
+ int fd;
+};
+
+static struct rseq_memfd memfd = {
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ .reserved_size = 0,
+ .refcount = 0,
+ .fd = -1,
+};
+
static
-void *__rseq_pool_range_percpu_ptr(struct rseq_mempool_range *range, int cpu,
+const char *get_pool_name(const struct rseq_mempool *pool)
+{
+ return pool->name ? : "<anonymous>";
+}
+
+static
+void *__rseq_pool_range_percpu_ptr(const struct rseq_mempool_range *range, int cpu,
uintptr_t item_offset, size_t stride)
{
return range->base + (stride * cpu) + item_offset;
}
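+/*
+ * Return a pointer into the range's init values area for @item_offset,
+ * or NULL when the range has no init values area (populate-all policy).
+ */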
+static
+void *__rseq_pool_range_init_ptr(const struct rseq_mempool_range *range,
+ uintptr_t item_offset)
+{
+ if (!range->init)
+ return NULL;
+ return range->init + item_offset;
+}
+
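+/*
+ * Convert a free-list node pointer to the percpu pointer of the item it
+ * represents. The free-list node is stored past the per-cpu data: in the
+ * dedicated free-list area for robust pools, in the init values area for
+ * non-robust populate-none pools, and directly in the CPU 0 area for
+ * non-robust populate-all pools.
+ */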
+static
+void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *pool,
+ struct free_list_node *node)
+{
+ void __rseq_percpu *p = (void __rseq_percpu *) node;
+
+ if (pool->attr.robust_set) {
+ /* Skip cpus. */
+ p -= pool->attr.max_nr_cpus * pool->attr.stride;
+ /* Skip init values */
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
+ p -= pool->attr.stride;
+
+ } else {
+		/* Populate-none pools keep the free list in the init values area. */
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
+ p -= pool->attr.max_nr_cpus * pool->attr.stride;
+ }
+ return p;
+}
+
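+/*
+ * Inverse of __rseq_free_list_to_percpu_ptr: convert a percpu pointer to
+ * the location where its free-list node is stored.
+ */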
+static
+struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool *pool,
+ void __rseq_percpu *p)
+{
+ if (pool->attr.robust_set) {
+ /* Skip cpus. */
+ p += pool->attr.max_nr_cpus * pool->attr.stride;
+ /* Skip init values */
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
+ p += pool->attr.stride;
+
+ } else {
+		/* Populate-none pools keep the free list in the init values area. */
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
+ p += pool->attr.max_nr_cpus * pool->attr.stride;
+ }
+ return (struct free_list_node *) p;
+}
+
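+/*
+ * The memfd layout mirrors the process address space, so a pointer is
+ * converted directly into its file offset.
+ */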
+static
+off_t ptr_to_off_t(void *p)
+{
+ return (off_t) (uintptr_t) p;
+}
+
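+/*
+ * Compare @n bytes starting at @s against the byte value @c. Return 0 if
+ * they all match, otherwise the difference at the first mismatching byte.
+ */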
+static
+int memcmpbyte(const char *s, int c, size_t n)
+{
+ int res = 0;
+
+ while (n-- > 0)
+ if ((res = *(s++) - c) != 0)
+ break;
+ return res;
+}
+
static
void rseq_percpu_zero_item(struct rseq_mempool *pool,
struct rseq_mempool_range *range, uintptr_t item_offset)
{
+ char *init_p = NULL;
int i;
+ init_p = __rseq_pool_range_init_ptr(range, item_offset);
+ if (init_p)
+ memset(init_p, 0, pool->item_len);
for (i = 0; i < pool->attr.max_nr_cpus; i++) {
char *p = __rseq_pool_range_percpu_ptr(range, i,
item_offset, pool->attr.stride);
+
+ /*
+ * If item is already zeroed, either because the
+ * init range update has propagated or because the
+ * content is already zeroed (e.g. zero page), don't
+ * write to the page. This eliminates useless COW over
+ * the zero page just for overwriting it with zeroes.
+ *
+		 * This means zmalloc() in a populate-all policy pool does
+		 * not trigger COW for CPUs which are not actively
+		 * writing to the pool. This is however not the case for
+		 * malloc_init() in populate-all pools if it populates
+		 * non-zero content.
+ */
+ if (!memcmpbyte(p, 0, pool->item_len))
+ continue;
memset(p, 0, pool->item_len);
}
}
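+/*
+ * Copy @init_len bytes from @init_ptr into the item's init values (if the
+ * range has an init values area) and into each CPU's copy of the item.
+ */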
+static
+void rseq_percpu_init_item(struct rseq_mempool *pool,
+ struct rseq_mempool_range *range, uintptr_t item_offset,
+ void *init_ptr, size_t init_len)
+{
+ char *init_p = NULL;
+ int i;
+
+ init_p = __rseq_pool_range_init_ptr(range, item_offset);
+ if (init_p)
+ memcpy(init_p, init_ptr, init_len);
+ for (i = 0; i < pool->attr.max_nr_cpus; i++) {
+ char *p = __rseq_pool_range_percpu_ptr(range, i,
+ item_offset, pool->attr.stride);
+
+ /*
+ * If the update propagated through a shared mapping,
+ * or the item already has the correct content, skip
+ * writing it into the cpu item to eliminate useless
+ * COW of the page.
+ */
+ if (!memcmp(init_ptr, p, init_len))
+ continue;
+ memcpy(p, init_ptr, init_len);
+ }
+}
+
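+/* Fill the item with the poison word, one uintptr_t at a time. */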
+static
+void rseq_poison_item(void *p, size_t item_len, uintptr_t poison)
+{
+ size_t offset;
+
+ for (offset = 0; offset < item_len; offset += sizeof(uintptr_t))
+ *((uintptr_t *) (p + offset)) = poison;
+}
+
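+/*
+ * Check that the item only contains the poison word. Return 0 on match,
+ * otherwise a non-zero value, storing the mismatching word in
+ * @unexpected_value when non-NULL.
+ */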
+static
+intptr_t rseq_cmp_poison_item(void *p, size_t item_len, uintptr_t poison, intptr_t *unexpected_value)
+{
+ size_t offset;
+ intptr_t res = 0;
+
+ for (offset = 0; offset < item_len; offset += sizeof(uintptr_t)) {
+ intptr_t v = *((intptr_t *) (p + offset));
+
+ if ((res = v - (intptr_t) poison) != 0) {
+ if (unexpected_value)
+ *unexpected_value = v;
+ break;
+ }
+ }
+ return res;
+}
+
+static
+void rseq_percpu_poison_item(struct rseq_mempool *pool,
+ struct rseq_mempool_range *range, uintptr_t item_offset)
+{
+ uintptr_t poison = pool->attr.poison;
+ char *init_p = NULL;
+ int i;
+
+ init_p = __rseq_pool_range_init_ptr(range, item_offset);
+ if (init_p)
+ rseq_poison_item(init_p, pool->item_len, poison);
+ for (i = 0; i < pool->attr.max_nr_cpus; i++) {
+ char *p = __rseq_pool_range_percpu_ptr(range, i,
+ item_offset, pool->attr.stride);
+
+ /*
+ * If the update propagated through a shared mapping,
+ * or the item already has the correct content, skip
+ * writing it into the cpu item to eliminate useless
+ * COW of the page.
+ *
+ * It is recommended to use zero as poison value for
+ * populate-all pools to eliminate COW due to writing
+ * poison to unused CPU memory.
+ */
+ if (rseq_cmp_poison_item(p, pool->item_len, poison, NULL) == 0)
+ continue;
+ rseq_poison_item(p, pool->item_len, poison);
+ }
+}
+
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void rseq_check_poison_item(const struct rseq_mempool *pool, uintptr_t item_offset,
+ void *p, size_t item_len, uintptr_t poison)
+{
+ intptr_t unexpected_value;
+
+ if (rseq_cmp_poison_item(p, item_len, poison, &unexpected_value) == 0)
+ return;
+
+ fprintf(stderr, "%s: Poison corruption detected (0x%lx) for pool: \"%s\" (%p), item offset: %zu, caller: %p.\n",
+ __func__, (unsigned long) unexpected_value, get_pool_name(pool), pool, item_offset, (void *) __builtin_return_address(0));
+ abort();
+}
+
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void rseq_percpu_check_poison_item(const struct rseq_mempool *pool,
+ const struct rseq_mempool_range *range, uintptr_t item_offset)
+{
+ uintptr_t poison = pool->attr.poison;
+ char *init_p;
+ int i;
+
+ if (!pool->attr.robust_set)
+ return;
+ init_p = __rseq_pool_range_init_ptr(range, item_offset);
+ if (init_p)
+ rseq_check_poison_item(pool, item_offset, init_p, pool->item_len, poison);
+ for (i = 0; i < pool->attr.max_nr_cpus; i++) {
+ char *p = __rseq_pool_range_percpu_ptr(range, i,
+ item_offset, pool->attr.stride);
+ rseq_check_poison_item(pool, item_offset, p, pool->item_len, poison);
+ }
+}
+
#ifdef HAVE_LIBNUMA
int rseq_mempool_range_init_numa(void *addr, size_t len, int cpu, int numa_flags)
{
}
static
-const char *get_pool_name(const struct rseq_mempool *pool)
-{
- return pool->name ? : "<anonymous>";
-}
-
-static
-bool addr_in_pool(const struct rseq_mempool *pool, void *addr)
+bool percpu_addr_in_pool(const struct rseq_mempool *pool, void __rseq_percpu *_addr)
{
struct rseq_mempool_range *range;
+ void *addr = (void *) _addr;
- for (range = pool->ranges; range; range = range->next) {
+ for (range = pool->range_list; range; range = range->next) {
if (addr >= range->base && addr < range->base + range->next_unused)
return true;
}
if (!pool->attr.robust_set)
return;
- for (range = pool->ranges; range; range = range->next) {
+ for (range = pool->range_list; range; range = range->next) {
total_item += pool->attr.stride >> pool->item_order;
total_never_allocated += (pool->attr.stride - range->next_unused) >> pool->item_order;
}
prev = node,
node = node->next) {
- void *node_addr = node;
-
if (traversal_iteration >= max_list_traversal) {
fprintf(stderr, "%s: Corrupted free-list; Possibly infinite loop in pool \"%s\" (%p), caller %p.\n",
__func__, get_pool_name(pool), pool, __builtin_return_address(0));
}
/* Node is out of range. */
- if (!addr_in_pool(pool, node_addr)) {
+ if (!percpu_addr_in_pool(pool, __rseq_free_list_to_percpu_ptr(pool, node))) {
if (prev)
fprintf(stderr, "%s: Corrupted free-list node %p -> [out-of-range %p] in pool \"%s\" (%p), caller %p.\n",
__func__, prev, node, get_pool_name(pool), pool, __builtin_return_address(0));
}
}
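+/*
+ * Verify the poison pattern of every item slot used so far in a range.
+ */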
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void check_range_poison(const struct rseq_mempool *pool,
+ const struct rseq_mempool_range *range)
+{
+ size_t item_offset;
+
+ for (item_offset = 0; item_offset < range->next_unused;
+ item_offset += pool->item_len)
+ rseq_percpu_check_poison_item(pool, range, item_offset);
+}
+
+/* Always inline for __builtin_return_address(0). */
+static inline __attribute__((always_inline))
+void check_pool_poison(const struct rseq_mempool *pool)
+{
+ struct rseq_mempool_range *range;
+
+ if (!pool->attr.robust_set)
+ return;
+ for (range = pool->range_list; range; range = range->next)
+ check_range_poison(pool, range);
+}
+
/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
void destroy_alloc_bitmap(struct rseq_mempool *pool, struct rseq_mempool_range *range)
}
free(bitmap);
+ range->alloc_bitmap = NULL;
}
/* Always inline for __builtin_return_address(0). */
int rseq_mempool_range_destroy(struct rseq_mempool *pool,
struct rseq_mempool_range *range)
{
+ int ret = 0;
+
destroy_alloc_bitmap(pool, range);
+
+ /*
+ * Punch a hole into memfd where the init values used to be.
+ */
+ if (range->init) {
+ ret = fallocate(memfd.fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ ptr_to_off_t(range->init), pool->attr.stride);
+ if (ret)
+ return ret;
+ range->init = NULL;
+ }
+
/* range is a header located one page before the aligned mapping. */
- return pool->attr.munmap_func(pool->attr.mmap_priv, range->header,
- (pool->attr.stride * pool->attr.max_nr_cpus) + rseq_get_page_len());
+ return pool->attr.munmap_func(pool->attr.mmap_priv, range->mmap_addr, range->mmap_len);
}
/*
return ptr;
}
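+/*
+ * Grow the shared memfd (with ftruncate) so it covers the init values
+ * area at [init, init + init_len).
+ */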
+static
+int rseq_memfd_reserve_init(void *init, size_t init_len)
+{
+ int ret = 0;
+ size_t reserve_len;
+
+ pthread_mutex_lock(&memfd.lock);
+ reserve_len = (size_t) ptr_to_off_t(init) + init_len;
+ if (reserve_len > memfd.reserved_size) {
+ if (ftruncate(memfd.fd, (off_t) reserve_len)) {
+ ret = -1;
+ goto unlock;
+ }
+ memfd.reserved_size = reserve_len;
+ }
+unlock:
+ pthread_mutex_unlock(&memfd.lock);
+ return ret;
+}
+
static
struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
{
unsigned long page_size;
void *header;
void *base;
+ size_t range_len; /* Range len excludes header. */
+ if (pool->attr.max_nr_ranges &&
+ pool->nr_ranges >= pool->attr.max_nr_ranges) {
+ errno = ENOMEM;
+ return NULL;
+ }
page_size = rseq_get_page_len();
+ range_len = pool->attr.stride * pool->attr.max_nr_cpus;
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL)
+ range_len += pool->attr.stride; /* init values */
+ if (pool->attr.robust_set)
+ range_len += pool->attr.stride; /* free list */
base = aligned_mmap_anonymous(pool, page_size,
- pool->attr.stride * pool->attr.max_nr_cpus,
+ range_len,
pool->attr.stride,
&header, page_size);
if (!base)
return NULL;
range = (struct rseq_mempool_range *) (base - RANGE_HEADER_OFFSET);
range->pool = pool;
- range->base = base;
range->header = header;
+ range->base = base;
+ range->mmap_addr = header;
+ range->mmap_len = page_size + range_len;
+
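+	/*
+	 * For pools which do not populate all pages up front, back the
+	 * range with the shared memfd: the init values area is a shared
+	 * mapping of the memfd, and each CPU area is a private (COW)
+	 * mapping of the same memfd pages, so CPUs which never write to
+	 * an item keep sharing the init values pages.
+	 */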
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_ALL) {
+ range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus);
+ /* Populate init values pages from memfd */
+ if (rseq_memfd_reserve_init(range->init, pool->attr.stride))
+ goto error_alloc;
+ if (mmap(range->init, pool->attr.stride, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, memfd.fd,
+ ptr_to_off_t(range->init)) != (void *) range->init) {
+ goto error_alloc;
+ }
+ assert(pool->attr.type == MEMPOOL_TYPE_PERCPU);
+ /*
+ * Map per-cpu memory as private COW mappings of init values.
+ */
+ {
+ int cpu;
+
+ for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
+ void *p = base + (pool->attr.stride * cpu);
+ size_t len = pool->attr.stride;
+
+ if (mmap(p, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
+ memfd.fd, ptr_to_off_t(range->init)) != (void *) p) {
+ goto error_alloc;
+ }
+ }
+ }
+ }
+
if (pool->attr.robust_set) {
if (create_alloc_bitmap(pool, range))
goto error_alloc;
}
if (pool->attr.init_set) {
- int cpu;
-
- for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
- pool->attr.init_func(pool->attr.init_priv,
- base + (pool->attr.stride * cpu),
- pool->attr.stride, cpu);
+ switch (pool->attr.type) {
+ case MEMPOOL_TYPE_GLOBAL:
+ if (pool->attr.init_func(pool->attr.init_priv,
+ base, pool->attr.stride, -1)) {
+ goto error_alloc;
+ }
+ break;
+ case MEMPOOL_TYPE_PERCPU:
+ {
+ int cpu;
+ for (cpu = 0; cpu < pool->attr.max_nr_cpus; cpu++) {
+ if (pool->attr.init_func(pool->attr.init_priv,
+ base + (pool->attr.stride * cpu),
+ pool->attr.stride, cpu)) {
+ goto error_alloc;
+ }
+ }
+ break;
+ }
+ default:
+ abort();
}
}
+ pool->nr_ranges++;
return range;
error_alloc:
return NULL;
}
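+/*
+ * Lazily create the shared memfd on first use and take a reference on it.
+ * Pools with the populate-all policy do not use the memfd.
+ */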
+static
+int rseq_mempool_memfd_ref(struct rseq_mempool *pool)
+{
+ int ret = 0;
+
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_ALL)
+ return 0;
+
+ pthread_mutex_lock(&memfd.lock);
+ if (memfd.refcount == 0) {
+ memfd.fd = memfd_create("mempool", MFD_CLOEXEC);
+ if (memfd.fd < 0) {
+ perror("memfd_create");
+ ret = -1;
+ goto unlock;
+ }
+ }
+ memfd.refcount++;
+unlock:
+ pthread_mutex_unlock(&memfd.lock);
+ return ret;
+}
+
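+/* Drop a reference on the shared memfd; the last reference closes it. */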
+static
+void rseq_mempool_memfd_unref(struct rseq_mempool *pool)
+{
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_ALL)
+ return;
+
+ pthread_mutex_lock(&memfd.lock);
+ if (memfd.refcount == 1) {
+ if (close(memfd.fd)) {
+ perror("close");
+ abort();
+ }
+ memfd.fd = -1;
+ memfd.reserved_size = 0;
+ }
+ memfd.refcount--;
+ pthread_mutex_unlock(&memfd.lock);
+}
+
int rseq_mempool_destroy(struct rseq_mempool *pool)
{
struct rseq_mempool_range *range, *next_range;
if (!pool)
return 0;
check_free_list(pool);
+ check_pool_poison(pool);
/* Iteration safe against removal. */
- for (range = pool->ranges; range && (next_range = range->next, 1); range = next_range) {
+ for (range = pool->range_list; range && (next_range = range->next, 1); range = next_range) {
if (rseq_mempool_range_destroy(pool, range))
goto end;
/* Update list head to keep list coherent in case of partial failure. */
- pool->ranges = next_range;
+ pool->range_list = next_range;
}
+ rseq_mempool_memfd_unref(pool);
pthread_mutex_destroy(&pool->lock);
free(pool->name);
- memset(pool, 0, sizeof(*pool));
+ free(pool);
end:
return ret;
}
}
break;
case MEMPOOL_TYPE_GLOBAL:
+ /* Override populate policy for global type. */
+ attr.populate_policy = RSEQ_MEMPOOL_POPULATE_ALL;
/* Use a 1-cpu pool for global mempool type. */
attr.max_nr_cpus = 1;
break;
}
if (!attr.stride)
attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */
+ if (attr.robust_set && !attr.poison_set) {
+ attr.poison_set = true;
+ attr.poison = DEFAULT_POISON_VALUE;
+ }
if (item_len > attr.stride || attr.stride < (size_t) rseq_get_page_len() ||
!is_pow2(attr.stride)) {
errno = EINVAL;
pool->item_len = item_len;
pool->item_order = order;
- //TODO: implement multi-range support.
- pool->ranges = rseq_mempool_range_create(pool);
- if (!pool->ranges)
+ if (rseq_mempool_memfd_ref(pool))
+ goto error_alloc;
+
+ pool->range_list = rseq_mempool_range_create(pool);
+ if (!pool->range_list)
goto error_alloc;
if (pool_name) {
/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
-void set_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
+void set_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
- unsigned long *bitmap = pool->ranges->alloc_bitmap;
+ unsigned long *bitmap = range->alloc_bitmap;
size_t item_index = item_offset >> pool->item_order;
unsigned long mask;
size_t k;
}
static
-void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool, bool zeroed)
+void __rseq_percpu *__rseq_percpu_malloc(struct rseq_mempool *pool,
+ bool zeroed, void *init_ptr, size_t init_len)
{
+ struct rseq_mempool_range *range;
struct free_list_node *node;
uintptr_t item_offset;
void __rseq_percpu *addr;
+ if (init_len > pool->item_len) {
+ errno = EINVAL;
+ return NULL;
+ }
pthread_mutex_lock(&pool->lock);
/* Get first entry from free list. */
node = pool->free_list_head;
if (node != NULL) {
+ void *range_base, *ptr;
+
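+		/*
+		 * Recover the owning range from the percpu pointer:
+		 * range bases are stride aligned and percpu pointers
+		 * fall within the first stride, so masking the low
+		 * bits yields the range base. The range header sits
+		 * just before the base.
+		 */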
+ ptr = __rseq_free_list_to_percpu_ptr(pool, node);
+ range_base = (void *) ((uintptr_t) ptr & (~(pool->attr.stride - 1)));
+ range = (struct rseq_mempool_range *) (range_base - RANGE_HEADER_OFFSET);
/* Remove node from free list (update head). */
pool->free_list_head = node->next;
- item_offset = (uintptr_t) ((void *) node - pool->ranges->base);
- addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
+ item_offset = (uintptr_t) (ptr - range_base);
+ rseq_percpu_check_poison_item(pool, range, item_offset);
+ addr = __rseq_free_list_to_percpu_ptr(pool, node);
goto end;
}
- if (pool->ranges->next_unused + pool->item_len > pool->attr.stride) {
- errno = ENOMEM;
- addr = NULL;
- goto end;
+ /*
+ * If the most recent range (first in list) does not have any
+ * room left, create a new range and prepend it to the list
+ * head.
+ */
+ range = pool->range_list;
+ if (range->next_unused + pool->item_len > pool->attr.stride) {
+ range = rseq_mempool_range_create(pool);
+ if (!range) {
+ errno = ENOMEM;
+ addr = NULL;
+ goto end;
+ }
+ /* Add range to head of list. */
+ range->next = pool->range_list;
+ pool->range_list = range;
}
- item_offset = pool->ranges->next_unused;
- addr = (void __rseq_percpu *) (pool->ranges->base + item_offset);
- pool->ranges->next_unused += pool->item_len;
+ /* First range in list has room left. */
+ item_offset = range->next_unused;
+ addr = (void __rseq_percpu *) (range->base + item_offset);
+ range->next_unused += pool->item_len;
end:
if (addr)
- set_alloc_slot(pool, item_offset);
+ set_alloc_slot(pool, range, item_offset);
pthread_mutex_unlock(&pool->lock);
- if (zeroed && addr)
- rseq_percpu_zero_item(pool, pool->ranges, item_offset);
+ if (addr) {
+ if (zeroed)
+ rseq_percpu_zero_item(pool, range, item_offset);
+ else if (init_ptr) {
+ rseq_percpu_init_item(pool, range, item_offset,
+ init_ptr, init_len);
+ }
+ }
return addr;
}
void __rseq_percpu *rseq_mempool_percpu_malloc(struct rseq_mempool *pool)
{
- return __rseq_percpu_malloc(pool, false);
+ return __rseq_percpu_malloc(pool, false, NULL, 0);
}
void __rseq_percpu *rseq_mempool_percpu_zmalloc(struct rseq_mempool *pool)
{
- return __rseq_percpu_malloc(pool, true);
+ return __rseq_percpu_malloc(pool, true, NULL, 0);
+}
+
+void __rseq_percpu *rseq_mempool_percpu_malloc_init(struct rseq_mempool *pool,
+ void *init_ptr, size_t len)
+{
+ return __rseq_percpu_malloc(pool, false, init_ptr, len);
}
/* Always inline for __builtin_return_address(0). */
static inline __attribute__((always_inline))
-void clear_alloc_slot(struct rseq_mempool *pool, size_t item_offset)
+void clear_alloc_slot(struct rseq_mempool *pool, struct rseq_mempool_range *range, size_t item_offset)
{
- unsigned long *bitmap = pool->ranges->alloc_bitmap;
+ unsigned long *bitmap = range->alloc_bitmap;
size_t item_index = item_offset >> pool->item_order;
unsigned long mask;
size_t k;
struct free_list_node *head, *item;
pthread_mutex_lock(&pool->lock);
- clear_alloc_slot(pool, item_offset);
+ clear_alloc_slot(pool, range, item_offset);
/* Add ptr to head of free list */
head = pool->free_list_head;
- /* Free-list is in CPU 0 range. */
- item = (struct free_list_node *) ptr;
+ if (pool->attr.poison_set)
+ rseq_percpu_poison_item(pool, range, item_offset);
+ item = __rseq_percpu_to_free_list_ptr(pool, _ptr);
+ /*
+ * Setting the next pointer will overwrite the first uintptr_t
+ * poison for either CPU 0 (populate all) or init data (populate
+ * none).
+ */
item->next = head;
pool->free_list_head = item;
pthread_mutex_unlock(&pool->lock);
}
static
-void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set, size_t len, bool zeroed)
+void __rseq_percpu *__rseq_mempool_set_malloc(struct rseq_mempool_set *pool_set,
+ void *init_ptr, size_t len, bool zeroed)
{
int order, min_order = POOL_SET_MIN_ENTRY;
struct rseq_mempool *pool;
found:
pthread_mutex_unlock(&pool_set->lock);
if (pool) {
- addr = __rseq_percpu_malloc(pool, zeroed);
+ addr = __rseq_percpu_malloc(pool, zeroed, init_ptr, len);
if (addr == NULL && errno == ENOMEM) {
/*
* If the allocation failed, try again with a
void __rseq_percpu *rseq_mempool_set_percpu_malloc(struct rseq_mempool_set *pool_set, size_t len)
{
- return __rseq_mempool_set_malloc(pool_set, len, false);
+ return __rseq_mempool_set_malloc(pool_set, NULL, len, false);
}
void __rseq_percpu *rseq_mempool_set_percpu_zmalloc(struct rseq_mempool_set *pool_set, size_t len)
{
- return __rseq_mempool_set_malloc(pool_set, len, true);
+ return __rseq_mempool_set_malloc(pool_set, NULL, len, true);
+}
+
+void __rseq_percpu *rseq_mempool_set_percpu_malloc_init(struct rseq_mempool_set *pool_set,
+ void *init_ptr, size_t len)
+{
+	return __rseq_mempool_set_malloc(pool_set, init_ptr, len, false);
}
struct rseq_mempool_attr *rseq_mempool_attr_create(void)
}
int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
- void (*init_func)(void *priv, void *addr, size_t len, int cpu),
+ int (*init_func)(void *priv, void *addr, size_t len, int cpu),
void *init_priv)
{
if (!attr) {
return 0;
}
+int rseq_mempool_attr_set_max_nr_ranges(struct rseq_mempool_attr *attr,
+ unsigned long max_nr_ranges)
+{
+ if (!attr) {
+ errno = EINVAL;
+ return -1;
+ }
+ attr->max_nr_ranges = max_nr_ranges;
+ return 0;
+}
+
+int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
+ uintptr_t poison)
+{
+ if (!attr) {
+ errno = EINVAL;
+ return -1;
+ }
+ attr->poison_set = true;
+ attr->poison = poison;
+ return 0;
+}
+
+int rseq_mempool_attr_set_populate_policy(struct rseq_mempool_attr *attr,
+ enum rseq_mempool_populate_policy policy)
+{
+ if (!attr) {
+ errno = EINVAL;
+ return -1;
+ }
+ attr->populate_policy = policy;
+ return 0;
+}
+
int rseq_mempool_get_max_nr_cpus(struct rseq_mempool *mempool)
{
if (!mempool || mempool->attr.type != MEMPOOL_TYPE_PERCPU) {