tests/basic_percpu_ops_test.c

   1 // SPDX-License-Identifier: LGPL-2.1
   2 #define _GNU_SOURCE
   3 #include <assert.h>
   4 #include <pthread.h>
   5 #include <sched.h>
   6 #include <stdint.h>
   7 #include <stdio.h>
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stddef.h>
  11
  12 #include <rseq/percpu-op.h>
  13
  14 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
  15
  16 struct percpu_lock_entry {      /* lock word for a single cpu */
  17         intptr_t v;             /* rseq_percpu_lock() switches it 0 -> 1, rseq_percpu_unlock() stores 0 */
  18 } __attribute__((aligned(128)));        /* 128-byte alignment; presumably to keep each cpu's word on its own cache line(s) */
  19
  20 struct percpu_lock {    /* table of lock words, indexed by cpu number */
  21         struct percpu_lock_entry c[CPU_SETSIZE];        /* one slot for every cpu id below CPU_SETSIZE */
  22 };
  23
  24 struct test_data_entry {        /* counter shard for a single cpu */
  25         intptr_t count;         /* incremented by the spinlock test threads while they hold that cpu's lock */
  26 } __attribute__((aligned(128)));        /* 128-byte alignment; presumably to avoid sharing cache lines between cpus */
  27
  28 struct spinlock_test_data {     /* state shared by all threads of the spinlock test */
  29         struct percpu_lock lock;        /* per-cpu locks protecting the counters below */
  30         struct test_data_entry c[CPU_SETSIZE];  /* per-cpu counters, indexed by cpu number */
  31         int reps;               /* number of lock/increment/unlock rounds each thread performs */
  32 };
  33
  34 struct percpu_list_node {       /* element of a singly linked per-cpu list */
  35         intptr_t data;          /* payload; the list test sums it to check that no node was lost */
  36         struct percpu_list_node *next;  /* following node, NULL at the end of the list */
  37 };
  38
  39 struct percpu_list_entry {      /* list anchor for a single cpu */
  40         struct percpu_list_node *head;  /* first node of that cpu's list, NULL when the list is empty */
  41 } __attribute__((aligned(128)));        /* 128-byte alignment; presumably to avoid sharing cache lines between cpus */
  42
  43 struct percpu_list {    /* one independent linked list per cpu */
  44         struct percpu_list_entry c[CPU_SETSIZE];        /* list heads, indexed by cpu number */
  45 };
  46
  47 /* A simple percpu spinlock.  Returns the cpu lock was acquired on. */
  48 int rseq_percpu_lock(struct percpu_lock *lock)
  49 {
  50         int cpu;
  51
  52         for (;;) {
  53                 int ret;
  54
  55                 cpu = rseq_cpu_start();
  56                 ret = percpu_cmpeqv_storev(&lock->c[cpu].v,
  57                                            0, 1, cpu);
  58                 if (rseq_likely(!ret))
  59                         break;
  60                 if (rseq_unlikely(ret < 0)) {
  61                         perror("cpu_opv");
  62                         abort();
  63                 }
  64                 /* Retry if comparison fails. */
  65         }
  66         /*
  67          * Acquire semantic when taking lock after control dependency.
  68          * Matches rseq_smp_store_release().
  69          */
  70         rseq_smp_acquire__after_ctrl_dep();
  71         return cpu;
  72 }
  73
  74 void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
  75 {
  76         assert(lock->c[cpu].v == 1);
  77         /*
  78          * Release lock, with release semantic. Matches
  79          * rseq_smp_acquire__after_ctrl_dep().
  80          */
  81         rseq_smp_store_release(&lock->c[cpu].v, 0);
  82 }
  83
  84 void *test_percpu_spinlock_thread(void *arg)
  85 {
  86         struct spinlock_test_data *data = arg;
  87         int i, cpu;
  88
  89         if (rseq_register_current_thread()) {
  90                 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
  91                         errno, strerror(errno));
  92                 abort();
  93         }
  94         for (i = 0; i < data->reps; i++) {
  95                 cpu = rseq_percpu_lock(&data->lock);
  96                 data->c[cpu].count++;
  97                 rseq_percpu_unlock(&data->lock, cpu);
  98         }
  99         if (rseq_unregister_current_thread()) {
 100                 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
 101                         errno, strerror(errno));
 102                 abort();
 103         }
 104
 105         return NULL;
 106 }
 107
 108 /*
 109  * A simple test which implements a sharded counter using a per-cpu
 110  * lock.  Obviously real applications might prefer to simply use a
 111  * per-cpu increment; however, this is reasonable for a test and the
 112  * lock can be extended to synchronize more complicated operations.
 113  */
 114 void test_percpu_spinlock(void)
 115 {
 116         const int num_threads = 200;
 117         int i;
 118         uint64_t sum;
 119         pthread_t test_threads[num_threads];
 120         struct spinlock_test_data data;
 121
 122         memset(&data, 0, sizeof(data));
 123         data.reps = 5000;
 124
 125         for (i = 0; i < num_threads; i++)
 126                 pthread_create(&test_threads[i], NULL,
 127                                test_percpu_spinlock_thread, &data);
 128
 129         for (i = 0; i < num_threads; i++)
 130                 pthread_join(test_threads[i], NULL);
 131
 132         sum = 0;
 133         for (i = 0; i < CPU_SETSIZE; i++)
 134                 sum += data.c[i].count;
 135
 136         assert(sum == (uint64_t)data.reps * num_threads);
 137 }
 138
 139 int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node,
 140                      int cpu)
 141 {
 142         for (;;) {
 143                 intptr_t *targetptr, newval, expect;
 144                 int ret;
 145
 146                 /* Load list->c[cpu].head with single-copy atomicity. */
 147                 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
 148                 newval = (intptr_t)node;
 149                 targetptr = (intptr_t *)&list->c[cpu].head;
 150                 node->next = (struct percpu_list_node *)expect;
 151                 ret = percpu_cmpeqv_storev(targetptr, expect, newval, cpu);
 152                 if (rseq_likely(!ret))
 153                         break;
 154                 if (rseq_unlikely(ret < 0)) {
 155                         perror("cpu_opv");
 156                         abort();
 157                 }
 158                 /* Retry if comparison fails. */
 159         }
 160         return cpu;
 161 }
 162
 163 /*
 164  * Unlike a traditional lock-less linked list; the availability of a
 165  * rseq primitive allows us to implement pop without concerns over
 166  * ABA-type races.
 167  */
 168 struct percpu_list_node *percpu_list_pop(struct percpu_list *list,
 169                                          int cpu)
 170 {
 171         struct percpu_list_node *head;
 172         intptr_t *targetptr, expectnot, *load;
 173         off_t offset;
 174         int ret;
 175
 176         targetptr = (intptr_t *)&list->c[cpu].head;
 177         expectnot = (intptr_t)NULL;
 178         offset = offsetof(struct percpu_list_node, next);
 179         load = (intptr_t *)&head;
 180         ret = percpu_cmpnev_storeoffp_load(targetptr, expectnot,
 181                                            offset, load, cpu);
 182         if (rseq_unlikely(ret < 0)) {
 183                 perror("cpu_opv");
 184                 abort();
 185         }
 186         if (ret > 0)
 187                 return NULL;
 188         return head;
 189 }
 190
 191 void *test_percpu_list_thread(void *arg)
 192 {
 193         int i;
 194         struct percpu_list *list = (struct percpu_list *)arg;
 195
 196         if (rseq_register_current_thread()) {
 197                 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
 198                         errno, strerror(errno));
 199                 abort();
 200         }
 201
 202         for (i = 0; i < 100000; i++) {
 203                 struct percpu_list_node *node;
 204
 205                 node = percpu_list_pop(list, rseq_cpu_start());
 206                 sched_yield();  /* encourage shuffling */
 207                 if (node)
 208                         percpu_list_push(list, node, rseq_cpu_start());
 209         }
 210
 211         if (rseq_unregister_current_thread()) {
 212                 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
 213                         errno, strerror(errno));
 214                 abort();
 215         }
 216
 217         return NULL;
 218 }
 219
 220 /* Simultaneous modification to a per-cpu linked list from many threads.  */
 221 void test_percpu_list(void)
 222 {
 223         int i, j;
 224         uint64_t sum = 0, expected_sum = 0;
 225         struct percpu_list list;
 226         pthread_t test_threads[200];
 227         cpu_set_t allowed_cpus;
 228
 229         memset(&list, 0, sizeof(list));
 230
 231         /* Generate list entries for every usable cpu. */
 232         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
 233         for (i = 0; i < CPU_SETSIZE; i++) {
 234                 if (!CPU_ISSET(i, &allowed_cpus))
 235                         continue;
 236                 for (j = 1; j <= 100; j++) {
 237                         struct percpu_list_node *node;
 238
 239                         expected_sum += j;
 240
 241                         node = malloc(sizeof(*node));
 242                         assert(node);
 243                         node->data = j;
 244                         node->next = list.c[i].head;
 245                         list.c[i].head = node;
 246                 }
 247         }
 248
 249         for (i = 0; i < 200; i++)
 250                 pthread_create(&test_threads[i], NULL,
 251                        test_percpu_list_thread, &list);
 252
 253         for (i = 0; i < 200; i++)
 254                 pthread_join(test_threads[i], NULL);
 255
 256         for (i = 0; i < CPU_SETSIZE; i++) {
 257                 struct percpu_list_node *node;
 258
 259                 if (!CPU_ISSET(i, &allowed_cpus))
 260                         continue;
 261
 262                 while ((node = percpu_list_pop(&list, i))) {
 263                         sum += node->data;
 264                         free(node);
 265                 }
 266         }
 267
 268         /*
 269          * All entries should now be accounted for (unless some external
 270          * actor is interfering with our allowed affinity while this
 271          * test is running).
 272          */
 273         assert(sum == expected_sum);
 274 }
 275
 276 int main(int argc, char **argv)
 277 {
 278         if (rseq_register_current_thread()) {
 279                 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
 280                         errno, strerror(errno));
 281                 goto error;
 282         }
 283         printf("spinlock\n");
 284         test_percpu_spinlock();
 285         printf("percpu_list\n");
 286         test_percpu_list();
 287         if (rseq_unregister_current_thread()) {
 288                 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
 289                         errno, strerror(errno));
 290                 goto error;
 291         }
 292         return 0;
 293
 294 error:
 295         return -1;
 296 }
 297