Commit | Line | Data |
---|---|---|
c47174fc AA |
1 | /* |
2 | * Stress userfaultfd syscall. | |
3 | * | |
4 | * Copyright (C) 2015 Red Hat, Inc. | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
7 | * the COPYING file in the top-level directory. | |
8 | * | |
9 | * This test allocates two virtual areas and bounces the physical | |
10 | * memory across the two virtual areas (from area_src to area_dst) | |
11 | * using userfaultfd. | |
12 | * | |
13 | * There are three threads running per CPU: | |
14 | * | |
15 | * 1) one per-CPU thread takes a per-page pthread_mutex in a random | |
16 | * page of the area_dst (while the physical page may still be in | |
17 | * area_src), and increments a per-page counter in the same page, | |
18 | * and checks its value against a verification region. | |
19 | * | |
20 | * 2) another per-CPU thread handles the userfaults generated by | |
21 | * thread 1 above. userfaultfd blocking reads or poll() modes are | |
22 | * exercised interleaved. | |
23 | * | |
24 | * 3) one last per-CPU thread transfers the memory in the background | |
25 | * at maximum bandwidth (if not already transferred by thread | |
26 | * 2). Each cpu thread takes care of transferring a portion of the | |
27 | * area. | |
28 | * | |
29 | * When all threads of type 3 completed the transfer, one bounce is | |
30 | * complete. area_src and area_dst are then swapped. All threads are | |
31 | * respawned and so the bounce is immediately restarted in the | |
32 | * opposite direction. | |
33 | * | |
34 | * per-CPU threads 1 by triggering userfaults inside | |
35 | * pthread_mutex_lock will also verify the atomicity of the memory | |
36 | * transfer (UFFDIO_COPY). | |
37 | * | |
38 | * The program takes two parameters: the amount of physical memory in | |
39 | * megabytes (MiB) of the area and the number of bounces to execute. | |
40 | * | |
41 | * # 100MiB 99999 bounces | |
42 | * ./userfaultfd 100 99999 | |
43 | * | |
44 | * # 1GiB 99 bounces | |
45 | * ./userfaultfd 1000 99 | |
46 | * | |
47 | * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers | |
48 | * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done | |
49 | */ | |
50 | ||
51 | #define _GNU_SOURCE | |
52 | #include <stdio.h> | |
53 | #include <errno.h> | |
54 | #include <unistd.h> | |
55 | #include <stdlib.h> | |
56 | #include <sys/types.h> | |
57 | #include <sys/stat.h> | |
58 | #include <fcntl.h> | |
59 | #include <time.h> | |
60 | #include <signal.h> | |
61 | #include <poll.h> | |
62 | #include <string.h> | |
63 | #include <sys/mman.h> | |
64 | #include <sys/syscall.h> | |
65 | #include <sys/ioctl.h> | |
66 | #include <pthread.h> | |
67 | #include "../../../../include/uapi/linux/userfaultfd.h" | |
68 | ||
/*
 * Fallback syscall numbers for toolchains whose headers do not know
 * userfaultfd yet. A number already provided by the system headers
 * always wins, so an architecture missing from this list still builds
 * as long as its headers define __NR_userfaultfd.
 */
#ifndef __NR_userfaultfd
#ifdef __x86_64__
#define __NR_userfaultfd 323
#elif defined(__i386__)
#define __NR_userfaultfd 374
#elif defined(__powerpc__)
#define __NR_userfaultfd 364
#elif defined(__s390__)
#define __NR_userfaultfd 355
#else
#error "missing __NR_userfaultfd definition"
#endif
#endif
80 | ||
/* Test geometry; all four values are computed once in main(). */
static unsigned long nr_cpus;
static unsigned long nr_pages;
static unsigned long nr_pages_per_cpu;
static unsigned long page_size;

/*
 * "bounces" is the countdown of remaining passes; its low bits also
 * select the mode of the current pass.
 */
#define BOUNCE_RANDOM		(1<<0)
#define BOUNCE_RACINGFAULTS	(1<<1)
#define BOUNCE_VERIFY		(1<<2)
#define BOUNCE_POLL		(1<<3)
static int bounces;

/* Expected per-page counter values, indexed by page number. */
static unsigned long long *count_verify;

/* userfaultfd descriptor shared by every thread. */
static int uffd;
/* Set by stress() to make the locking threads leave their loop. */
static int finished;
/* Two descriptors per CPU: [cpu*2] is polled, [cpu*2+1] is written. */
static int *pipefd;

/* Source and destination areas, swapped after every bounce. */
static char *area_src;
static char *area_dst;
/* One page of zeroes used as comparison reference. */
static char *zeropage;

/* Attributes applied to every thread created by stress(). */
pthread_attr_t attr;
94 | ||
/* Each page begins with its pthread_mutex_t (page offset 0). */
#define area_mutex(___area, ___nr) \
	((pthread_mutex_t *) ((___area) + (___nr) * page_size))

/*
 * The per-page counter lives right after the mutex, rounded up to the
 * natural alignment of unsigned long long so that non-x86 archs do not
 * take alignment faults when accessing it.
 */
#define area_count(___area, ___nr) \
	((volatile unsigned long long *) \
	 (((unsigned long) ((___area) + (___nr) * page_size) + \
	   sizeof(pthread_mutex_t) + \
	   (sizeof(unsigned long long) - 1)) & \
	  ~((unsigned long) sizeof(unsigned long long) - 1)))
109 | ||
/*
 * Plain byte-by-byte comparison of two buffers of length n.
 *
 * Returns 1 as soon as a differing byte is found, 0 if all n bytes
 * match (including n == 0).
 */
static int my_bcmp(char *str1, char *str2, size_t n)
{
	size_t left = n;

	while (left--) {
		if (*str1++ != *str2++)
			return 1;
	}
	return 0;
}
118 | ||
119 | static void *locking_thread(void *arg) | |
120 | { | |
121 | unsigned long cpu = (unsigned long) arg; | |
122 | struct random_data rand; | |
123 | unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ | |
124 | int32_t rand_nr; | |
125 | unsigned long long count; | |
126 | char randstate[64]; | |
127 | unsigned int seed; | |
128 | time_t start; | |
129 | ||
130 | if (bounces & BOUNCE_RANDOM) { | |
131 | seed = (unsigned int) time(NULL) - bounces; | |
132 | if (!(bounces & BOUNCE_RACINGFAULTS)) | |
133 | seed += cpu; | |
134 | bzero(&rand, sizeof(rand)); | |
135 | bzero(&randstate, sizeof(randstate)); | |
136 | if (initstate_r(seed, randstate, sizeof(randstate), &rand)) | |
137 | fprintf(stderr, "srandom_r error\n"), exit(1); | |
138 | } else { | |
139 | page_nr = -bounces; | |
140 | if (!(bounces & BOUNCE_RACINGFAULTS)) | |
141 | page_nr += cpu * nr_pages_per_cpu; | |
142 | } | |
143 | ||
144 | while (!finished) { | |
145 | if (bounces & BOUNCE_RANDOM) { | |
146 | if (random_r(&rand, &rand_nr)) | |
147 | fprintf(stderr, "random_r 1 error\n"), exit(1); | |
148 | page_nr = rand_nr; | |
149 | if (sizeof(page_nr) > sizeof(rand_nr)) { | |
150 | if (random_r(&rand, &rand_nr)) | |
151 | fprintf(stderr, "random_r 2 error\n"), exit(1); | |
af8713b7 GU |
152 | page_nr |= (((unsigned long) rand_nr) << 16) << |
153 | 16; | |
c47174fc AA |
154 | } |
155 | } else | |
156 | page_nr += 1; | |
157 | page_nr %= nr_pages; | |
158 | ||
159 | start = time(NULL); | |
160 | if (bounces & BOUNCE_VERIFY) { | |
161 | count = *area_count(area_dst, page_nr); | |
162 | if (!count) | |
163 | fprintf(stderr, | |
164 | "page_nr %lu wrong count %Lu %Lu\n", | |
165 | page_nr, count, | |
166 | count_verify[page_nr]), exit(1); | |
167 | ||
168 | ||
169 | /* | |
170 | * We can't use bcmp (or memcmp) because that | |
171 | * returns 0 erroneously if the memory is | |
172 | * changing under it (even if the end of the | |
173 | * page is never changing and always | |
174 | * different). | |
175 | */ | |
176 | #if 1 | |
177 | if (!my_bcmp(area_dst + page_nr * page_size, zeropage, | |
178 | page_size)) | |
179 | fprintf(stderr, | |
180 | "my_bcmp page_nr %lu wrong count %Lu %Lu\n", | |
181 | page_nr, count, | |
182 | count_verify[page_nr]), exit(1); | |
183 | #else | |
184 | unsigned long loops; | |
185 | ||
186 | loops = 0; | |
187 | /* uncomment the below line to test with mutex */ | |
188 | /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */ | |
189 | while (!bcmp(area_dst + page_nr * page_size, zeropage, | |
190 | page_size)) { | |
191 | loops += 1; | |
192 | if (loops > 10) | |
193 | break; | |
194 | } | |
195 | /* uncomment below line to test with mutex */ | |
196 | /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */ | |
197 | if (loops) { | |
198 | fprintf(stderr, | |
199 | "page_nr %lu all zero thread %lu %p %lu\n", | |
200 | page_nr, cpu, area_dst + page_nr * page_size, | |
201 | loops); | |
202 | if (loops > 10) | |
203 | exit(1); | |
204 | } | |
205 | #endif | |
206 | } | |
207 | ||
208 | pthread_mutex_lock(area_mutex(area_dst, page_nr)); | |
209 | count = *area_count(area_dst, page_nr); | |
210 | if (count != count_verify[page_nr]) { | |
211 | fprintf(stderr, | |
212 | "page_nr %lu memory corruption %Lu %Lu\n", | |
213 | page_nr, count, | |
214 | count_verify[page_nr]), exit(1); | |
215 | } | |
216 | count++; | |
217 | *area_count(area_dst, page_nr) = count_verify[page_nr] = count; | |
218 | pthread_mutex_unlock(area_mutex(area_dst, page_nr)); | |
219 | ||
220 | if (time(NULL) - start > 1) | |
221 | fprintf(stderr, | |
222 | "userfault too slow %ld " | |
223 | "possible false positive with overcommit\n", | |
224 | time(NULL) - start); | |
225 | } | |
226 | ||
227 | return NULL; | |
228 | } | |
229 | ||
230 | static int copy_page(unsigned long offset) | |
231 | { | |
232 | struct uffdio_copy uffdio_copy; | |
233 | ||
234 | if (offset >= nr_pages * page_size) | |
235 | fprintf(stderr, "unexpected offset %lu\n", | |
236 | offset), exit(1); | |
237 | uffdio_copy.dst = (unsigned long) area_dst + offset; | |
238 | uffdio_copy.src = (unsigned long) area_src + offset; | |
239 | uffdio_copy.len = page_size; | |
240 | uffdio_copy.mode = 0; | |
241 | uffdio_copy.copy = 0; | |
242 | if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) { | |
243 | /* real retval in ufdio_copy.copy */ | |
244 | if (uffdio_copy.copy != -EEXIST) | |
245 | fprintf(stderr, "UFFDIO_COPY error %Ld\n", | |
246 | uffdio_copy.copy), exit(1); | |
247 | } else if (uffdio_copy.copy != page_size) { | |
248 | fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", | |
249 | uffdio_copy.copy), exit(1); | |
250 | } else | |
251 | return 1; | |
252 | return 0; | |
253 | } | |
254 | ||
255 | static void *uffd_poll_thread(void *arg) | |
256 | { | |
257 | unsigned long cpu = (unsigned long) arg; | |
258 | struct pollfd pollfd[2]; | |
259 | struct uffd_msg msg; | |
260 | int ret; | |
261 | unsigned long offset; | |
262 | char tmp_chr; | |
263 | unsigned long userfaults = 0; | |
264 | ||
265 | pollfd[0].fd = uffd; | |
266 | pollfd[0].events = POLLIN; | |
267 | pollfd[1].fd = pipefd[cpu*2]; | |
268 | pollfd[1].events = POLLIN; | |
269 | ||
270 | for (;;) { | |
271 | ret = poll(pollfd, 2, -1); | |
272 | if (!ret) | |
273 | fprintf(stderr, "poll error %d\n", ret), exit(1); | |
274 | if (ret < 0) | |
275 | perror("poll"), exit(1); | |
276 | if (pollfd[1].revents & POLLIN) { | |
277 | if (read(pollfd[1].fd, &tmp_chr, 1) != 1) | |
278 | fprintf(stderr, "read pipefd error\n"), | |
279 | exit(1); | |
280 | break; | |
281 | } | |
282 | if (!(pollfd[0].revents & POLLIN)) | |
283 | fprintf(stderr, "pollfd[0].revents %d\n", | |
284 | pollfd[0].revents), exit(1); | |
285 | ret = read(uffd, &msg, sizeof(msg)); | |
286 | if (ret < 0) { | |
287 | if (errno == EAGAIN) | |
288 | continue; | |
289 | perror("nonblocking read error"), exit(1); | |
290 | } | |
291 | if (msg.event != UFFD_EVENT_PAGEFAULT) | |
292 | fprintf(stderr, "unexpected msg event %u\n", | |
293 | msg.event), exit(1); | |
294 | if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) | |
295 | fprintf(stderr, "unexpected write fault\n"), exit(1); | |
af8713b7 GU |
296 | offset = (char *)(unsigned long)msg.arg.pagefault.address - |
297 | area_dst; | |
c47174fc AA |
298 | offset &= ~(page_size-1); |
299 | if (copy_page(offset)) | |
300 | userfaults++; | |
301 | } | |
302 | return (void *)userfaults; | |
303 | } | |
304 | ||
305 | pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; | |
306 | ||
307 | static void *uffd_read_thread(void *arg) | |
308 | { | |
309 | unsigned long *this_cpu_userfaults; | |
310 | struct uffd_msg msg; | |
311 | unsigned long offset; | |
312 | int ret; | |
313 | ||
314 | this_cpu_userfaults = (unsigned long *) arg; | |
315 | *this_cpu_userfaults = 0; | |
316 | ||
317 | pthread_mutex_unlock(&uffd_read_mutex); | |
318 | /* from here cancellation is ok */ | |
319 | ||
320 | for (;;) { | |
321 | ret = read(uffd, &msg, sizeof(msg)); | |
322 | if (ret != sizeof(msg)) { | |
323 | if (ret < 0) | |
324 | perror("blocking read error"), exit(1); | |
325 | else | |
326 | fprintf(stderr, "short read\n"), exit(1); | |
327 | } | |
328 | if (msg.event != UFFD_EVENT_PAGEFAULT) | |
329 | fprintf(stderr, "unexpected msg event %u\n", | |
330 | msg.event), exit(1); | |
331 | if (bounces & BOUNCE_VERIFY && | |
332 | msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) | |
333 | fprintf(stderr, "unexpected write fault\n"), exit(1); | |
af8713b7 GU |
334 | offset = (char *)(unsigned long)msg.arg.pagefault.address - |
335 | area_dst; | |
c47174fc AA |
336 | offset &= ~(page_size-1); |
337 | if (copy_page(offset)) | |
338 | (*this_cpu_userfaults)++; | |
339 | } | |
340 | return (void *)NULL; | |
341 | } | |
342 | ||
343 | static void *background_thread(void *arg) | |
344 | { | |
345 | unsigned long cpu = (unsigned long) arg; | |
346 | unsigned long page_nr; | |
347 | ||
348 | for (page_nr = cpu * nr_pages_per_cpu; | |
349 | page_nr < (cpu+1) * nr_pages_per_cpu; | |
350 | page_nr++) | |
351 | copy_page(page_nr * page_size); | |
352 | ||
353 | return NULL; | |
354 | } | |
355 | ||
356 | static int stress(unsigned long *userfaults) | |
357 | { | |
358 | unsigned long cpu; | |
359 | pthread_t locking_threads[nr_cpus]; | |
360 | pthread_t uffd_threads[nr_cpus]; | |
361 | pthread_t background_threads[nr_cpus]; | |
362 | void **_userfaults = (void **) userfaults; | |
363 | ||
364 | finished = 0; | |
365 | for (cpu = 0; cpu < nr_cpus; cpu++) { | |
366 | if (pthread_create(&locking_threads[cpu], &attr, | |
367 | locking_thread, (void *)cpu)) | |
368 | return 1; | |
369 | if (bounces & BOUNCE_POLL) { | |
370 | if (pthread_create(&uffd_threads[cpu], &attr, | |
371 | uffd_poll_thread, (void *)cpu)) | |
372 | return 1; | |
373 | } else { | |
374 | if (pthread_create(&uffd_threads[cpu], &attr, | |
375 | uffd_read_thread, | |
376 | &_userfaults[cpu])) | |
377 | return 1; | |
378 | pthread_mutex_lock(&uffd_read_mutex); | |
379 | } | |
380 | if (pthread_create(&background_threads[cpu], &attr, | |
381 | background_thread, (void *)cpu)) | |
382 | return 1; | |
383 | } | |
384 | for (cpu = 0; cpu < nr_cpus; cpu++) | |
385 | if (pthread_join(background_threads[cpu], NULL)) | |
386 | return 1; | |
387 | ||
388 | /* | |
389 | * Be strict and immediately zap area_src, the whole area has | |
390 | * been transferred already by the background treads. The | |
391 | * area_src could then be faulted in in a racy way by still | |
392 | * running uffdio_threads reading zeropages after we zapped | |
393 | * area_src (but they're guaranteed to get -EEXIST from | |
394 | * UFFDIO_COPY without writing zero pages into area_dst | |
395 | * because the background threads already completed). | |
396 | */ | |
397 | if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) { | |
398 | perror("madvise"); | |
399 | return 1; | |
400 | } | |
401 | ||
402 | for (cpu = 0; cpu < nr_cpus; cpu++) { | |
403 | char c; | |
404 | if (bounces & BOUNCE_POLL) { | |
405 | if (write(pipefd[cpu*2+1], &c, 1) != 1) { | |
406 | fprintf(stderr, "pipefd write error\n"); | |
407 | return 1; | |
408 | } | |
409 | if (pthread_join(uffd_threads[cpu], &_userfaults[cpu])) | |
410 | return 1; | |
411 | } else { | |
412 | if (pthread_cancel(uffd_threads[cpu])) | |
413 | return 1; | |
414 | if (pthread_join(uffd_threads[cpu], NULL)) | |
415 | return 1; | |
416 | } | |
417 | } | |
418 | ||
419 | finished = 1; | |
420 | for (cpu = 0; cpu < nr_cpus; cpu++) | |
421 | if (pthread_join(locking_threads[cpu], NULL)) | |
422 | return 1; | |
423 | ||
424 | return 0; | |
425 | } | |
426 | ||
427 | static int userfaultfd_stress(void) | |
428 | { | |
429 | void *area; | |
430 | char *tmp_area; | |
431 | unsigned long nr; | |
432 | struct uffdio_register uffdio_register; | |
433 | struct uffdio_api uffdio_api; | |
434 | unsigned long cpu; | |
435 | int uffd_flags; | |
436 | unsigned long userfaults[nr_cpus]; | |
437 | ||
438 | if (posix_memalign(&area, page_size, nr_pages * page_size)) { | |
439 | fprintf(stderr, "out of memory\n"); | |
440 | return 1; | |
441 | } | |
442 | area_src = area; | |
443 | if (posix_memalign(&area, page_size, nr_pages * page_size)) { | |
444 | fprintf(stderr, "out of memory\n"); | |
445 | return 1; | |
446 | } | |
447 | area_dst = area; | |
448 | ||
449 | uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); | |
450 | if (uffd < 0) { | |
451 | fprintf(stderr, | |
452 | "userfaultfd syscall not available in this kernel\n"); | |
453 | return 1; | |
454 | } | |
455 | uffd_flags = fcntl(uffd, F_GETFD, NULL); | |
456 | ||
457 | uffdio_api.api = UFFD_API; | |
458 | uffdio_api.features = 0; | |
459 | if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { | |
460 | fprintf(stderr, "UFFDIO_API\n"); | |
461 | return 1; | |
462 | } | |
463 | if (uffdio_api.api != UFFD_API) { | |
464 | fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); | |
465 | return 1; | |
466 | } | |
467 | ||
468 | count_verify = malloc(nr_pages * sizeof(unsigned long long)); | |
469 | if (!count_verify) { | |
470 | perror("count_verify"); | |
471 | return 1; | |
472 | } | |
473 | ||
474 | for (nr = 0; nr < nr_pages; nr++) { | |
475 | *area_mutex(area_src, nr) = (pthread_mutex_t) | |
476 | PTHREAD_MUTEX_INITIALIZER; | |
477 | count_verify[nr] = *area_count(area_src, nr) = 1; | |
478 | } | |
479 | ||
480 | pipefd = malloc(sizeof(int) * nr_cpus * 2); | |
481 | if (!pipefd) { | |
482 | perror("pipefd"); | |
483 | return 1; | |
484 | } | |
485 | for (cpu = 0; cpu < nr_cpus; cpu++) { | |
486 | if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) { | |
487 | perror("pipe"); | |
488 | return 1; | |
489 | } | |
490 | } | |
491 | ||
492 | if (posix_memalign(&area, page_size, page_size)) { | |
493 | fprintf(stderr, "out of memory\n"); | |
494 | return 1; | |
495 | } | |
496 | zeropage = area; | |
497 | bzero(zeropage, page_size); | |
498 | ||
499 | pthread_mutex_lock(&uffd_read_mutex); | |
500 | ||
501 | pthread_attr_init(&attr); | |
502 | pthread_attr_setstacksize(&attr, 16*1024*1024); | |
503 | ||
504 | while (bounces--) { | |
505 | unsigned long expected_ioctls; | |
506 | ||
507 | printf("bounces: %d, mode:", bounces); | |
508 | if (bounces & BOUNCE_RANDOM) | |
509 | printf(" rnd"); | |
510 | if (bounces & BOUNCE_RACINGFAULTS) | |
511 | printf(" racing"); | |
512 | if (bounces & BOUNCE_VERIFY) | |
513 | printf(" ver"); | |
514 | if (bounces & BOUNCE_POLL) | |
515 | printf(" poll"); | |
516 | printf(", "); | |
517 | fflush(stdout); | |
518 | ||
519 | if (bounces & BOUNCE_POLL) | |
520 | fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); | |
521 | else | |
522 | fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); | |
523 | ||
524 | /* register */ | |
525 | uffdio_register.range.start = (unsigned long) area_dst; | |
526 | uffdio_register.range.len = nr_pages * page_size; | |
527 | uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; | |
528 | if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { | |
529 | fprintf(stderr, "register failure\n"); | |
530 | return 1; | |
531 | } | |
532 | expected_ioctls = (1 << _UFFDIO_WAKE) | | |
533 | (1 << _UFFDIO_COPY) | | |
534 | (1 << _UFFDIO_ZEROPAGE); | |
535 | if ((uffdio_register.ioctls & expected_ioctls) != | |
536 | expected_ioctls) { | |
537 | fprintf(stderr, | |
538 | "unexpected missing ioctl for anon memory\n"); | |
539 | return 1; | |
540 | } | |
541 | ||
542 | /* | |
543 | * The madvise done previously isn't enough: some | |
544 | * uffd_thread could have read userfaults (one of | |
545 | * those already resolved by the background thread) | |
546 | * and it may be in the process of calling | |
547 | * UFFDIO_COPY. UFFDIO_COPY will read the zapped | |
548 | * area_src and it would map a zero page in it (of | |
549 | * course such a UFFDIO_COPY is perfectly safe as it'd | |
550 | * return -EEXIST). The problem comes at the next | |
551 | * bounce though: that racing UFFDIO_COPY would | |
552 | * generate zeropages in the area_src, so invalidating | |
553 | * the previous MADV_DONTNEED. Without this additional | |
554 | * MADV_DONTNEED those zeropages leftovers in the | |
555 | * area_src would lead to -EEXIST failure during the | |
556 | * next bounce, effectively leaving a zeropage in the | |
557 | * area_dst. | |
558 | * | |
559 | * Try to comment this out madvise to see the memory | |
560 | * corruption being caught pretty quick. | |
561 | * | |
562 | * khugepaged is also inhibited to collapse THP after | |
563 | * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's | |
564 | * required to MADV_DONTNEED here. | |
565 | */ | |
566 | if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) { | |
567 | perror("madvise 2"); | |
568 | return 1; | |
569 | } | |
570 | ||
571 | /* bounce pass */ | |
572 | if (stress(userfaults)) | |
573 | return 1; | |
574 | ||
575 | /* unregister */ | |
576 | if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { | |
577 | fprintf(stderr, "register failure\n"); | |
578 | return 1; | |
579 | } | |
580 | ||
581 | /* verification */ | |
582 | if (bounces & BOUNCE_VERIFY) { | |
583 | for (nr = 0; nr < nr_pages; nr++) { | |
584 | if (my_bcmp(area_dst, | |
585 | area_dst + nr * page_size, | |
586 | sizeof(pthread_mutex_t))) { | |
587 | fprintf(stderr, | |
588 | "error mutex 2 %lu\n", | |
589 | nr); | |
590 | bounces = 0; | |
591 | } | |
592 | if (*area_count(area_dst, nr) != count_verify[nr]) { | |
593 | fprintf(stderr, | |
594 | "error area_count %Lu %Lu %lu\n", | |
595 | *area_count(area_src, nr), | |
596 | count_verify[nr], | |
597 | nr); | |
598 | bounces = 0; | |
599 | } | |
600 | } | |
601 | } | |
602 | ||
603 | /* prepare next bounce */ | |
604 | tmp_area = area_src; | |
605 | area_src = area_dst; | |
606 | area_dst = tmp_area; | |
607 | ||
608 | printf("userfaults:"); | |
609 | for (cpu = 0; cpu < nr_cpus; cpu++) | |
610 | printf(" %lu", userfaults[cpu]); | |
611 | printf("\n"); | |
612 | } | |
613 | ||
614 | return 0; | |
615 | } | |
616 | ||
617 | int main(int argc, char **argv) | |
618 | { | |
619 | if (argc < 3) | |
620 | fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); | |
621 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | |
622 | page_size = sysconf(_SC_PAGE_SIZE); | |
623 | if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) > | |
624 | page_size) | |
625 | fprintf(stderr, "Impossible to run this test\n"), exit(2); | |
626 | nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / | |
627 | nr_cpus; | |
628 | if (!nr_pages_per_cpu) { | |
629 | fprintf(stderr, "invalid MiB\n"); | |
630 | fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); | |
631 | } | |
632 | bounces = atoi(argv[2]); | |
633 | if (bounces <= 0) { | |
634 | fprintf(stderr, "invalid bounces\n"); | |
635 | fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); | |
636 | } | |
637 | nr_pages = nr_pages_per_cpu * nr_cpus; | |
638 | printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", | |
639 | nr_pages, nr_pages_per_cpu); | |
640 | return userfaultfd_stress(); | |
641 | } |