Commit | Line | Data |
---|---|---|
827f3b49 HM |
1 | /* |
2 | * mem-memcpy.c | |
3 | * | |
4 | * memcpy: Simple memory copy in various ways | |
5 | * | |
6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | |
7 | */ | |
827f3b49 HM |
8 | |
9 | #include "../perf.h" | |
10 | #include "../util/util.h" | |
11 | #include "../util/parse-options.h" | |
827f3b49 | 12 | #include "../util/header.h" |
57480d2c | 13 | #include "../util/cloexec.h" |
827f3b49 | 14 | #include "bench.h" |
49ce8fc6 | 15 | #include "mem-memcpy-arch.h" |
5bce1a57 | 16 | #include "mem-memset-arch.h" |
827f3b49 HM |
17 | |
18 | #include <stdio.h> | |
19 | #include <stdlib.h> | |
20 | #include <string.h> | |
21 | #include <sys/time.h> | |
22 | #include <errno.h> | |
23 | ||
#define K 1024

/*
 * State filled in by the option table below; the initializers are the
 * values used when the corresponding option is not given.
 */
static const char	*length_str	= "1MB";	/* -l, --length */
static const char	*routine	= "default";	/* -r, --routine */
static int		iterations	= 1;		/* -i, --iterations */
static bool		use_cycle;			/* -c, --cycle */
static int		cycle_fd;			/* set by init_cycle() */
static bool		only_prefault;			/* -o, --only-prefault */
static bool		no_prefault;			/* -n, --no-prefault */
827f3b49 HM |
33 | |
34 | static const struct option options[] = { | |
35 | OPT_STRING('l', "length", &length_str, "1MB", | |
36 | "Specify length of memory to copy. " | |
08942f6d | 37 | "Available units: B, KB, MB, GB and TB (upper and lower)"), |
827f3b49 HM |
38 | OPT_STRING('r', "routine", &routine, "default", |
39 | "Specify routine to copy"), | |
e3e877e7 JB |
40 | OPT_INTEGER('i', "iterations", &iterations, |
41 | "repeat memcpy() invocation this number of times"), | |
17d7a112 | 42 | OPT_BOOLEAN('c', "cycle", &use_cycle, |
08942f6d | 43 | "Use cycles event instead of gettimeofday() for measuring"), |
49ce8fc6 HM |
44 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, |
45 | "Show only the result with page faults before memcpy()"), | |
46 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | |
47 | "Show only the result without page faults before memcpy()"), | |
827f3b49 HM |
48 | OPT_END() |
49 | }; | |
50 | ||
/* Function-pointer types with the same signatures as memcpy() and memset(). */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
typedef void *(*memset_t)(void *dst, int c, size_t len);
49ce8fc6 | 53 | |
827f3b49 HM |
54 | struct routine { |
55 | const char *name; | |
56 | const char *desc; | |
308197b9 RV |
57 | union { |
58 | memcpy_t memcpy; | |
5bce1a57 | 59 | memset_t memset; |
308197b9 | 60 | } fn; |
827f3b49 HM |
61 | }; |
62 | ||
308197b9 RV |
63 | struct routine memcpy_routines[] = { |
64 | { .name = "default", | |
65 | .desc = "Default memcpy() provided by glibc", | |
66 | .fn.memcpy = memcpy }, | |
89fe808a | 67 | #ifdef HAVE_ARCH_X86_64_SUPPORT |
49ce8fc6 | 68 | |
308197b9 | 69 | #define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, |
49ce8fc6 HM |
70 | #include "mem-memcpy-x86-64-asm-def.h" |
71 | #undef MEMCPY_FN | |
72 | ||
73 | #endif | |
74 | ||
827f3b49 HM |
75 | { NULL, |
76 | NULL, | |
308197b9 | 77 | {NULL} } |
827f3b49 HM |
78 | }; |
79 | ||
/* NULL-terminated usage lines for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL,
};
84 | ||
17d7a112 | 85 | static struct perf_event_attr cycle_attr = { |
12eac0bf HM |
86 | .type = PERF_TYPE_HARDWARE, |
87 | .config = PERF_COUNT_HW_CPU_CYCLES | |
827f3b49 HM |
88 | }; |
89 | ||
17d7a112 | 90 | static void init_cycle(void) |
827f3b49 | 91 | { |
57480d2c YD |
92 | cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, |
93 | perf_event_open_cloexec_flag()); | |
12eac0bf | 94 | |
17d7a112 | 95 | if (cycle_fd < 0 && errno == ENOSYS) |
12eac0bf HM |
96 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); |
97 | else | |
17d7a112 | 98 | BUG_ON(cycle_fd < 0); |
827f3b49 HM |
99 | } |
100 | ||
17d7a112 | 101 | static u64 get_cycle(void) |
827f3b49 HM |
102 | { |
103 | int ret; | |
104 | u64 clk; | |
105 | ||
17d7a112 | 106 | ret = read(cycle_fd, &clk, sizeof(u64)); |
827f3b49 HM |
107 | BUG_ON(ret != sizeof(u64)); |
108 | ||
109 | return clk; | |
110 | } | |
111 | ||
/* Convert a struct timeval to seconds, with microseconds as the fraction. */
static double timeval2double(struct timeval *ts)
{
	const double usec_per_sec = 1000000;
	double whole = (double)ts->tv_sec;
	double frac = (double)ts->tv_usec / usec_per_sec;

	return whole + frac;
}
117 | ||
49ce8fc6 HM |
/* Index into the two-element result arrays: 0 = no prefault, 1 = prefault. */
#define pf (no_prefault ? 0 : 1)

/*
 * Print a bytes-per-second figure scaled to B, KB, MB or GB per second
 * (units of K = 1024), without a trailing newline.
 *
 * The argument is evaluated exactly once.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf B/Sec", bps_);			\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/Sec", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/Sec", bps_ / K / K);		\
		else							\
			printf(" %14lf GB/Sec", bps_ / K / K / K);	\
	} while (0)
130 | ||
308197b9 RV |
131 | struct bench_mem_info { |
132 | const struct routine *routines; | |
133 | u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); | |
134 | double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); | |
135 | const char *const *usage; | |
136 | }; | |
137 | ||
138 | static int bench_mem_common(int argc, const char **argv, | |
139 | const char *prefix __maybe_unused, | |
140 | struct bench_mem_info *info) | |
827f3b49 HM |
141 | { |
142 | int i; | |
49ce8fc6 | 143 | size_t len; |
1182f883 | 144 | double totallen; |
49ce8fc6 | 145 | double result_bps[2]; |
17d7a112 | 146 | u64 result_cycle[2]; |
827f3b49 | 147 | |
827f3b49 | 148 | argc = parse_options(argc, argv, options, |
308197b9 | 149 | info->usage, 0); |
827f3b49 | 150 | |
424e9634 DB |
151 | if (no_prefault && only_prefault) { |
152 | fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); | |
153 | return 1; | |
154 | } | |
155 | ||
17d7a112 HM |
156 | if (use_cycle) |
157 | init_cycle(); | |
49ce8fc6 HM |
158 | |
159 | len = (size_t)perf_atoll((char *)length_str); | |
1182f883 | 160 | totallen = (double)len * iterations; |
12eac0bf | 161 | |
17d7a112 | 162 | result_cycle[0] = result_cycle[1] = 0ULL; |
49ce8fc6 HM |
163 | result_bps[0] = result_bps[1] = 0.0; |
164 | ||
165 | if ((s64)len <= 0) { | |
827f3b49 HM |
166 | fprintf(stderr, "Invalid length:%s\n", length_str); |
167 | return 1; | |
168 | } | |
169 | ||
49ce8fc6 HM |
170 | /* same to without specifying either of prefault and no-prefault */ |
171 | if (only_prefault && no_prefault) | |
172 | only_prefault = no_prefault = false; | |
173 | ||
308197b9 RV |
174 | for (i = 0; info->routines[i].name; i++) { |
175 | if (!strcmp(info->routines[i].name, routine)) | |
827f3b49 HM |
176 | break; |
177 | } | |
308197b9 | 178 | if (!info->routines[i].name) { |
827f3b49 HM |
179 | printf("Unknown routine:%s\n", routine); |
180 | printf("Available routines...\n"); | |
308197b9 | 181 | for (i = 0; info->routines[i].name; i++) { |
827f3b49 | 182 | printf("\t%s ... %s\n", |
308197b9 | 183 | info->routines[i].name, info->routines[i].desc); |
827f3b49 HM |
184 | } |
185 | return 1; | |
186 | } | |
187 | ||
49ce8fc6 HM |
188 | if (bench_format == BENCH_FORMAT_DEFAULT) |
189 | printf("# Copying %s Bytes ...\n\n", length_str); | |
827f3b49 | 190 | |
49ce8fc6 HM |
191 | if (!only_prefault && !no_prefault) { |
192 | /* show both of results */ | |
17d7a112 HM |
193 | if (use_cycle) { |
194 | result_cycle[0] = | |
308197b9 | 195 | info->do_cycle(&info->routines[i], len, false); |
17d7a112 | 196 | result_cycle[1] = |
308197b9 | 197 | info->do_cycle(&info->routines[i], len, true); |
49ce8fc6 HM |
198 | } else { |
199 | result_bps[0] = | |
308197b9 | 200 | info->do_gettimeofday(&info->routines[i], |
49ce8fc6 HM |
201 | len, false); |
202 | result_bps[1] = | |
308197b9 | 203 | info->do_gettimeofday(&info->routines[i], |
49ce8fc6 HM |
204 | len, true); |
205 | } | |
827f3b49 | 206 | } else { |
17d7a112 HM |
207 | if (use_cycle) { |
208 | result_cycle[pf] = | |
308197b9 | 209 | info->do_cycle(&info->routines[i], |
49ce8fc6 HM |
210 | len, only_prefault); |
211 | } else { | |
212 | result_bps[pf] = | |
308197b9 | 213 | info->do_gettimeofday(&info->routines[i], |
49ce8fc6 HM |
214 | len, only_prefault); |
215 | } | |
827f3b49 HM |
216 | } |
217 | ||
218 | switch (bench_format) { | |
219 | case BENCH_FORMAT_DEFAULT: | |
49ce8fc6 | 220 | if (!only_prefault && !no_prefault) { |
17d7a112 HM |
221 | if (use_cycle) { |
222 | printf(" %14lf Cycle/Byte\n", | |
223 | (double)result_cycle[0] | |
1182f883 | 224 | / totallen); |
17d7a112 HM |
225 | printf(" %14lf Cycle/Byte (with prefault)\n", |
226 | (double)result_cycle[1] | |
1182f883 | 227 | / totallen); |
49ce8fc6 HM |
228 | } else { |
229 | print_bps(result_bps[0]); | |
230 | printf("\n"); | |
231 | print_bps(result_bps[1]); | |
232 | printf(" (with prefault)\n"); | |
827f3b49 | 233 | } |
49ce8fc6 | 234 | } else { |
17d7a112 HM |
235 | if (use_cycle) { |
236 | printf(" %14lf Cycle/Byte", | |
237 | (double)result_cycle[pf] | |
1182f883 | 238 | / totallen); |
49ce8fc6 HM |
239 | } else |
240 | print_bps(result_bps[pf]); | |
241 | ||
242 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | |
827f3b49 HM |
243 | } |
244 | break; | |
245 | case BENCH_FORMAT_SIMPLE: | |
49ce8fc6 | 246 | if (!only_prefault && !no_prefault) { |
17d7a112 | 247 | if (use_cycle) { |
49ce8fc6 | 248 | printf("%lf %lf\n", |
1182f883 RV |
249 | (double)result_cycle[0] / totallen, |
250 | (double)result_cycle[1] / totallen); | |
49ce8fc6 HM |
251 | } else { |
252 | printf("%lf %lf\n", | |
253 | result_bps[0], result_bps[1]); | |
254 | } | |
255 | } else { | |
17d7a112 HM |
256 | if (use_cycle) { |
257 | printf("%lf\n", (double)result_cycle[pf] | |
1182f883 | 258 | / totallen); |
49ce8fc6 HM |
259 | } else |
260 | printf("%lf\n", result_bps[pf]); | |
261 | } | |
827f3b49 HM |
262 | break; |
263 | default: | |
12eac0bf HM |
264 | /* reaching this means there's some disaster: */ |
265 | die("unknown format: %d\n", bench_format); | |
827f3b49 HM |
266 | break; |
267 | } | |
268 | ||
269 | return 0; | |
270 | } | |
308197b9 RV |
271 | |
/*
 * Allocate the destination and source buffers for a memcpy run.
 * Both come from zalloc(); a failed allocation ends in die().
 * The buffers are owned by the caller, which releases them with free().
 */
static void memcpy_alloc_mem(void **dst, void **src, size_t length)
{
	void *buf;

	buf = *dst = zalloc(length);
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");

	buf = *src = zalloc(length);
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");

	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
	memset(buf, 0, length);
}
284 | ||
285 | static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) | |
286 | { | |
287 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | |
288 | void *src = NULL, *dst = NULL; | |
289 | memcpy_t fn = r->fn.memcpy; | |
290 | int i; | |
291 | ||
e17fdaea | 292 | memcpy_alloc_mem(&dst, &src, len); |
308197b9 RV |
293 | |
294 | if (prefault) | |
295 | fn(dst, src, len); | |
296 | ||
297 | cycle_start = get_cycle(); | |
298 | for (i = 0; i < iterations; ++i) | |
299 | fn(dst, src, len); | |
300 | cycle_end = get_cycle(); | |
301 | ||
302 | free(src); | |
303 | free(dst); | |
304 | return cycle_end - cycle_start; | |
305 | } | |
306 | ||
307 | static double do_memcpy_gettimeofday(const struct routine *r, size_t len, | |
308 | bool prefault) | |
309 | { | |
310 | struct timeval tv_start, tv_end, tv_diff; | |
311 | memcpy_t fn = r->fn.memcpy; | |
312 | void *src = NULL, *dst = NULL; | |
313 | int i; | |
314 | ||
e17fdaea | 315 | memcpy_alloc_mem(&dst, &src, len); |
308197b9 RV |
316 | |
317 | if (prefault) | |
318 | fn(dst, src, len); | |
319 | ||
320 | BUG_ON(gettimeofday(&tv_start, NULL)); | |
321 | for (i = 0; i < iterations; ++i) | |
322 | fn(dst, src, len); | |
323 | BUG_ON(gettimeofday(&tv_end, NULL)); | |
324 | ||
325 | timersub(&tv_end, &tv_start, &tv_diff); | |
326 | ||
327 | free(src); | |
328 | free(dst); | |
1182f883 | 329 | return (double)(((double)len * iterations) / timeval2double(&tv_diff)); |
308197b9 RV |
330 | } |
331 | ||
332 | int bench_mem_memcpy(int argc, const char **argv, | |
333 | const char *prefix __maybe_unused) | |
334 | { | |
335 | struct bench_mem_info info = { | |
336 | .routines = memcpy_routines, | |
337 | .do_cycle = do_memcpy_cycle, | |
338 | .do_gettimeofday = do_memcpy_gettimeofday, | |
339 | .usage = bench_mem_memcpy_usage, | |
340 | }; | |
341 | ||
342 | return bench_mem_common(argc, argv, prefix, &info); | |
343 | } | |
5bce1a57 RV |
344 | |
/*
 * Allocate the destination buffer for a memset run with zalloc();
 * a failed allocation ends in die(). The caller frees the buffer.
 */
static void memset_alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}
351 | ||
352 | static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) | |
353 | { | |
354 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | |
355 | memset_t fn = r->fn.memset; | |
356 | void *dst = NULL; | |
357 | int i; | |
358 | ||
359 | memset_alloc_mem(&dst, len); | |
360 | ||
361 | if (prefault) | |
362 | fn(dst, -1, len); | |
363 | ||
364 | cycle_start = get_cycle(); | |
365 | for (i = 0; i < iterations; ++i) | |
366 | fn(dst, i, len); | |
367 | cycle_end = get_cycle(); | |
368 | ||
369 | free(dst); | |
370 | return cycle_end - cycle_start; | |
371 | } | |
372 | ||
373 | static double do_memset_gettimeofday(const struct routine *r, size_t len, | |
374 | bool prefault) | |
375 | { | |
376 | struct timeval tv_start, tv_end, tv_diff; | |
377 | memset_t fn = r->fn.memset; | |
378 | void *dst = NULL; | |
379 | int i; | |
380 | ||
381 | memset_alloc_mem(&dst, len); | |
382 | ||
383 | if (prefault) | |
384 | fn(dst, -1, len); | |
385 | ||
386 | BUG_ON(gettimeofday(&tv_start, NULL)); | |
387 | for (i = 0; i < iterations; ++i) | |
388 | fn(dst, i, len); | |
389 | BUG_ON(gettimeofday(&tv_end, NULL)); | |
390 | ||
391 | timersub(&tv_end, &tv_start, &tv_diff); | |
392 | ||
393 | free(dst); | |
1182f883 | 394 | return (double)(((double)len * iterations) / timeval2double(&tv_diff)); |
5bce1a57 RV |
395 | } |
396 | ||
/* NULL-terminated usage lines for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL,
};
401 | ||
402 | static const struct routine memset_routines[] = { | |
403 | { .name ="default", | |
404 | .desc = "Default memset() provided by glibc", | |
405 | .fn.memset = memset }, | |
406 | #ifdef HAVE_ARCH_X86_64_SUPPORT | |
407 | ||
408 | #define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, | |
409 | #include "mem-memset-x86-64-asm-def.h" | |
410 | #undef MEMSET_FN | |
411 | ||
412 | #endif | |
413 | ||
414 | { .name = NULL, | |
415 | .desc = NULL, | |
416 | .fn.memset = NULL } | |
417 | }; | |
418 | ||
419 | int bench_mem_memset(int argc, const char **argv, | |
420 | const char *prefix __maybe_unused) | |
421 | { | |
422 | struct bench_mem_info info = { | |
423 | .routines = memset_routines, | |
424 | .do_cycle = do_memset_cycle, | |
425 | .do_gettimeofday = do_memset_gettimeofday, | |
426 | .usage = bench_mem_memset_usage, | |
427 | }; | |
428 | ||
429 | return bench_mem_common(argc, argv, prefix, &info); | |
430 | } |