diff options
Diffstat (limited to 'tools/perf/bench/mem-memcpy.c')
-rw-r--r-- | tools/perf/bench/mem-memcpy.c | 286 |
1 file changed, 202 insertions, 84 deletions
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 2465141b554b..6c14afe8c1b1 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "../util/cloexec.h" | 13 | #include "../util/cloexec.h" |
14 | #include "bench.h" | 14 | #include "bench.h" |
15 | #include "mem-memcpy-arch.h" | 15 | #include "mem-memcpy-arch.h" |
16 | #include "mem-memset-arch.h" | ||
16 | 17 | ||
17 | #include <stdio.h> | 18 | #include <stdio.h> |
18 | #include <stdlib.h> | 19 | #include <stdlib.h> |
@@ -48,20 +49,24 @@ static const struct option options[] = { | |||
48 | }; | 49 | }; |
49 | 50 | ||
50 | typedef void *(*memcpy_t)(void *, const void *, size_t); | 51 | typedef void *(*memcpy_t)(void *, const void *, size_t); |
52 | typedef void *(*memset_t)(void *, int, size_t); | ||
51 | 53 | ||
52 | struct routine { | 54 | struct routine { |
53 | const char *name; | 55 | const char *name; |
54 | const char *desc; | 56 | const char *desc; |
55 | memcpy_t fn; | 57 | union { |
58 | memcpy_t memcpy; | ||
59 | memset_t memset; | ||
60 | } fn; | ||
56 | }; | 61 | }; |
57 | 62 | ||
58 | struct routine routines[] = { | 63 | struct routine memcpy_routines[] = { |
59 | { "default", | 64 | { .name = "default", |
60 | "Default memcpy() provided by glibc", | 65 | .desc = "Default memcpy() provided by glibc", |
61 | memcpy }, | 66 | .fn.memcpy = memcpy }, |
62 | #ifdef HAVE_ARCH_X86_64_SUPPORT | 67 | #ifdef HAVE_ARCH_X86_64_SUPPORT |
63 | 68 | ||
64 | #define MEMCPY_FN(fn, name, desc) { name, desc, fn }, | 69 | #define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, |
65 | #include "mem-memcpy-x86-64-asm-def.h" | 70 | #include "mem-memcpy-x86-64-asm-def.h" |
66 | #undef MEMCPY_FN | 71 | #undef MEMCPY_FN |
67 | 72 | ||
@@ -69,7 +74,7 @@ struct routine routines[] = { | |||
69 | 74 | ||
70 | { NULL, | 75 | { NULL, |
71 | NULL, | 76 | NULL, |
72 | NULL } | 77 | {NULL} } |
73 | }; | 78 | }; |
74 | 79 | ||
75 | static const char * const bench_mem_memcpy_usage[] = { | 80 | static const char * const bench_mem_memcpy_usage[] = { |
@@ -110,63 +115,6 @@ static double timeval2double(struct timeval *ts) | |||
110 | (double)ts->tv_usec / (double)1000000; | 115 | (double)ts->tv_usec / (double)1000000; |
111 | } | 116 | } |
112 | 117 | ||
113 | static void alloc_mem(void **dst, void **src, size_t length) | ||
114 | { | ||
115 | *dst = zalloc(length); | ||
116 | if (!*dst) | ||
117 | die("memory allocation failed - maybe length is too large?\n"); | ||
118 | |||
119 | *src = zalloc(length); | ||
120 | if (!*src) | ||
121 | die("memory allocation failed - maybe length is too large?\n"); | ||
122 | /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ | ||
123 | memset(*src, 0, length); | ||
124 | } | ||
125 | |||
126 | static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) | ||
127 | { | ||
128 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | ||
129 | void *src = NULL, *dst = NULL; | ||
130 | int i; | ||
131 | |||
132 | alloc_mem(&src, &dst, len); | ||
133 | |||
134 | if (prefault) | ||
135 | fn(dst, src, len); | ||
136 | |||
137 | cycle_start = get_cycle(); | ||
138 | for (i = 0; i < iterations; ++i) | ||
139 | fn(dst, src, len); | ||
140 | cycle_end = get_cycle(); | ||
141 | |||
142 | free(src); | ||
143 | free(dst); | ||
144 | return cycle_end - cycle_start; | ||
145 | } | ||
146 | |||
147 | static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | ||
148 | { | ||
149 | struct timeval tv_start, tv_end, tv_diff; | ||
150 | void *src = NULL, *dst = NULL; | ||
151 | int i; | ||
152 | |||
153 | alloc_mem(&src, &dst, len); | ||
154 | |||
155 | if (prefault) | ||
156 | fn(dst, src, len); | ||
157 | |||
158 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
159 | for (i = 0; i < iterations; ++i) | ||
160 | fn(dst, src, len); | ||
161 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
162 | |||
163 | timersub(&tv_end, &tv_start, &tv_diff); | ||
164 | |||
165 | free(src); | ||
166 | free(dst); | ||
167 | return (double)((double)len / timeval2double(&tv_diff)); | ||
168 | } | ||
169 | |||
170 | #define pf (no_prefault ? 0 : 1) | 118 | #define pf (no_prefault ? 0 : 1) |
171 | 119 | ||
172 | #define print_bps(x) do { \ | 120 | #define print_bps(x) do { \ |
@@ -180,16 +128,25 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
180 | printf(" %14lf GB/Sec", x / K / K / K); \ | 128 | printf(" %14lf GB/Sec", x / K / K / K); \ |
181 | } while (0) | 129 | } while (0) |
182 | 130 | ||
183 | int bench_mem_memcpy(int argc, const char **argv, | 131 | struct bench_mem_info { |
184 | const char *prefix __maybe_unused) | 132 | const struct routine *routines; |
133 | u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); | ||
134 | double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); | ||
135 | const char *const *usage; | ||
136 | }; | ||
137 | |||
138 | static int bench_mem_common(int argc, const char **argv, | ||
139 | const char *prefix __maybe_unused, | ||
140 | struct bench_mem_info *info) | ||
185 | { | 141 | { |
186 | int i; | 142 | int i; |
187 | size_t len; | 143 | size_t len; |
144 | double totallen; | ||
188 | double result_bps[2]; | 145 | double result_bps[2]; |
189 | u64 result_cycle[2]; | 146 | u64 result_cycle[2]; |
190 | 147 | ||
191 | argc = parse_options(argc, argv, options, | 148 | argc = parse_options(argc, argv, options, |
192 | bench_mem_memcpy_usage, 0); | 149 | info->usage, 0); |
193 | 150 | ||
194 | if (no_prefault && only_prefault) { | 151 | if (no_prefault && only_prefault) { |
195 | fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); | 152 | fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); |
@@ -200,6 +157,7 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
200 | init_cycle(); | 157 | init_cycle(); |
201 | 158 | ||
202 | len = (size_t)perf_atoll((char *)length_str); | 159 | len = (size_t)perf_atoll((char *)length_str); |
160 | totallen = (double)len * iterations; | ||
203 | 161 | ||
204 | result_cycle[0] = result_cycle[1] = 0ULL; | 162 | result_cycle[0] = result_cycle[1] = 0ULL; |
205 | result_bps[0] = result_bps[1] = 0.0; | 163 | result_bps[0] = result_bps[1] = 0.0; |
@@ -213,16 +171,16 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
213 | if (only_prefault && no_prefault) | 171 | if (only_prefault && no_prefault) |
214 | only_prefault = no_prefault = false; | 172 | only_prefault = no_prefault = false; |
215 | 173 | ||
216 | for (i = 0; routines[i].name; i++) { | 174 | for (i = 0; info->routines[i].name; i++) { |
217 | if (!strcmp(routines[i].name, routine)) | 175 | if (!strcmp(info->routines[i].name, routine)) |
218 | break; | 176 | break; |
219 | } | 177 | } |
220 | if (!routines[i].name) { | 178 | if (!info->routines[i].name) { |
221 | printf("Unknown routine:%s\n", routine); | 179 | printf("Unknown routine:%s\n", routine); |
222 | printf("Available routines...\n"); | 180 | printf("Available routines...\n"); |
223 | for (i = 0; routines[i].name; i++) { | 181 | for (i = 0; info->routines[i].name; i++) { |
224 | printf("\t%s ... %s\n", | 182 | printf("\t%s ... %s\n", |
225 | routines[i].name, routines[i].desc); | 183 | info->routines[i].name, info->routines[i].desc); |
226 | } | 184 | } |
227 | return 1; | 185 | return 1; |
228 | } | 186 | } |
@@ -234,25 +192,25 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
234 | /* show both of results */ | 192 | /* show both of results */ |
235 | if (use_cycle) { | 193 | if (use_cycle) { |
236 | result_cycle[0] = | 194 | result_cycle[0] = |
237 | do_memcpy_cycle(routines[i].fn, len, false); | 195 | info->do_cycle(&info->routines[i], len, false); |
238 | result_cycle[1] = | 196 | result_cycle[1] = |
239 | do_memcpy_cycle(routines[i].fn, len, true); | 197 | info->do_cycle(&info->routines[i], len, true); |
240 | } else { | 198 | } else { |
241 | result_bps[0] = | 199 | result_bps[0] = |
242 | do_memcpy_gettimeofday(routines[i].fn, | 200 | info->do_gettimeofday(&info->routines[i], |
243 | len, false); | 201 | len, false); |
244 | result_bps[1] = | 202 | result_bps[1] = |
245 | do_memcpy_gettimeofday(routines[i].fn, | 203 | info->do_gettimeofday(&info->routines[i], |
246 | len, true); | 204 | len, true); |
247 | } | 205 | } |
248 | } else { | 206 | } else { |
249 | if (use_cycle) { | 207 | if (use_cycle) { |
250 | result_cycle[pf] = | 208 | result_cycle[pf] = |
251 | do_memcpy_cycle(routines[i].fn, | 209 | info->do_cycle(&info->routines[i], |
252 | len, only_prefault); | 210 | len, only_prefault); |
253 | } else { | 211 | } else { |
254 | result_bps[pf] = | 212 | result_bps[pf] = |
255 | do_memcpy_gettimeofday(routines[i].fn, | 213 | info->do_gettimeofday(&info->routines[i], |
256 | len, only_prefault); | 214 | len, only_prefault); |
257 | } | 215 | } |
258 | } | 216 | } |
@@ -263,10 +221,10 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
263 | if (use_cycle) { | 221 | if (use_cycle) { |
264 | printf(" %14lf Cycle/Byte\n", | 222 | printf(" %14lf Cycle/Byte\n", |
265 | (double)result_cycle[0] | 223 | (double)result_cycle[0] |
266 | / (double)len); | 224 | / totallen); |
267 | printf(" %14lf Cycle/Byte (with prefault)\n", | 225 | printf(" %14lf Cycle/Byte (with prefault)\n", |
268 | (double)result_cycle[1] | 226 | (double)result_cycle[1] |
269 | / (double)len); | 227 | / totallen); |
270 | } else { | 228 | } else { |
271 | print_bps(result_bps[0]); | 229 | print_bps(result_bps[0]); |
272 | printf("\n"); | 230 | printf("\n"); |
@@ -277,7 +235,7 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
277 | if (use_cycle) { | 235 | if (use_cycle) { |
278 | printf(" %14lf Cycle/Byte", | 236 | printf(" %14lf Cycle/Byte", |
279 | (double)result_cycle[pf] | 237 | (double)result_cycle[pf] |
280 | / (double)len); | 238 | / totallen); |
281 | } else | 239 | } else |
282 | print_bps(result_bps[pf]); | 240 | print_bps(result_bps[pf]); |
283 | 241 | ||
@@ -288,8 +246,8 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
288 | if (!only_prefault && !no_prefault) { | 246 | if (!only_prefault && !no_prefault) { |
289 | if (use_cycle) { | 247 | if (use_cycle) { |
290 | printf("%lf %lf\n", | 248 | printf("%lf %lf\n", |
291 | (double)result_cycle[0] / (double)len, | 249 | (double)result_cycle[0] / totallen, |
292 | (double)result_cycle[1] / (double)len); | 250 | (double)result_cycle[1] / totallen); |
293 | } else { | 251 | } else { |
294 | printf("%lf %lf\n", | 252 | printf("%lf %lf\n", |
295 | result_bps[0], result_bps[1]); | 253 | result_bps[0], result_bps[1]); |
@@ -297,7 +255,7 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
297 | } else { | 255 | } else { |
298 | if (use_cycle) { | 256 | if (use_cycle) { |
299 | printf("%lf\n", (double)result_cycle[pf] | 257 | printf("%lf\n", (double)result_cycle[pf] |
300 | / (double)len); | 258 | / totallen); |
301 | } else | 259 | } else |
302 | printf("%lf\n", result_bps[pf]); | 260 | printf("%lf\n", result_bps[pf]); |
303 | } | 261 | } |
@@ -310,3 +268,163 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
310 | 268 | ||
311 | return 0; | 269 | return 0; |
312 | } | 270 | } |
271 | |||
/*
 * Allocate the destination and source buffers for one memcpy benchmark run.
 *
 * @dst:    receives the destination buffer pointer.
 * @src:    receives the source buffer pointer.
 * @length: size of each buffer in bytes.
 *
 * Both buffers come from zalloc(); die() is called with an error message
 * if either allocation returns NULL. The source buffer is additionally
 * written with memset() so that it does not stay backed by zero pages
 * (see the comment above that call).
 */
static void memcpy_alloc_mem(void **dst, void **src, size_t length)
{
	void *to, *from;

	to = zalloc(length);
	*dst = to;
	if (to == NULL)
		die("memory allocation failed - maybe length is too large?\n");

	from = zalloc(length);
	*src = from;
	if (from == NULL)
		die("memory allocation failed - maybe length is too large?\n");

	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
	memset(from, 0, length);
}
284 | |||
285 | static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) | ||
286 | { | ||
287 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | ||
288 | void *src = NULL, *dst = NULL; | ||
289 | memcpy_t fn = r->fn.memcpy; | ||
290 | int i; | ||
291 | |||
292 | memcpy_alloc_mem(&src, &dst, len); | ||
293 | |||
294 | if (prefault) | ||
295 | fn(dst, src, len); | ||
296 | |||
297 | cycle_start = get_cycle(); | ||
298 | for (i = 0; i < iterations; ++i) | ||
299 | fn(dst, src, len); | ||
300 | cycle_end = get_cycle(); | ||
301 | |||
302 | free(src); | ||
303 | free(dst); | ||
304 | return cycle_end - cycle_start; | ||
305 | } | ||
306 | |||
307 | static double do_memcpy_gettimeofday(const struct routine *r, size_t len, | ||
308 | bool prefault) | ||
309 | { | ||
310 | struct timeval tv_start, tv_end, tv_diff; | ||
311 | memcpy_t fn = r->fn.memcpy; | ||
312 | void *src = NULL, *dst = NULL; | ||
313 | int i; | ||
314 | |||
315 | memcpy_alloc_mem(&src, &dst, len); | ||
316 | |||
317 | if (prefault) | ||
318 | fn(dst, src, len); | ||
319 | |||
320 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
321 | for (i = 0; i < iterations; ++i) | ||
322 | fn(dst, src, len); | ||
323 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
324 | |||
325 | timersub(&tv_end, &tv_start, &tv_diff); | ||
326 | |||
327 | free(src); | ||
328 | free(dst); | ||
329 | return (double)(((double)len * iterations) / timeval2double(&tv_diff)); | ||
330 | } | ||
331 | |||
332 | int bench_mem_memcpy(int argc, const char **argv, | ||
333 | const char *prefix __maybe_unused) | ||
334 | { | ||
335 | struct bench_mem_info info = { | ||
336 | .routines = memcpy_routines, | ||
337 | .do_cycle = do_memcpy_cycle, | ||
338 | .do_gettimeofday = do_memcpy_gettimeofday, | ||
339 | .usage = bench_mem_memcpy_usage, | ||
340 | }; | ||
341 | |||
342 | return bench_mem_common(argc, argv, prefix, &info); | ||
343 | } | ||
344 | |||
/*
 * Allocate the single buffer used by the memset benchmark.
 *
 * @dst:    receives the buffer pointer returned by zalloc().
 * @length: size of the buffer in bytes.
 *
 * die() is called with an error message if the allocation returns NULL.
 */
static void memset_alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}
351 | |||
352 | static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) | ||
353 | { | ||
354 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | ||
355 | memset_t fn = r->fn.memset; | ||
356 | void *dst = NULL; | ||
357 | int i; | ||
358 | |||
359 | memset_alloc_mem(&dst, len); | ||
360 | |||
361 | if (prefault) | ||
362 | fn(dst, -1, len); | ||
363 | |||
364 | cycle_start = get_cycle(); | ||
365 | for (i = 0; i < iterations; ++i) | ||
366 | fn(dst, i, len); | ||
367 | cycle_end = get_cycle(); | ||
368 | |||
369 | free(dst); | ||
370 | return cycle_end - cycle_start; | ||
371 | } | ||
372 | |||
373 | static double do_memset_gettimeofday(const struct routine *r, size_t len, | ||
374 | bool prefault) | ||
375 | { | ||
376 | struct timeval tv_start, tv_end, tv_diff; | ||
377 | memset_t fn = r->fn.memset; | ||
378 | void *dst = NULL; | ||
379 | int i; | ||
380 | |||
381 | memset_alloc_mem(&dst, len); | ||
382 | |||
383 | if (prefault) | ||
384 | fn(dst, -1, len); | ||
385 | |||
386 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
387 | for (i = 0; i < iterations; ++i) | ||
388 | fn(dst, i, len); | ||
389 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
390 | |||
391 | timersub(&tv_end, &tv_start, &tv_diff); | ||
392 | |||
393 | free(dst); | ||
394 | return (double)(((double)len * iterations) / timeval2double(&tv_diff)); | ||
395 | } | ||
396 | |||
/*
 * NULL-terminated usage strings for the memset benchmark; installed as
 * bench_mem_info.usage by bench_mem_memset().
 */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>", NULL
};
401 | |||
402 | static const struct routine memset_routines[] = { | ||
403 | { .name ="default", | ||
404 | .desc = "Default memset() provided by glibc", | ||
405 | .fn.memset = memset }, | ||
406 | #ifdef HAVE_ARCH_X86_64_SUPPORT | ||
407 | |||
408 | #define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, | ||
409 | #include "mem-memset-x86-64-asm-def.h" | ||
410 | #undef MEMSET_FN | ||
411 | |||
412 | #endif | ||
413 | |||
414 | { .name = NULL, | ||
415 | .desc = NULL, | ||
416 | .fn.memset = NULL } | ||
417 | }; | ||
418 | |||
419 | int bench_mem_memset(int argc, const char **argv, | ||
420 | const char *prefix __maybe_unused) | ||
421 | { | ||
422 | struct bench_mem_info info = { | ||
423 | .routines = memset_routines, | ||
424 | .do_cycle = do_memset_cycle, | ||
425 | .do_gettimeofday = do_memset_gettimeofday, | ||
426 | .usage = bench_mem_memset_usage, | ||
427 | }; | ||
428 | |||
429 | return bench_mem_common(argc, argv, prefix, &info); | ||
430 | } | ||