diff options
| author | Rabin Vincent <rabin.vincent@axis.com> | 2014-12-02 10:50:40 -0500 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2014-12-09 07:14:05 -0500 |
| commit | 5bce1a5772cb52aad7e0466484ba07cfbfec2478 (patch) | |
| tree | 28dc1e7b55b1d4619647526237de47a990ac226f /tools/perf | |
| parent | 308197b9474bcde2cafba2cd19bef46e0c0428bd (diff) | |

perf bench: Merge memset into memcpy

The memset benchmark is largely copy-pasted from the memcpy benchmark.
Merge the two now that the memcpy benchmark has been made more generic.

Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rabin Vincent <rabinv@axis.com>
Link: http://lkml.kernel.org/r/1417535441-3965-2-git-send-email-rabin.vincent@axis.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
| -rw-r--r-- | tools/perf/Makefile.perf | 1 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memcpy.c | 90 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset.c | 304 |
3 files changed, 90 insertions, 305 deletions
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 478efa9b2364..763e68fb5767 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
| @@ -458,7 +458,6 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o | |||
| 458 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o | 458 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o |
| 459 | endif | 459 | endif |
| 460 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o | 460 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o |
| 461 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o | ||
| 462 | BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o | 461 | BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o |
| 463 | BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o | 462 | BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o |
| 464 | BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o | 463 | BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o |
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 4a4493a7a1f8..e18be70c8a47 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "../util/cloexec.h" | 13 | #include "../util/cloexec.h" |
| 14 | #include "bench.h" | 14 | #include "bench.h" |
| 15 | #include "mem-memcpy-arch.h" | 15 | #include "mem-memcpy-arch.h" |
| 16 | #include "mem-memset-arch.h" | ||
| 16 | 17 | ||
| 17 | #include <stdio.h> | 18 | #include <stdio.h> |
| 18 | #include <stdlib.h> | 19 | #include <stdlib.h> |
| @@ -48,12 +49,14 @@ static const struct option options[] = { | |||
| 48 | }; | 49 | }; |
| 49 | 50 | ||
| 50 | typedef void *(*memcpy_t)(void *, const void *, size_t); | 51 | typedef void *(*memcpy_t)(void *, const void *, size_t); |
| 52 | typedef void *(*memset_t)(void *, int, size_t); | ||
| 51 | 53 | ||
| 52 | struct routine { | 54 | struct routine { |
| 53 | const char *name; | 55 | const char *name; |
| 54 | const char *desc; | 56 | const char *desc; |
| 55 | union { | 57 | union { |
| 56 | memcpy_t memcpy; | 58 | memcpy_t memcpy; |
| 59 | memset_t memset; | ||
| 57 | } fn; | 60 | } fn; |
| 58 | }; | 61 | }; |
| 59 | 62 | ||
| @@ -336,3 +339,90 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
| 336 | 339 | ||
| 337 | return bench_mem_common(argc, argv, prefix, &info); | 340 | return bench_mem_common(argc, argv, prefix, &info); |
| 338 | } | 341 | } |
| 342 | |||
| 343 | static void memset_alloc_mem(void **dst, size_t length) | ||
| 344 | { | ||
| 345 | *dst = zalloc(length); | ||
| 346 | if (!*dst) | ||
| 347 | die("memory allocation failed - maybe length is too large?\n"); | ||
| 348 | } | ||
| 349 | |||
| 350 | static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) | ||
| 351 | { | ||
| 352 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | ||
| 353 | memset_t fn = r->fn.memset; | ||
| 354 | void *dst = NULL; | ||
| 355 | int i; | ||
| 356 | |||
| 357 | memset_alloc_mem(&dst, len); | ||
| 358 | |||
| 359 | if (prefault) | ||
| 360 | fn(dst, -1, len); | ||
| 361 | |||
| 362 | cycle_start = get_cycle(); | ||
| 363 | for (i = 0; i < iterations; ++i) | ||
| 364 | fn(dst, i, len); | ||
| 365 | cycle_end = get_cycle(); | ||
| 366 | |||
| 367 | free(dst); | ||
| 368 | return cycle_end - cycle_start; | ||
| 369 | } | ||
| 370 | |||
| 371 | static double do_memset_gettimeofday(const struct routine *r, size_t len, | ||
| 372 | bool prefault) | ||
| 373 | { | ||
| 374 | struct timeval tv_start, tv_end, tv_diff; | ||
| 375 | memset_t fn = r->fn.memset; | ||
| 376 | void *dst = NULL; | ||
| 377 | int i; | ||
| 378 | |||
| 379 | memset_alloc_mem(&dst, len); | ||
| 380 | |||
| 381 | if (prefault) | ||
| 382 | fn(dst, -1, len); | ||
| 383 | |||
| 384 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
| 385 | for (i = 0; i < iterations; ++i) | ||
| 386 | fn(dst, i, len); | ||
| 387 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
| 388 | |||
| 389 | timersub(&tv_end, &tv_start, &tv_diff); | ||
| 390 | |||
| 391 | free(dst); | ||
| 392 | return (double)((double)len / timeval2double(&tv_diff)); | ||
| 393 | } | ||
| 394 | |||
| 395 | static const char * const bench_mem_memset_usage[] = { | ||
| 396 | "perf bench mem memset <options>", | ||
| 397 | NULL | ||
| 398 | }; | ||
| 399 | |||
| 400 | static const struct routine memset_routines[] = { | ||
| 401 | { .name ="default", | ||
| 402 | .desc = "Default memset() provided by glibc", | ||
| 403 | .fn.memset = memset }, | ||
| 404 | #ifdef HAVE_ARCH_X86_64_SUPPORT | ||
| 405 | |||
| 406 | #define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, | ||
| 407 | #include "mem-memset-x86-64-asm-def.h" | ||
| 408 | #undef MEMSET_FN | ||
| 409 | |||
| 410 | #endif | ||
| 411 | |||
| 412 | { .name = NULL, | ||
| 413 | .desc = NULL, | ||
| 414 | .fn.memset = NULL } | ||
| 415 | }; | ||
| 416 | |||
| 417 | int bench_mem_memset(int argc, const char **argv, | ||
| 418 | const char *prefix __maybe_unused) | ||
| 419 | { | ||
| 420 | struct bench_mem_info info = { | ||
| 421 | .routines = memset_routines, | ||
| 422 | .do_cycle = do_memset_cycle, | ||
| 423 | .do_gettimeofday = do_memset_gettimeofday, | ||
| 424 | .usage = bench_mem_memset_usage, | ||
| 425 | }; | ||
| 426 | |||
| 427 | return bench_mem_common(argc, argv, prefix, &info); | ||
| 428 | } | ||
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
deleted file mode 100644
index 75fc3e65fb2a..000000000000
--- a/tools/perf/bench/mem-memset.c
+++ /dev/null
| @@ -1,304 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * mem-memset.c | ||
| 3 | * | ||
| 4 | * memset: Simple memory set in various ways | ||
| 5 | * | ||
| 6 | * Trivial clone of mem-memcpy.c. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "../perf.h" | ||
| 10 | #include "../util/util.h" | ||
| 11 | #include "../util/parse-options.h" | ||
| 12 | #include "../util/header.h" | ||
| 13 | #include "../util/cloexec.h" | ||
| 14 | #include "bench.h" | ||
| 15 | #include "mem-memset-arch.h" | ||
| 16 | |||
| 17 | #include <stdio.h> | ||
| 18 | #include <stdlib.h> | ||
| 19 | #include <string.h> | ||
| 20 | #include <sys/time.h> | ||
| 21 | #include <errno.h> | ||
| 22 | |||
| 23 | #define K 1024 | ||
| 24 | |||
| 25 | static const char *length_str = "1MB"; | ||
| 26 | static const char *routine = "default"; | ||
| 27 | static int iterations = 1; | ||
| 28 | static bool use_cycle; | ||
| 29 | static int cycle_fd; | ||
| 30 | static bool only_prefault; | ||
| 31 | static bool no_prefault; | ||
| 32 | |||
| 33 | static const struct option options[] = { | ||
| 34 | OPT_STRING('l', "length", &length_str, "1MB", | ||
| 35 | "Specify length of memory to set. " | ||
| 36 | "Available units: B, KB, MB, GB and TB (upper and lower)"), | ||
| 37 | OPT_STRING('r', "routine", &routine, "default", | ||
| 38 | "Specify routine to set"), | ||
| 39 | OPT_INTEGER('i', "iterations", &iterations, | ||
| 40 | "repeat memset() invocation this number of times"), | ||
| 41 | OPT_BOOLEAN('c', "cycle", &use_cycle, | ||
| 42 | "Use cycles event instead of gettimeofday() for measuring"), | ||
| 43 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
| 44 | "Show only the result with page faults before memset()"), | ||
| 45 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
| 46 | "Show only the result without page faults before memset()"), | ||
| 47 | OPT_END() | ||
| 48 | }; | ||
| 49 | |||
| 50 | typedef void *(*memset_t)(void *, int, size_t); | ||
| 51 | |||
| 52 | struct routine { | ||
| 53 | const char *name; | ||
| 54 | const char *desc; | ||
| 55 | memset_t fn; | ||
| 56 | }; | ||
| 57 | |||
| 58 | static const struct routine routines[] = { | ||
| 59 | { "default", | ||
| 60 | "Default memset() provided by glibc", | ||
| 61 | memset }, | ||
| 62 | #ifdef HAVE_ARCH_X86_64_SUPPORT | ||
| 63 | |||
| 64 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, | ||
| 65 | #include "mem-memset-x86-64-asm-def.h" | ||
| 66 | #undef MEMSET_FN | ||
| 67 | |||
| 68 | #endif | ||
| 69 | |||
| 70 | { NULL, | ||
| 71 | NULL, | ||
| 72 | NULL } | ||
| 73 | }; | ||
| 74 | |||
| 75 | static const char * const bench_mem_memset_usage[] = { | ||
| 76 | "perf bench mem memset <options>", | ||
| 77 | NULL | ||
| 78 | }; | ||
| 79 | |||
| 80 | static struct perf_event_attr cycle_attr = { | ||
| 81 | .type = PERF_TYPE_HARDWARE, | ||
| 82 | .config = PERF_COUNT_HW_CPU_CYCLES | ||
| 83 | }; | ||
| 84 | |||
| 85 | static void init_cycle(void) | ||
| 86 | { | ||
| 87 | cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, | ||
| 88 | perf_event_open_cloexec_flag()); | ||
| 89 | |||
| 90 | if (cycle_fd < 0 && errno == ENOSYS) | ||
| 91 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
| 92 | else | ||
| 93 | BUG_ON(cycle_fd < 0); | ||
| 94 | } | ||
| 95 | |||
| 96 | static u64 get_cycle(void) | ||
| 97 | { | ||
| 98 | int ret; | ||
| 99 | u64 clk; | ||
| 100 | |||
| 101 | ret = read(cycle_fd, &clk, sizeof(u64)); | ||
| 102 | BUG_ON(ret != sizeof(u64)); | ||
| 103 | |||
| 104 | return clk; | ||
| 105 | } | ||
| 106 | |||
| 107 | static double timeval2double(struct timeval *ts) | ||
| 108 | { | ||
| 109 | return (double)ts->tv_sec + | ||
| 110 | (double)ts->tv_usec / (double)1000000; | ||
| 111 | } | ||
| 112 | |||
| 113 | static void alloc_mem(void **dst, size_t length) | ||
| 114 | { | ||
| 115 | *dst = zalloc(length); | ||
| 116 | if (!*dst) | ||
| 117 | die("memory allocation failed - maybe length is too large?\n"); | ||
| 118 | } | ||
| 119 | |||
| 120 | static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault) | ||
| 121 | { | ||
| 122 | u64 cycle_start = 0ULL, cycle_end = 0ULL; | ||
| 123 | void *dst = NULL; | ||
| 124 | int i; | ||
| 125 | |||
| 126 | alloc_mem(&dst, len); | ||
| 127 | |||
| 128 | if (prefault) | ||
| 129 | fn(dst, -1, len); | ||
| 130 | |||
| 131 | cycle_start = get_cycle(); | ||
| 132 | for (i = 0; i < iterations; ++i) | ||
| 133 | fn(dst, i, len); | ||
| 134 | cycle_end = get_cycle(); | ||
| 135 | |||
| 136 | free(dst); | ||
| 137 | return cycle_end - cycle_start; | ||
| 138 | } | ||
| 139 | |||
| 140 | static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | ||
| 141 | { | ||
| 142 | struct timeval tv_start, tv_end, tv_diff; | ||
| 143 | void *dst = NULL; | ||
| 144 | int i; | ||
| 145 | |||
| 146 | alloc_mem(&dst, len); | ||
| 147 | |||
| 148 | if (prefault) | ||
| 149 | fn(dst, -1, len); | ||
| 150 | |||
| 151 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
| 152 | for (i = 0; i < iterations; ++i) | ||
| 153 | fn(dst, i, len); | ||
| 154 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
| 155 | |||
| 156 | timersub(&tv_end, &tv_start, &tv_diff); | ||
| 157 | |||
| 158 | free(dst); | ||
| 159 | return (double)((double)len / timeval2double(&tv_diff)); | ||
| 160 | } | ||
| 161 | |||
| 162 | #define pf (no_prefault ? 0 : 1) | ||
| 163 | |||
| 164 | #define print_bps(x) do { \ | ||
| 165 | if (x < K) \ | ||
| 166 | printf(" %14lf B/Sec", x); \ | ||
| 167 | else if (x < K * K) \ | ||
| 168 | printf(" %14lfd KB/Sec", x / K); \ | ||
| 169 | else if (x < K * K * K) \ | ||
| 170 | printf(" %14lf MB/Sec", x / K / K); \ | ||
| 171 | else \ | ||
| 172 | printf(" %14lf GB/Sec", x / K / K / K); \ | ||
| 173 | } while (0) | ||
| 174 | |||
| 175 | int bench_mem_memset(int argc, const char **argv, | ||
| 176 | const char *prefix __maybe_unused) | ||
| 177 | { | ||
| 178 | int i; | ||
| 179 | size_t len; | ||
| 180 | double result_bps[2]; | ||
| 181 | u64 result_cycle[2]; | ||
| 182 | |||
| 183 | argc = parse_options(argc, argv, options, | ||
| 184 | bench_mem_memset_usage, 0); | ||
| 185 | |||
| 186 | if (no_prefault && only_prefault) { | ||
| 187 | fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); | ||
| 188 | return 1; | ||
| 189 | } | ||
| 190 | |||
| 191 | if (use_cycle) | ||
| 192 | init_cycle(); | ||
| 193 | |||
| 194 | len = (size_t)perf_atoll((char *)length_str); | ||
| 195 | |||
| 196 | result_cycle[0] = result_cycle[1] = 0ULL; | ||
| 197 | result_bps[0] = result_bps[1] = 0.0; | ||
| 198 | |||
| 199 | if ((s64)len <= 0) { | ||
| 200 | fprintf(stderr, "Invalid length:%s\n", length_str); | ||
| 201 | return 1; | ||
| 202 | } | ||
| 203 | |||
| 204 | /* same to without specifying either of prefault and no-prefault */ | ||
| 205 | if (only_prefault && no_prefault) | ||
| 206 | only_prefault = no_prefault = false; | ||
| 207 | |||
| 208 | for (i = 0; routines[i].name; i++) { | ||
| 209 | if (!strcmp(routines[i].name, routine)) | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | if (!routines[i].name) { | ||
| 213 | printf("Unknown routine:%s\n", routine); | ||
| 214 | printf("Available routines...\n"); | ||
| 215 | for (i = 0; routines[i].name; i++) { | ||
| 216 | printf("\t%s ... %s\n", | ||
| 217 | routines[i].name, routines[i].desc); | ||
| 218 | } | ||
| 219 | return 1; | ||
| 220 | } | ||
| 221 | |||
| 222 | if (bench_format == BENCH_FORMAT_DEFAULT) | ||
| 223 | printf("# Copying %s Bytes ...\n\n", length_str); | ||
| 224 | |||
| 225 | if (!only_prefault && !no_prefault) { | ||
| 226 | /* show both of results */ | ||
| 227 | if (use_cycle) { | ||
| 228 | result_cycle[0] = | ||
| 229 | do_memset_cycle(routines[i].fn, len, false); | ||
| 230 | result_cycle[1] = | ||
| 231 | do_memset_cycle(routines[i].fn, len, true); | ||
| 232 | } else { | ||
| 233 | result_bps[0] = | ||
| 234 | do_memset_gettimeofday(routines[i].fn, | ||
| 235 | len, false); | ||
| 236 | result_bps[1] = | ||
| 237 | do_memset_gettimeofday(routines[i].fn, | ||
| 238 | len, true); | ||
| 239 | } | ||
| 240 | } else { | ||
| 241 | if (use_cycle) { | ||
| 242 | result_cycle[pf] = | ||
| 243 | do_memset_cycle(routines[i].fn, | ||
| 244 | len, only_prefault); | ||
| 245 | } else { | ||
| 246 | result_bps[pf] = | ||
| 247 | do_memset_gettimeofday(routines[i].fn, | ||
| 248 | len, only_prefault); | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | switch (bench_format) { | ||
| 253 | case BENCH_FORMAT_DEFAULT: | ||
| 254 | if (!only_prefault && !no_prefault) { | ||
| 255 | if (use_cycle) { | ||
| 256 | printf(" %14lf Cycle/Byte\n", | ||
| 257 | (double)result_cycle[0] | ||
| 258 | / (double)len); | ||
| 259 | printf(" %14lf Cycle/Byte (with prefault)\n ", | ||
| 260 | (double)result_cycle[1] | ||
| 261 | / (double)len); | ||
| 262 | } else { | ||
| 263 | print_bps(result_bps[0]); | ||
| 264 | printf("\n"); | ||
| 265 | print_bps(result_bps[1]); | ||
| 266 | printf(" (with prefault)\n"); | ||
| 267 | } | ||
| 268 | } else { | ||
| 269 | if (use_cycle) { | ||
| 270 | printf(" %14lf Cycle/Byte", | ||
| 271 | (double)result_cycle[pf] | ||
| 272 | / (double)len); | ||
| 273 | } else | ||
| 274 | print_bps(result_bps[pf]); | ||
| 275 | |||
| 276 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
| 277 | } | ||
| 278 | break; | ||
| 279 | case BENCH_FORMAT_SIMPLE: | ||
| 280 | if (!only_prefault && !no_prefault) { | ||
| 281 | if (use_cycle) { | ||
| 282 | printf("%lf %lf\n", | ||
| 283 | (double)result_cycle[0] / (double)len, | ||
| 284 | (double)result_cycle[1] / (double)len); | ||
| 285 | } else { | ||
| 286 | printf("%lf %lf\n", | ||
| 287 | result_bps[0], result_bps[1]); | ||
| 288 | } | ||
| 289 | } else { | ||
| 290 | if (use_cycle) { | ||
| 291 | printf("%lf\n", (double)result_cycle[pf] | ||
| 292 | / (double)len); | ||
| 293 | } else | ||
| 294 | printf("%lf\n", result_bps[pf]); | ||
| 295 | } | ||
| 296 | break; | ||
| 297 | default: | ||
| 298 | /* reaching this means there's some disaster: */ | ||
| 299 | die("unknown format: %d\n", bench_format); | ||
| 300 | break; | ||
| 301 | } | ||
| 302 | |||
| 303 | return 0; | ||
| 304 | } | ||
