diff options
| author | Jan Beulich <jbeulich@suse.com> | 2012-01-24 07:03:22 -0500 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2012-01-24 17:25:32 -0500 |
| commit | be3de80dc2e671d9ee15e69fe9cd84d2b71e2225 (patch) | |
| tree | 8c9519ac9c6235ad8469d3f8d7ef2da660840bc5 /tools | |
| parent | 800eb01484b3ca1eaf4eb5186df13fb24de2db19 (diff) | |
perf bench: Also allow measuring memset()
This simply clones the respective memcpy() implementation.
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/4F16D743020000780006D735@nat28.tlf.novell.com
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/perf/Makefile | 4 | ||||
| -rw-r--r-- | tools/perf/bench/bench.h | 1 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset-arch.h | 12 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm-def.h | 12 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm.S | 6 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset.c | 291 | ||||
| -rw-r--r-- | tools/perf/builtin-bench.c | 3 | ||||
| -rw-r--r-- | tools/perf/util/include/asm/dwarf2.h | 4 |
8 files changed, 331 insertions, 2 deletions
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ac86d67b636e..599031ac69ac 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
| @@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64) | |||
| 61 | ifeq (${IS_X86_64}, 1) | 61 | ifeq (${IS_X86_64}, 1) |
| 62 | RAW_ARCH := x86_64 | 62 | RAW_ARCH := x86_64 |
| 63 | ARCH_CFLAGS := -DARCH_X86_64 | 63 | ARCH_CFLAGS := -DARCH_X86_64 |
| 64 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S | 64 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S |
| 65 | endif | 65 | endif |
| 66 | endif | 66 | endif |
| 67 | 67 | ||
| @@ -362,8 +362,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o | |||
| 362 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o | 362 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o |
| 363 | ifeq ($(RAW_ARCH),x86_64) | 363 | ifeq ($(RAW_ARCH),x86_64) |
| 364 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o | 364 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o |
| 365 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o | ||
| 365 | endif | 366 | endif |
| 366 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o | 367 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o |
| 368 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o | ||
| 367 | 369 | ||
| 368 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o | 370 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o |
| 369 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o | 371 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o |
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); |
| 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); |
| 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); | 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); |
| 7 | extern int bench_mem_memset(int argc, const char **argv, const char *prefix); | ||
| 7 | 8 | ||
| 8 | #define BENCH_FORMAT_DEFAULT_STR "default" | 9 | #define BENCH_FORMAT_DEFAULT_STR "default" |
| 9 | #define BENCH_FORMAT_DEFAULT 0 | 10 | #define BENCH_FORMAT_DEFAULT 0 |
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | |||
| 2 | #ifdef ARCH_X86_64 /* the prototypes below are emitted only when ARCH_X86_64 is defined */ | ||
| 3 | ||
| 4 | #define MEMSET_FN(fn, name, desc) /* turn each list entry into an extern prototype; name and desc are unused here */ \ | ||
| 5 | extern void *fn(void *, int, size_t); | ||
| 6 | ||
| 7 | #include "mem-memset-x86-64-asm-def.h" /* expands one MEMSET_FN() per listed routine */ | ||
| 8 | ||
| 9 | #undef MEMSET_FN /* leave the name free so another includer can expand the same list differently */ | ||
| 10 | ||
| 11 | #endif | ||
| 12 | |||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | |||
| 2 | MEMSET_FN(__memset, /* each entry is (symbol, routine name, description); the includer defines MEMSET_FN */ | ||
| 3 | "x86-64-unrolled", | ||
| 4 | "unrolled memset() in arch/x86/lib/memset_64.S") | ||
| 5 | ||
| 6 | MEMSET_FN(memset_c, | ||
| 7 | "x86-64-stosq", | ||
| 8 | "stosq-based memset() in arch/x86/lib/memset_64.S") /* description kept consistent with the routine name */ | ||
| 9 | ||
| 10 | MEMSET_FN(memset_c_e, | ||
| 11 | "x86-64-stosb", | ||
| 12 | "stosb-based memset() in arch/x86/lib/memset_64.S") /* description kept consistent with the routine name */ | ||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..cb9217063776 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #define memset MEMSET /* don't hide glibc's memset() */ | ||
| 2 | #define altinstr_replacement text /* presumably turns a .altinstr_replacement section directive in the included file into .text - confirm against memset_64.S */ | ||
| 3 | #define globl p2align 4; .globl /* every globl token expands to "p2align 4; .globl", i.e. an alignment directive is placed in front of it */ | ||
| 4 | #define Lmemset_c globl memset_c; memset_c /* NOTE(review): looks like this rewrites a local label in the included file into an exported memset_c symbol - confirm */ | ||
| 5 | #define Lmemset_c_e globl memset_c_e; memset_c_e /* same rewrite, producing memset_c_e */ | ||
| 6 | #include "../../../arch/x86/lib/memset_64.S" /* the included file is preprocessed with the renames above in effect */ | ||
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..9c0c6f0cba9b --- /dev/null +++ b/tools/perf/bench/mem-memset.c | |||
| @@ -0,0 +1,291 @@ | |||
| 1 | /* | ||
| 2 | * mem-memset.c | ||
| 3 | * | ||
| 4 | * memset: Simple memory set in various ways | ||
| 5 | * | ||
| 6 | * Trivial clone of mem-memcpy.c. | ||
| 7 | */ | ||
| 8 | #include <ctype.h> | ||
| 9 | |||
| 10 | #include "../perf.h" | ||
| 11 | #include "../util/util.h" | ||
| 12 | #include "../util/parse-options.h" | ||
| 13 | #include "../util/header.h" | ||
| 14 | #include "bench.h" | ||
| 15 | #include "mem-memset-arch.h" | ||
| 16 | |||
| 17 | #include <stdio.h> | ||
| 18 | #include <stdlib.h> | ||
| 19 | #include <string.h> | ||
| 20 | #include <sys/time.h> | ||
| 21 | #include <errno.h> | ||
| 22 | |||
| 23 | #define K 1024 /* step between the B, KB, MB and GB thresholds used by print_bps() */ | ||
| 24 | ||
| 25 | static const char *length_str = "1MB"; /* -l/--length value; parsed with perf_atoll() in bench_mem_memset() */ | ||
| 26 | static const char *routine = "default"; /* -r/--routine value; compared against routines[].name */ | ||
| 27 | static bool use_clock; /* -c/--clock: measure via the perf event in clock_fd instead of gettimeofday() */ | ||
| 28 | static int clock_fd; /* fd set by init_clock() and read by get_clock() */ | ||
| 29 | static bool only_prefault; /* -o/--only-prefault */ | ||
| 30 | static bool no_prefault; /* -n/--no-prefault */ | ||
| 31 | |||
| 32 | /* Command-line options of "perf bench mem memset"; each entry stores into one of the file-scope variables. */ static const struct option options[] = { | ||
| 33 | OPT_STRING('l', "length", &length_str, "1MB", | ||
| 34 | "Specify length of memory to set. " /* help text says "set": this benchmark fills memory, it does not copy */ | ||
| 35 | "available unit: B, MB, GB (upper and lower)"), | ||
| 36 | OPT_STRING('r', "routine", &routine, "default", | ||
| 37 | "Specify routine to set"), | ||
| 38 | OPT_BOOLEAN('c', "clock", &use_clock, | ||
| 39 | "Use CPU clock for measuring"), | ||
| 40 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
| 41 | "Show only the result with page faults before memset()"), | ||
| 42 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
| 43 | "Show only the result without page faults before memset()"), | ||
| 44 | OPT_END() | ||
| 45 | }; | ||
| 46 | |||
| 47 | typedef void *(*memset_t)(void *, int, size_t); /* pointer to a function with the memset() signature */ | ||
| 48 | ||
| 49 | struct routine { /* one selectable memset implementation */ | ||
| 50 | const char *name; /* value accepted by -r/--routine */ | ||
| 51 | const char *desc; /* text printed when listing the available routines */ | ||
| 52 | memset_t fn; /* implementation to measure */ | ||
| 53 | }; | ||
| 54 | |||
| 55 | /* Table of selectable memset implementations, looked up by name; terminated by an all-NULL entry. */ static const struct routine routines[] = { | ||
| 56 | { "default", | ||
| 57 | "Default memset() provided by glibc", | ||
| 58 | memset }, | ||
| 59 | #ifdef ARCH_X86_64 | ||
| 60 | ||
| 61 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, /* reorder the list arguments into struct routine field order */ | ||
| 62 | #include "mem-memset-x86-64-asm-def.h" | ||
| 63 | #undef MEMSET_FN | ||
| 64 | ||
| 65 | #endif | ||
| 66 | ||
| 67 | { NULL, /* sentinel: the lookup loops stop at the first NULL name */ | ||
| 68 | NULL, | ||
| 69 | NULL } | ||
| 70 | }; | ||
| 71 | |||
| 72 | /* Usage lines handed to parse_options(); NULL-terminated. */ static const char * const bench_mem_memset_usage[] = { | ||
| 73 | "perf bench mem memset <options>", | ||
| 74 | NULL | ||
| 75 | }; | ||
| 76 | |||
| 77 | /* Event description passed to sys_perf_event_open() by init_clock(): the hardware CPU-cycles counter. */ static struct perf_event_attr clock_attr = { | ||
| 78 | .type = PERF_TYPE_HARDWARE, | ||
| 79 | .config = PERF_COUNT_HW_CPU_CYCLES | ||
| 80 | }; | ||
| 81 | |||
| 82 | /* Open the clock_attr perf event for the current pid and store its fd in clock_fd; does not return normally on failure. */ static void init_clock(void) | ||
| 83 | { | ||
| 84 | clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); | ||
| 85 | ||
| 86 | if (clock_fd < 0 && errno == ENOSYS) /* ENOSYS gets a dedicated hint about missing kernel support */ | ||
| 87 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
| 88 | else | ||
| 89 | BUG_ON(clock_fd < 0); /* any other open failure is treated as a bug */ | ||
| 90 | } | ||
| 91 | |||
| 92 | /* Read one u64 counter value from clock_fd and return it; init_clock() must have set clock_fd first. */ static u64 get_clock(void) | ||
| 93 | { | ||
| 94 | int ret; | ||
| 95 | u64 clk; | ||
| 96 | ||
| 97 | ret = read(clock_fd, &clk, sizeof(u64)); | ||
| 98 | BUG_ON(ret != sizeof(u64)); /* a short or failed read is treated as a bug */ | ||
| 99 | ||
| 100 | return clk; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* Convert a struct timeval to seconds as a double: tv_sec + tv_usec / 1000000. */ static double timeval2double(struct timeval *ts) | ||
| 104 | { | ||
| 105 | return (double)ts->tv_sec + | ||
| 106 | (double)ts->tv_usec / (double)1000000; | ||
| 107 | } | ||
| 108 | |||
| 109 | /* Allocate length bytes with zalloc() and store the buffer in *dst; calls die() when the allocation fails. */ static void alloc_mem(void **dst, size_t length) | ||
| 110 | { | ||
| 111 | *dst = zalloc(length); | ||
| 112 | if (!*dst) /* test the returned buffer: dst itself is the caller's out-parameter and is never NULL */ | ||
| 113 | die("memory allocation failed - maybe length is too large?\n"); | ||
| 114 | } | ||
| 115 | |||
| 116 | /* Time one fn(dst, 0, len) call on a freshly allocated buffer and return the get_clock() delta; prefault adds an untimed pass first. */ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) | ||
| 117 | { | ||
| 118 | u64 clock_start = 0ULL, clock_end = 0ULL; | ||
| 119 | void *dst = NULL; | ||
| 120 | ||
| 121 | alloc_mem(&dst, len); | ||
| 122 | ||
| 123 | if (prefault) | ||
| 124 | fn(dst, -1, len); /* untimed write over the whole buffer before the measurement */ | ||
| 125 | ||
| 126 | clock_start = get_clock(); | ||
| 127 | fn(dst, 0, len); /* the measured call */ | ||
| 128 | clock_end = get_clock(); | ||
| 129 | ||
| 130 | free(dst); | ||
| 131 | return clock_end - clock_start; | ||
| 132 | } | ||
| 133 | |||
| 134 | /* Time one fn(dst, 0, len) call with gettimeofday() and return len divided by the elapsed seconds (bytes per second); prefault adds an untimed pass first. */ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | ||
| 135 | { | ||
| 136 | struct timeval tv_start, tv_end, tv_diff; | ||
| 137 | void *dst = NULL; | ||
| 138 | ||
| 139 | alloc_mem(&dst, len); | ||
| 140 | ||
| 141 | if (prefault) | ||
| 142 | fn(dst, -1, len); /* untimed write over the whole buffer before the measurement */ | ||
| 143 | ||
| 144 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
| 145 | fn(dst, 0, len); /* the measured call */ | ||
| 146 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
| 147 | ||
| 148 | timersub(&tv_end, &tv_start, &tv_diff); /* tv_diff = tv_end - tv_start */ | ||
| 149 | ||
| 150 | free(dst); | ||
| 151 | return (double)((double)len / timeval2double(&tv_diff)); | ||
| 152 | } | ||
| 153 | |||
| 154 | #define pf (no_prefault ? 0 : 1) /* result slot for single-result runs: 0 = without prefault, 1 = with prefault */ | ||
| 155 | |||
| 156 | #define print_bps(x) do { /* print x (bytes per second) scaled to B, KB, MB or GB per second; no trailing newline */ \ | ||
| 157 | if ((x) < K) \ | ||
| 158 | printf(" %14lf B/Sec", (x)); \ | ||
| 159 | else if ((x) < K * K) \ | ||
| 160 | printf(" %14lf KB/Sec", (x) / K); /* no stray 'd' after the conversion, so the unit follows the number directly */ \ | ||
| 161 | else if ((x) < K * K * K) \ | ||
| 162 | printf(" %14lf MB/Sec", (x) / K / K); \ | ||
| 163 | else \ | ||
| 164 | printf(" %14lf GB/Sec", (x) / K / K / K); \ | ||
| 165 | } while (0) | ||
| 166 | |||
| 167 | /* Entry point of "perf bench mem memset": parse options, pick the routine, measure it and print the result. Returns 0 on success, 1 on an invalid length or unknown routine. */ int bench_mem_memset(int argc, const char **argv, | ||
| 168 | const char *prefix __used) | ||
| 169 | { | ||
| 170 | int i; | ||
| 171 | size_t len; | ||
| 172 | double result_bps[2]; /* [0] = without prefault, [1] = with prefault */ | ||
| 173 | u64 result_clock[2]; /* same slot layout as result_bps */ | ||
| 174 | ||
| 175 | argc = parse_options(argc, argv, options, | ||
| 176 | bench_mem_memset_usage, 0); | ||
| 177 | ||
| 178 | if (use_clock) | ||
| 179 | init_clock(); | ||
| 180 | ||
| 181 | len = (size_t)perf_atoll((char *)length_str); | ||
| 182 | ||
| 183 | result_clock[0] = result_clock[1] = 0ULL; | ||
| 184 | result_bps[0] = result_bps[1] = 0.0; | ||
| 185 | ||
| 186 | if ((s64)len <= 0) { /* rejects zero and values that are negative when viewed as s64 */ | ||
| 187 | fprintf(stderr, "Invalid length:%s\n", length_str); | ||
| 188 | return 1; | ||
| 189 | } | ||
| 190 | ||
| 191 | /* giving both only-prefault and no-prefault is the same as giving neither */ | ||
| 192 | if (only_prefault && no_prefault) | ||
| 193 | only_prefault = no_prefault = false; | ||
| 194 | ||
| 195 | for (i = 0; routines[i].name; i++) { /* find the requested routine by name */ | ||
| 196 | if (!strcmp(routines[i].name, routine)) | ||
| 197 | break; | ||
| 198 | } | ||
| 199 | if (!routines[i].name) { /* reached the sentinel: no routine matched */ | ||
| 200 | printf("Unknown routine:%s\n", routine); | ||
| 201 | printf("Available routines...\n"); | ||
| 202 | for (i = 0; routines[i].name; i++) { | ||
| 203 | printf("\t%s ... %s\n", | ||
| 204 | routines[i].name, routines[i].desc); | ||
| 205 | } | ||
| 206 | return 1; | ||
| 207 | } | ||
| 208 | ||
| 209 | if (bench_format == BENCH_FORMAT_DEFAULT) | ||
| 210 | printf("# Copying %s Bytes ...\n\n", length_str); | ||
| 211 | ||
| 212 | if (!only_prefault && !no_prefault) { | ||
| 213 | /* show both of results */ | ||
| 214 | if (use_clock) { | ||
| 215 | result_clock[0] = | ||
| 216 | do_memset_clock(routines[i].fn, len, false); | ||
| 217 | result_clock[1] = | ||
| 218 | do_memset_clock(routines[i].fn, len, true); | ||
| 219 | } else { | ||
| 220 | result_bps[0] = | ||
| 221 | do_memset_gettimeofday(routines[i].fn, | ||
| 222 | len, false); | ||
| 223 | result_bps[1] = | ||
| 224 | do_memset_gettimeofday(routines[i].fn, | ||
| 225 | len, true); | ||
| 226 | } | ||
| 227 | } else { | ||
| 228 | if (use_clock) { | ||
| 229 | result_clock[pf] = /* exactly one of the two flags is set here; pf picks the matching slot */ | ||
| 230 | do_memset_clock(routines[i].fn, | ||
| 231 | len, only_prefault); | ||
| 232 | } else { | ||
| 233 | result_bps[pf] = | ||
| 234 | do_memset_gettimeofday(routines[i].fn, | ||
| 235 | len, only_prefault); | ||
| 236 | } | ||
| 237 | } | ||
| 238 | ||
| 239 | switch (bench_format) { | ||
| 240 | case BENCH_FORMAT_DEFAULT: | ||
| 241 | if (!only_prefault && !no_prefault) { | ||
| 242 | if (use_clock) { | ||
| 243 | printf(" %14lf Clock/Byte\n", | ||
| 244 | (double)result_clock[0] | ||
| 245 | / (double)len); | ||
| 246 | printf(" %14lf Clock/Byte (with prefault)\n", /* line ends at the newline; no stray space is written after it */ | ||
| 247 | (double)result_clock[1] | ||
| 248 | / (double)len); | ||
| 249 | } else { | ||
| 250 | print_bps(result_bps[0]); | ||
| 251 | printf("\n"); | ||
| 252 | print_bps(result_bps[1]); | ||
| 253 | printf(" (with prefault)\n"); | ||
| 254 | } | ||
| 255 | } else { | ||
| 256 | if (use_clock) { | ||
| 257 | printf(" %14lf Clock/Byte", | ||
| 258 | (double)result_clock[pf] | ||
| 259 | / (double)len); | ||
| 260 | } else | ||
| 261 | print_bps(result_bps[pf]); | ||
| 262 | ||
| 263 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
| 264 | } | ||
| 265 | break; | ||
| 266 | case BENCH_FORMAT_SIMPLE: /* bare numbers only: no units and no labels */ | ||
| 267 | if (!only_prefault && !no_prefault) { | ||
| 268 | if (use_clock) { | ||
| 269 | printf("%lf %lf\n", | ||
| 270 | (double)result_clock[0] / (double)len, | ||
| 271 | (double)result_clock[1] / (double)len); | ||
| 272 | } else { | ||
| 273 | printf("%lf %lf\n", | ||
| 274 | result_bps[0], result_bps[1]); | ||
| 275 | } | ||
| 276 | } else { | ||
| 277 | if (use_clock) { | ||
| 278 | printf("%lf\n", (double)result_clock[pf] | ||
| 279 | / (double)len); | ||
| 280 | } else | ||
| 281 | printf("%lf\n", result_bps[pf]); | ||
| 282 | } | ||
| 283 | break; | ||
| 284 | default: | ||
| 285 | /* reaching this means there's some disaster: */ | ||
| 286 | die("unknown format: %d\n", bench_format); | ||
| 287 | break; | ||
| 288 | } | ||
| 289 | ||
| 290 | return 0; | ||
| 291 | } | ||
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852a..b0e74ab2d7a2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c | |||
| @@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { | |||
| 52 | { "memcpy", | 52 | { "memcpy", |
| 53 | "Simple memory copy in various ways", | 53 | "Simple memory copy in various ways", |
| 54 | bench_mem_memcpy }, | 54 | bench_mem_memcpy }, |
| 55 | { "memset", | ||
| 56 | "Simple memory set in various ways", | ||
| 57 | bench_mem_memset }, | ||
| 55 | suite_all, | 58 | suite_all, |
| 56 | { NULL, | 59 | { NULL, |
| 57 | NULL, | 60 | NULL, |
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837a..afe38199e922 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h | |||
| @@ -2,10 +2,12 @@ | |||
| 2 | #ifndef PERF_DWARF2_H | 2 | #ifndef PERF_DWARF2_H |
| 3 | #define PERF_DWARF2_H | 3 | #define PERF_DWARF2_H |
| 4 | 4 | ||
| 5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ | 5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ |
| 6 | 6 | ||
| 7 | #define CFI_STARTPROC | 7 | #define CFI_STARTPROC |
| 8 | #define CFI_ENDPROC | 8 | #define CFI_ENDPROC |
| 9 | #define CFI_REMEMBER_STATE | ||
| 10 | #define CFI_RESTORE_STATE | ||
| 9 | 11 | ||
| 10 | #endif /* PERF_DWARF2_H */ | 12 | #endif /* PERF_DWARF2_H */ |
| 11 | 13 | ||
