aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2012-01-24 07:03:22 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2012-01-24 17:25:32 -0500
commitbe3de80dc2e671d9ee15e69fe9cd84d2b71e2225 (patch)
tree8c9519ac9c6235ad8469d3f8d7ef2da660840bc5 /tools/perf
parent800eb01484b3ca1eaf4eb5186df13fb24de2db19 (diff)
perf bench: Also allow measuring memset()
This simply clones the respective memcpy() implementation. Cc: Ingo Molnar <mingo@elte.hu> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/4F16D743020000780006D735@nat28.tlf.novell.com Signed-off-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Makefile4
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/mem-memset-arch.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S6
-rw-r--r--tools/perf/bench/mem-memset.c291
-rw-r--r--tools/perf/builtin-bench.c3
-rw-r--r--tools/perf/util/include/asm/dwarf2.h4
8 files changed, 331 insertions, 2 deletions
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index ac86d67b636..599031ac69a 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64)
61 ifeq (${IS_X86_64}, 1) 61 ifeq (${IS_X86_64}, 1)
62 RAW_ARCH := x86_64 62 RAW_ARCH := x86_64
63 ARCH_CFLAGS := -DARCH_X86_64 63 ARCH_CFLAGS := -DARCH_X86_64
64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S 64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
65 endif 65 endif
66endif 66endif
67 67
@@ -362,8 +362,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
362BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o 362BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
363ifeq ($(RAW_ARCH),x86_64) 363ifeq ($(RAW_ARCH),x86_64)
364BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o 364BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
365BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
365endif 366endif
366BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 367BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
368BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
367 369
368BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 370BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
369BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o 371BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index f7781c6267c..a09bece6dad 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -4,6 +4,7 @@
4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); 4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); 5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); 6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
7extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
7 8
8#define BENCH_FORMAT_DEFAULT_STR "default" 9#define BENCH_FORMAT_DEFAULT_STR "default"
9#define BENCH_FORMAT_DEFAULT 0 10#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 00000000000..a040fa77665
--- /dev/null
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,12 @@
1
2#ifdef ARCH_X86_64
3
4#define MEMSET_FN(fn, name, desc) \
5 extern void *fn(void *, int, size_t);
6
7#include "mem-memset-x86-64-asm-def.h"
8
9#undef MEMSET_FN
10
11#endif
12
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 00000000000..a71dff97c1f
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,12 @@
1
2MEMSET_FN(__memset,
3 "x86-64-unrolled",
4 "unrolled memset() in arch/x86/lib/memset_64.S")
5
6MEMSET_FN(memset_c,
7 "x86-64-stosq",
8 "movsq-based memset() in arch/x86/lib/memset_64.S")
9
10MEMSET_FN(memset_c_e,
11 "x86-64-stosb",
12 "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 00000000000..cb921706377
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,6 @@
1#define memset MEMSET /* don't hide glibc's memset() */
2#define altinstr_replacement text
3#define globl p2align 4; .globl
4#define Lmemset_c globl memset_c; memset_c
5#define Lmemset_c_e globl memset_c_e; memset_c_e
6#include "../../../arch/x86/lib/memset_64.S"
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644
index 00000000000..9c0c6f0cba9
--- /dev/null
+++ b/tools/perf/bench/mem-memset.c
@@ -0,0 +1,291 @@
1/*
2 * mem-memset.c
3 *
4 * memset: Simple memory set in various ways
5 *
6 * Trivial clone of mem-memcpy.c.
7 */
8#include <ctype.h>
9
10#include "../perf.h"
11#include "../util/util.h"
12#include "../util/parse-options.h"
13#include "../util/header.h"
14#include "bench.h"
15#include "mem-memset-arch.h"
16
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/time.h>
21#include <errno.h>
22
23#define K 1024
24
25static const char *length_str = "1MB";
26static const char *routine = "default";
27static bool use_clock;
28static int clock_fd;
29static bool only_prefault;
30static bool no_prefault;
31
32static const struct option options[] = {
33 OPT_STRING('l', "length", &length_str, "1MB",
34 "Specify length of memory to copy. "
35 "available unit: B, MB, GB (upper and lower)"),
36 OPT_STRING('r', "routine", &routine, "default",
37 "Specify routine to copy"),
38 OPT_BOOLEAN('c', "clock", &use_clock,
39 "Use CPU clock for measuring"),
40 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
41 "Show only the result with page faults before memset()"),
42 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
43 "Show only the result without page faults before memset()"),
44 OPT_END()
45};
46
/* Signature shared by every memset() implementation under test. */
typedef void *(*memset_t)(void *, int, size_t);

/* One selectable implementation: command-line name, description, function. */
struct routine {
	const char *name;
	const char *desc;
	memset_t fn;
};

/* Table of selectable routines; the entry with a NULL name ends the list. */
static const struct routine routines[] = {
	{
		.name = "default",
		.desc = "Default memset() provided by glibc",
		.fn   = memset,
	},
#ifdef ARCH_X86_64

/* Expand each entry of the shared list into a table row. */
#define MEMSET_FN(func, label, text) { .name = label, .desc = text, .fn = func },
#include "mem-memset-x86-64-asm-def.h"
#undef MEMSET_FN

#endif

	{ .name = NULL, .desc = NULL, .fn = NULL }
};

/* Usage lines handed to parse_options(); NULL-terminated. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL
};
76
77static struct perf_event_attr clock_attr = {
78 .type = PERF_TYPE_HARDWARE,
79 .config = PERF_COUNT_HW_CPU_CYCLES
80};
81
82static void init_clock(void)
83{
84 clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
85
86 if (clock_fd < 0 && errno == ENOSYS)
87 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
88 else
89 BUG_ON(clock_fd < 0);
90}
91
92static u64 get_clock(void)
93{
94 int ret;
95 u64 clk;
96
97 ret = read(clock_fd, &clk, sizeof(u64));
98 BUG_ON(ret != sizeof(u64));
99
100 return clk;
101}
102
/*
 * Convert a struct timeval to seconds as a double.
 *
 * @ts: time value to convert; not modified.
 *
 * Returns tv_sec plus tv_usec scaled from microseconds to seconds.
 */
static double timeval2double(const struct timeval *ts)
{
	return (double)ts->tv_sec +
		(double)ts->tv_usec / (double)1000000;
}
108
/*
 * Allocate a buffer of @length bytes with zalloc() and store it in *@dst.
 *
 * @dst:    out-parameter receiving the new buffer; the caller frees it.
 * @length: number of bytes to allocate.
 *
 * Calls die() if the allocation fails.
 */
static void alloc_mem(void **dst, size_t length)
{
	*dst = zalloc(length);
	/* Test the allocation result, not the out-pointer itself. */
	if (!*dst)
		die("memory allocation failed - maybe length is too large?\n");
}
115
116static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
117{
118 u64 clock_start = 0ULL, clock_end = 0ULL;
119 void *dst = NULL;
120
121 alloc_mem(&dst, len);
122
123 if (prefault)
124 fn(dst, -1, len);
125
126 clock_start = get_clock();
127 fn(dst, 0, len);
128 clock_end = get_clock();
129
130 free(dst);
131 return clock_end - clock_start;
132}
133
134static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
135{
136 struct timeval tv_start, tv_end, tv_diff;
137 void *dst = NULL;
138
139 alloc_mem(&dst, len);
140
141 if (prefault)
142 fn(dst, -1, len);
143
144 BUG_ON(gettimeofday(&tv_start, NULL));
145 fn(dst, 0, len);
146 BUG_ON(gettimeofday(&tv_end, NULL));
147
148 timersub(&tv_end, &tv_start, &tv_diff);
149
150 free(dst);
151 return (double)((double)len / timeval2double(&tv_diff));
152}
153
/*
 * Result slot used when only one measurement is taken:
 * 0 = without prefault, 1 = with prefault.
 */
#define pf (no_prefault ? 0 : 1)

/*
 * Print a bytes-per-second rate @x, scaled to B, KB, MB or GB per second.
 * No trailing newline is written. @x is evaluated more than once, so pass
 * an expression without side effects.
 */
#define print_bps(x) do {					\
		if ((x) < K)					\
			printf(" %14lf B/Sec", (x));		\
		else if ((x) < K * K)				\
			printf(" %14lf KB/Sec", (x) / K);	\
		else if ((x) < K * K * K)			\
			printf(" %14lf MB/Sec", (x) / K / K);	\
		else						\
			printf(" %14lf GB/Sec", (x) / K / K / K); \
	} while (0)
166
167int bench_mem_memset(int argc, const char **argv,
168 const char *prefix __used)
169{
170 int i;
171 size_t len;
172 double result_bps[2];
173 u64 result_clock[2];
174
175 argc = parse_options(argc, argv, options,
176 bench_mem_memset_usage, 0);
177
178 if (use_clock)
179 init_clock();
180
181 len = (size_t)perf_atoll((char *)length_str);
182
183 result_clock[0] = result_clock[1] = 0ULL;
184 result_bps[0] = result_bps[1] = 0.0;
185
186 if ((s64)len <= 0) {
187 fprintf(stderr, "Invalid length:%s\n", length_str);
188 return 1;
189 }
190
191 /* same to without specifying either of prefault and no-prefault */
192 if (only_prefault && no_prefault)
193 only_prefault = no_prefault = false;
194
195 for (i = 0; routines[i].name; i++) {
196 if (!strcmp(routines[i].name, routine))
197 break;
198 }
199 if (!routines[i].name) {
200 printf("Unknown routine:%s\n", routine);
201 printf("Available routines...\n");
202 for (i = 0; routines[i].name; i++) {
203 printf("\t%s ... %s\n",
204 routines[i].name, routines[i].desc);
205 }
206 return 1;
207 }
208
209 if (bench_format == BENCH_FORMAT_DEFAULT)
210 printf("# Copying %s Bytes ...\n\n", length_str);
211
212 if (!only_prefault && !no_prefault) {
213 /* show both of results */
214 if (use_clock) {
215 result_clock[0] =
216 do_memset_clock(routines[i].fn, len, false);
217 result_clock[1] =
218 do_memset_clock(routines[i].fn, len, true);
219 } else {
220 result_bps[0] =
221 do_memset_gettimeofday(routines[i].fn,
222 len, false);
223 result_bps[1] =
224 do_memset_gettimeofday(routines[i].fn,
225 len, true);
226 }
227 } else {
228 if (use_clock) {
229 result_clock[pf] =
230 do_memset_clock(routines[i].fn,
231 len, only_prefault);
232 } else {
233 result_bps[pf] =
234 do_memset_gettimeofday(routines[i].fn,
235 len, only_prefault);
236 }
237 }
238
239 switch (bench_format) {
240 case BENCH_FORMAT_DEFAULT:
241 if (!only_prefault && !no_prefault) {
242 if (use_clock) {
243 printf(" %14lf Clock/Byte\n",
244 (double)result_clock[0]
245 / (double)len);
246 printf(" %14lf Clock/Byte (with prefault)\n ",
247 (double)result_clock[1]
248 / (double)len);
249 } else {
250 print_bps(result_bps[0]);
251 printf("\n");
252 print_bps(result_bps[1]);
253 printf(" (with prefault)\n");
254 }
255 } else {
256 if (use_clock) {
257 printf(" %14lf Clock/Byte",
258 (double)result_clock[pf]
259 / (double)len);
260 } else
261 print_bps(result_bps[pf]);
262
263 printf("%s\n", only_prefault ? " (with prefault)" : "");
264 }
265 break;
266 case BENCH_FORMAT_SIMPLE:
267 if (!only_prefault && !no_prefault) {
268 if (use_clock) {
269 printf("%lf %lf\n",
270 (double)result_clock[0] / (double)len,
271 (double)result_clock[1] / (double)len);
272 } else {
273 printf("%lf %lf\n",
274 result_bps[0], result_bps[1]);
275 }
276 } else {
277 if (use_clock) {
278 printf("%lf\n", (double)result_clock[pf]
279 / (double)len);
280 } else
281 printf("%lf\n", result_bps[pf]);
282 }
283 break;
284 default:
285 /* reaching this means there's some disaster: */
286 die("unknown format: %d\n", bench_format);
287 break;
288 }
289
290 return 0;
291}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index fcb96269852..b0e74ab2d7a 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = {
52 { "memcpy", 52 { "memcpy",
53 "Simple memory copy in various ways", 53 "Simple memory copy in various ways",
54 bench_mem_memcpy }, 54 bench_mem_memcpy },
55 { "memset",
56 "Simple memory set in various ways",
57 bench_mem_memset },
55 suite_all, 58 suite_all,
56 { NULL, 59 { NULL,
57 NULL, 60 NULL,
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
index bb4198e7837..afe38199e92 100644
--- a/tools/perf/util/include/asm/dwarf2.h
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -2,10 +2,12 @@
2#ifndef PERF_DWARF2_H 2#ifndef PERF_DWARF2_H
3#define PERF_DWARF2_H 3#define PERF_DWARF2_H
4 4
5/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ 5/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
6 6
7#define CFI_STARTPROC 7#define CFI_STARTPROC
8#define CFI_ENDPROC 8#define CFI_ENDPROC
9#define CFI_REMEMBER_STATE
10#define CFI_RESTORE_STATE
9 11
10#endif /* PERF_DWARF2_H */ 12#endif /* PERF_DWARF2_H */
11 13