diff options
author | Jan Beulich <jbeulich@suse.com> | 2012-01-24 07:03:22 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2012-01-24 17:25:32 -0500 |
commit | be3de80dc2e671d9ee15e69fe9cd84d2b71e2225 (patch) | |
tree | 8c9519ac9c6235ad8469d3f8d7ef2da660840bc5 /tools/perf/bench | |
parent | 800eb01484b3ca1eaf4eb5186df13fb24de2db19 (diff) |
perf bench: Also allow measuring memset()
This simply clones the respective memcpy() implementation.
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/4F16D743020000780006D735@nat28.tlf.novell.com
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- | tools/perf/bench/bench.h | 1 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-arch.h | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm-def.h | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm.S | 6 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset.c | 291 |
5 files changed, 322 insertions, 0 deletions
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
@@ -4,6 +4,7 @@ | |||
4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); |
5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); |
6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); | 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); |
7 | extern int bench_mem_memset(int argc, const char **argv, const char *prefix); | ||
7 | 8 | ||
8 | #define BENCH_FORMAT_DEFAULT_STR "default" | 9 | #define BENCH_FORMAT_DEFAULT_STR "default" |
9 | #define BENCH_FORMAT_DEFAULT 0 | 10 | #define BENCH_FORMAT_DEFAULT 0 |
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
/*
 * Prototypes for the x86-64 memset() variants.
 *
 * mem-memset-x86-64-asm-def.h is a list of MEMSET_FN(fn, name, desc)
 * entries.  Here each entry expands to an extern declaration of fn with the
 * memset() signature; the name and desc arguments are not used.
 */
#if defined(ARCH_X86_64)

#define MEMSET_FN(fn, name, desc) extern void *fn(void *, int, size_t);
#include "mem-memset-x86-64-asm-def.h"
#undef MEMSET_FN

#endif /* ARCH_X86_64 */
12 | |||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
2 | MEMSET_FN(__memset, | ||
3 | "x86-64-unrolled", | ||
4 | "unrolled memset() in arch/x86/lib/memset_64.S") | ||
5 | |||
6 | MEMSET_FN(memset_c, | ||
7 | "x86-64-stosq", | ||
8 | "movsq-based memset() in arch/x86/lib/memset_64.S") | ||
9 | |||
10 | MEMSET_FN(memset_c_e, | ||
11 | "x86-64-stosb", | ||
12 | "movsb-based memset() in arch/x86/lib/memset_64.S") | ||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..cb9217063776 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S | |||
@@ -0,0 +1,6 @@ | |||
/*
 * Wrapper that assembles the kernel's memset_64.S for the perf bench memset
 * test.  The defines below rewrite identifiers of the included file before
 * it is assembled:
 *
 *  - memset is renamed to MEMSET (see the note on that line);
 *  - altinstr_replacement becomes text -- presumably so that a
 *    ".section .altinstr_replacement" directive in the included file ends
 *    up in .text; confirm against memset_64.S;
 *  - globl expands to "p2align 4; .globl", so a ".globl" directive in the
 *    included file gains a preceding ".p2align 4";
 *  - Lmemset_c and Lmemset_c_e expand to a globl directive followed by the
 *    memset_c / memset_c_e name -- presumably turning local ".Lmemset_c" /
 *    ".Lmemset_c_e" labels of the included file into exported symbols;
 *    confirm against memset_64.S.
 *
 * The Lmemset_* replacements use globl, which is itself a macro defined
 * just above them and is expanded again when they are substituted.
 */
1 | #define memset MEMSET /* don't hide glibc's memset() */ | ||
2 | #define altinstr_replacement text | ||
3 | #define globl p2align 4; .globl | ||
4 | #define Lmemset_c globl memset_c; memset_c | ||
5 | #define Lmemset_c_e globl memset_c_e; memset_c_e | ||
6 | #include "../../../arch/x86/lib/memset_64.S" | ||
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..9c0c6f0cba9b --- /dev/null +++ b/tools/perf/bench/mem-memset.c | |||
@@ -0,0 +1,291 @@ | |||
1 | /* | ||
2 | * mem-memset.c | ||
3 | * | ||
4 | * memset: Simple memory set in various ways | ||
5 | * | ||
6 | * Trivial clone of mem-memcpy.c. | ||
7 | */ | ||
8 | #include <ctype.h> | ||
9 | |||
10 | #include "../perf.h" | ||
11 | #include "../util/util.h" | ||
12 | #include "../util/parse-options.h" | ||
13 | #include "../util/header.h" | ||
14 | #include "bench.h" | ||
15 | #include "mem-memset-arch.h" | ||
16 | |||
17 | #include <stdio.h> | ||
18 | #include <stdlib.h> | ||
19 | #include <string.h> | ||
20 | #include <sys/time.h> | ||
21 | #include <errno.h> | ||
22 | |||
/* Scale factor between B, KB, MB and GB when printing byte rates. */
#define K 1024

/* Settings filled in from the command line through the option table. */
static const char *length_str = "1MB";
static const char *routine = "default";
static bool use_clock;
static bool only_prefault;
static bool no_prefault;

/* File descriptor of the cycle-counter event; assigned by init_clock(). */
static int clock_fd;
31 | |||
32 | static const struct option options[] = { | ||
33 | OPT_STRING('l', "length", &length_str, "1MB", | ||
34 | "Specify length of memory to copy. " | ||
35 | "available unit: B, MB, GB (upper and lower)"), | ||
36 | OPT_STRING('r', "routine", &routine, "default", | ||
37 | "Specify routine to copy"), | ||
38 | OPT_BOOLEAN('c', "clock", &use_clock, | ||
39 | "Use CPU clock for measuring"), | ||
40 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
41 | "Show only the result with page faults before memset()"), | ||
42 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
43 | "Show only the result without page faults before memset()"), | ||
44 | OPT_END() | ||
45 | }; | ||
46 | |||
/* Signature shared by every memset() implementation under test. */
typedef void *(*memset_t)(void *dst, int c, size_t len);

/* One selectable memset() implementation. */
struct routine {
	const char *name;	/* compared against the --routine argument */
	const char *desc;	/* description printed in the routine list */
	memset_t fn;		/* function that gets benchmarked */
};
54 | |||
55 | static const struct routine routines[] = { | ||
56 | { "default", | ||
57 | "Default memset() provided by glibc", | ||
58 | memset }, | ||
59 | #ifdef ARCH_X86_64 | ||
60 | |||
61 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, | ||
62 | #include "mem-memset-x86-64-asm-def.h" | ||
63 | #undef MEMSET_FN | ||
64 | |||
65 | #endif | ||
66 | |||
67 | { NULL, | ||
68 | NULL, | ||
69 | NULL } | ||
70 | }; | ||
71 | |||
/* NULL-terminated usage text handed to parse_options(). */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>", NULL,
};
76 | |||
77 | static struct perf_event_attr clock_attr = { | ||
78 | .type = PERF_TYPE_HARDWARE, | ||
79 | .config = PERF_COUNT_HW_CPU_CYCLES | ||
80 | }; | ||
81 | |||
82 | static void init_clock(void) | ||
83 | { | ||
84 | clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); | ||
85 | |||
86 | if (clock_fd < 0 && errno == ENOSYS) | ||
87 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
88 | else | ||
89 | BUG_ON(clock_fd < 0); | ||
90 | } | ||
91 | |||
92 | static u64 get_clock(void) | ||
93 | { | ||
94 | int ret; | ||
95 | u64 clk; | ||
96 | |||
97 | ret = read(clock_fd, &clk, sizeof(u64)); | ||
98 | BUG_ON(ret != sizeof(u64)); | ||
99 | |||
100 | return clk; | ||
101 | } | ||
102 | |||
/*
 * Convert a struct timeval to seconds as a double.
 *
 * @ts: time value to convert; only read, hence const.
 *
 * Returns tv_sec plus tv_usec scaled from microseconds to seconds.
 */
static double timeval2double(const struct timeval *ts)
{
	return (double)ts->tv_sec +
		(double)ts->tv_usec / (double)1000000;
}
108 | |||
/*
 * Allocate a zeroed buffer of @length bytes and store it in *@dst.
 *
 * @dst:    out-parameter receiving the buffer; the caller frees it.
 * @length: number of bytes to allocate.
 *
 * Calls die() when the allocation fails.
 */
static void alloc_mem(void **dst, size_t length)
{
	*dst = zalloc(length);
	/*
	 * Check the allocation result, not the out-parameter itself: dst is
	 * the address of the caller's pointer and would never be NULL here.
	 */
	if (!*dst)
		die("memory allocation failed - maybe length is too large?\n");
}
115 | |||
116 | static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) | ||
117 | { | ||
118 | u64 clock_start = 0ULL, clock_end = 0ULL; | ||
119 | void *dst = NULL; | ||
120 | |||
121 | alloc_mem(&dst, len); | ||
122 | |||
123 | if (prefault) | ||
124 | fn(dst, -1, len); | ||
125 | |||
126 | clock_start = get_clock(); | ||
127 | fn(dst, 0, len); | ||
128 | clock_end = get_clock(); | ||
129 | |||
130 | free(dst); | ||
131 | return clock_end - clock_start; | ||
132 | } | ||
133 | |||
134 | static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | ||
135 | { | ||
136 | struct timeval tv_start, tv_end, tv_diff; | ||
137 | void *dst = NULL; | ||
138 | |||
139 | alloc_mem(&dst, len); | ||
140 | |||
141 | if (prefault) | ||
142 | fn(dst, -1, len); | ||
143 | |||
144 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
145 | fn(dst, 0, len); | ||
146 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
147 | |||
148 | timersub(&tv_end, &tv_start, &tv_diff); | ||
149 | |||
150 | free(dst); | ||
151 | return (double)((double)len / timeval2double(&tv_diff)); | ||
152 | } | ||
153 | |||
/*
 * Index into the two-element result arrays when only one run is made:
 * 0 holds the run without prefault, 1 the run with prefault.
 */
#define pf (no_prefault ? 0 : 1)

/*
 * Print the bytes-per-second rate x (a double) scaled to B, KB, MB or GB
 * per second, using K as the step between units.  No newline is printed.
 */
#define print_bps(x) do {					\
		if ((x) < K)					\
			printf(" %14lf B/Sec", (x));		\
		else if ((x) < K * K)				\
			printf(" %14lf KB/Sec", (x) / K);	\
		else if ((x) < K * K * K)			\
			printf(" %14lf MB/Sec", (x) / K / K);	\
		else						\
			printf(" %14lf GB/Sec", (x) / K / K / K); \
	} while (0)
166 | |||
167 | int bench_mem_memset(int argc, const char **argv, | ||
168 | const char *prefix __used) | ||
169 | { | ||
170 | int i; | ||
171 | size_t len; | ||
172 | double result_bps[2]; | ||
173 | u64 result_clock[2]; | ||
174 | |||
175 | argc = parse_options(argc, argv, options, | ||
176 | bench_mem_memset_usage, 0); | ||
177 | |||
178 | if (use_clock) | ||
179 | init_clock(); | ||
180 | |||
181 | len = (size_t)perf_atoll((char *)length_str); | ||
182 | |||
183 | result_clock[0] = result_clock[1] = 0ULL; | ||
184 | result_bps[0] = result_bps[1] = 0.0; | ||
185 | |||
186 | if ((s64)len <= 0) { | ||
187 | fprintf(stderr, "Invalid length:%s\n", length_str); | ||
188 | return 1; | ||
189 | } | ||
190 | |||
191 | /* same to without specifying either of prefault and no-prefault */ | ||
192 | if (only_prefault && no_prefault) | ||
193 | only_prefault = no_prefault = false; | ||
194 | |||
195 | for (i = 0; routines[i].name; i++) { | ||
196 | if (!strcmp(routines[i].name, routine)) | ||
197 | break; | ||
198 | } | ||
199 | if (!routines[i].name) { | ||
200 | printf("Unknown routine:%s\n", routine); | ||
201 | printf("Available routines...\n"); | ||
202 | for (i = 0; routines[i].name; i++) { | ||
203 | printf("\t%s ... %s\n", | ||
204 | routines[i].name, routines[i].desc); | ||
205 | } | ||
206 | return 1; | ||
207 | } | ||
208 | |||
209 | if (bench_format == BENCH_FORMAT_DEFAULT) | ||
210 | printf("# Copying %s Bytes ...\n\n", length_str); | ||
211 | |||
212 | if (!only_prefault && !no_prefault) { | ||
213 | /* show both of results */ | ||
214 | if (use_clock) { | ||
215 | result_clock[0] = | ||
216 | do_memset_clock(routines[i].fn, len, false); | ||
217 | result_clock[1] = | ||
218 | do_memset_clock(routines[i].fn, len, true); | ||
219 | } else { | ||
220 | result_bps[0] = | ||
221 | do_memset_gettimeofday(routines[i].fn, | ||
222 | len, false); | ||
223 | result_bps[1] = | ||
224 | do_memset_gettimeofday(routines[i].fn, | ||
225 | len, true); | ||
226 | } | ||
227 | } else { | ||
228 | if (use_clock) { | ||
229 | result_clock[pf] = | ||
230 | do_memset_clock(routines[i].fn, | ||
231 | len, only_prefault); | ||
232 | } else { | ||
233 | result_bps[pf] = | ||
234 | do_memset_gettimeofday(routines[i].fn, | ||
235 | len, only_prefault); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | switch (bench_format) { | ||
240 | case BENCH_FORMAT_DEFAULT: | ||
241 | if (!only_prefault && !no_prefault) { | ||
242 | if (use_clock) { | ||
243 | printf(" %14lf Clock/Byte\n", | ||
244 | (double)result_clock[0] | ||
245 | / (double)len); | ||
246 | printf(" %14lf Clock/Byte (with prefault)\n ", | ||
247 | (double)result_clock[1] | ||
248 | / (double)len); | ||
249 | } else { | ||
250 | print_bps(result_bps[0]); | ||
251 | printf("\n"); | ||
252 | print_bps(result_bps[1]); | ||
253 | printf(" (with prefault)\n"); | ||
254 | } | ||
255 | } else { | ||
256 | if (use_clock) { | ||
257 | printf(" %14lf Clock/Byte", | ||
258 | (double)result_clock[pf] | ||
259 | / (double)len); | ||
260 | } else | ||
261 | print_bps(result_bps[pf]); | ||
262 | |||
263 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
264 | } | ||
265 | break; | ||
266 | case BENCH_FORMAT_SIMPLE: | ||
267 | if (!only_prefault && !no_prefault) { | ||
268 | if (use_clock) { | ||
269 | printf("%lf %lf\n", | ||
270 | (double)result_clock[0] / (double)len, | ||
271 | (double)result_clock[1] / (double)len); | ||
272 | } else { | ||
273 | printf("%lf %lf\n", | ||
274 | result_bps[0], result_bps[1]); | ||
275 | } | ||
276 | } else { | ||
277 | if (use_clock) { | ||
278 | printf("%lf\n", (double)result_clock[pf] | ||
279 | / (double)len); | ||
280 | } else | ||
281 | printf("%lf\n", result_bps[pf]); | ||
282 | } | ||
283 | break; | ||
284 | default: | ||
285 | /* reaching this means there's some disaster: */ | ||
286 | die("unknown format: %d\n", bench_format); | ||
287 | break; | ||
288 | } | ||
289 | |||
290 | return 0; | ||
291 | } | ||