about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Rabin Vincent <rabin.vincent@axis.com> 2014-12-02 10:50:40 -0500
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2014-12-09 07:14:05 -0500
commit 5bce1a5772cb52aad7e0466484ba07cfbfec2478 (patch)
tree 28dc1e7b55b1d4619647526237de47a990ac226f
parent 308197b9474bcde2cafba2cd19bef46e0c0428bd (diff)
perf bench: Merge memset into memcpy
The memset benchmark is largely copy-pasted from the memcpy benchmark.
Merge the two now that memcpy is made more generic.

Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rabin Vincent <rabinv@axis.com>
Link: http://lkml.kernel.org/r/1417535441-3965-2-git-send-email-rabin.vincent@axis.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- tools/perf/Makefile.perf | 1
-rw-r--r-- tools/perf/bench/mem-memcpy.c | 90
-rw-r--r-- tools/perf/bench/mem-memset.c | 304
3 files changed, 90 insertions, 305 deletions
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 478efa9b2364..763e68fb5767 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -458,7 +458,6 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
458BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o 458BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
459endif 459endif
460BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 460BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
461BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
462BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o 461BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
463BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o 462BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
464BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o 463BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 4a4493a7a1f8..e18be70c8a47 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -13,6 +13,7 @@
13#include "../util/cloexec.h" 13#include "../util/cloexec.h"
14#include "bench.h" 14#include "bench.h"
15#include "mem-memcpy-arch.h" 15#include "mem-memcpy-arch.h"
16#include "mem-memset-arch.h"
16 17
17#include <stdio.h> 18#include <stdio.h>
18#include <stdlib.h> 19#include <stdlib.h>
@@ -48,12 +49,14 @@ static const struct option options[] = {
48}; 49};
49 50
50typedef void *(*memcpy_t)(void *, const void *, size_t); 51typedef void *(*memcpy_t)(void *, const void *, size_t);
52typedef void *(*memset_t)(void *, int, size_t);
51 53
52struct routine { 54struct routine {
53 const char *name; 55 const char *name;
54 const char *desc; 56 const char *desc;
55 union { 57 union {
56 memcpy_t memcpy; 58 memcpy_t memcpy;
59 memset_t memset;
57 } fn; 60 } fn;
58}; 61};
59 62
@@ -336,3 +339,90 @@ int bench_mem_memcpy(int argc, const char **argv,
336 339
337 return bench_mem_common(argc, argv, prefix, &info); 340 return bench_mem_common(argc, argv, prefix, &info);
338} 341}
342
343static void memset_alloc_mem(void **dst, size_t length)
344{
345 *dst = zalloc(length);
346 if (!*dst)
347 die("memory allocation failed - maybe length is too large?\n");
348}
349
350static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
351{
352 u64 cycle_start = 0ULL, cycle_end = 0ULL;
353 memset_t fn = r->fn.memset;
354 void *dst = NULL;
355 int i;
356
357 memset_alloc_mem(&dst, len);
358
359 if (prefault)
360 fn(dst, -1, len);
361
362 cycle_start = get_cycle();
363 for (i = 0; i < iterations; ++i)
364 fn(dst, i, len);
365 cycle_end = get_cycle();
366
367 free(dst);
368 return cycle_end - cycle_start;
369}
370
371static double do_memset_gettimeofday(const struct routine *r, size_t len,
372 bool prefault)
373{
374 struct timeval tv_start, tv_end, tv_diff;
375 memset_t fn = r->fn.memset;
376 void *dst = NULL;
377 int i;
378
379 memset_alloc_mem(&dst, len);
380
381 if (prefault)
382 fn(dst, -1, len);
383
384 BUG_ON(gettimeofday(&tv_start, NULL));
385 for (i = 0; i < iterations; ++i)
386 fn(dst, i, len);
387 BUG_ON(gettimeofday(&tv_end, NULL));
388
389 timersub(&tv_end, &tv_start, &tv_diff);
390
391 free(dst);
392 return (double)((double)len / timeval2double(&tv_diff));
393}
394
395static const char * const bench_mem_memset_usage[] = {
396 "perf bench mem memset <options>",
397 NULL
398};
399
400static const struct routine memset_routines[] = {
401 { .name ="default",
402 .desc = "Default memset() provided by glibc",
403 .fn.memset = memset },
404#ifdef HAVE_ARCH_X86_64_SUPPORT
405
406#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
407#include "mem-memset-x86-64-asm-def.h"
408#undef MEMSET_FN
409
410#endif
411
412 { .name = NULL,
413 .desc = NULL,
414 .fn.memset = NULL }
415};
416
417int bench_mem_memset(int argc, const char **argv,
418 const char *prefix __maybe_unused)
419{
420 struct bench_mem_info info = {
421 .routines = memset_routines,
422 .do_cycle = do_memset_cycle,
423 .do_gettimeofday = do_memset_gettimeofday,
424 .usage = bench_mem_memset_usage,
425 };
426
427 return bench_mem_common(argc, argv, prefix, &info);
428}
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
deleted file mode 100644
index 75fc3e65fb2a..000000000000
--- a/tools/perf/bench/mem-memset.c
+++ /dev/null
@@ -1,304 +0,0 @@
1/*
2 * mem-memset.c
3 *
4 * memset: Simple memory set in various ways
5 *
6 * Trivial clone of mem-memcpy.c.
7 */
8
9#include "../perf.h"
10#include "../util/util.h"
11#include "../util/parse-options.h"
12#include "../util/header.h"
13#include "../util/cloexec.h"
14#include "bench.h"
15#include "mem-memset-arch.h"
16
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/time.h>
21#include <errno.h>
22
23#define K 1024
24
25static const char *length_str = "1MB";
26static const char *routine = "default";
27static int iterations = 1;
28static bool use_cycle;
29static int cycle_fd;
30static bool only_prefault;
31static bool no_prefault;
32
33static const struct option options[] = {
34 OPT_STRING('l', "length", &length_str, "1MB",
35 "Specify length of memory to set. "
36 "Available units: B, KB, MB, GB and TB (upper and lower)"),
37 OPT_STRING('r', "routine", &routine, "default",
38 "Specify routine to set"),
39 OPT_INTEGER('i', "iterations", &iterations,
40 "repeat memset() invocation this number of times"),
41 OPT_BOOLEAN('c', "cycle", &use_cycle,
42 "Use cycles event instead of gettimeofday() for measuring"),
43 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
44 "Show only the result with page faults before memset()"),
45 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
46 "Show only the result without page faults before memset()"),
47 OPT_END()
48};
49
50typedef void *(*memset_t)(void *, int, size_t);
51
52struct routine {
53 const char *name;
54 const char *desc;
55 memset_t fn;
56};
57
58static const struct routine routines[] = {
59 { "default",
60 "Default memset() provided by glibc",
61 memset },
62#ifdef HAVE_ARCH_X86_64_SUPPORT
63
64#define MEMSET_FN(fn, name, desc) { name, desc, fn },
65#include "mem-memset-x86-64-asm-def.h"
66#undef MEMSET_FN
67
68#endif
69
70 { NULL,
71 NULL,
72 NULL }
73};
74
75static const char * const bench_mem_memset_usage[] = {
76 "perf bench mem memset <options>",
77 NULL
78};
79
80static struct perf_event_attr cycle_attr = {
81 .type = PERF_TYPE_HARDWARE,
82 .config = PERF_COUNT_HW_CPU_CYCLES
83};
84
85static void init_cycle(void)
86{
87 cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
88 perf_event_open_cloexec_flag());
89
90 if (cycle_fd < 0 && errno == ENOSYS)
91 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
92 else
93 BUG_ON(cycle_fd < 0);
94}
95
96static u64 get_cycle(void)
97{
98 int ret;
99 u64 clk;
100
101 ret = read(cycle_fd, &clk, sizeof(u64));
102 BUG_ON(ret != sizeof(u64));
103
104 return clk;
105}
106
107static double timeval2double(struct timeval *ts)
108{
109 return (double)ts->tv_sec +
110 (double)ts->tv_usec / (double)1000000;
111}
112
113static void alloc_mem(void **dst, size_t length)
114{
115 *dst = zalloc(length);
116 if (!*dst)
117 die("memory allocation failed - maybe length is too large?\n");
118}
119
120static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
121{
122 u64 cycle_start = 0ULL, cycle_end = 0ULL;
123 void *dst = NULL;
124 int i;
125
126 alloc_mem(&dst, len);
127
128 if (prefault)
129 fn(dst, -1, len);
130
131 cycle_start = get_cycle();
132 for (i = 0; i < iterations; ++i)
133 fn(dst, i, len);
134 cycle_end = get_cycle();
135
136 free(dst);
137 return cycle_end - cycle_start;
138}
139
140static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
141{
142 struct timeval tv_start, tv_end, tv_diff;
143 void *dst = NULL;
144 int i;
145
146 alloc_mem(&dst, len);
147
148 if (prefault)
149 fn(dst, -1, len);
150
151 BUG_ON(gettimeofday(&tv_start, NULL));
152 for (i = 0; i < iterations; ++i)
153 fn(dst, i, len);
154 BUG_ON(gettimeofday(&tv_end, NULL));
155
156 timersub(&tv_end, &tv_start, &tv_diff);
157
158 free(dst);
159 return (double)((double)len / timeval2double(&tv_diff));
160}
161
162#define pf (no_prefault ? 0 : 1)
163
164#define print_bps(x) do { \
165 if (x < K) \
166 printf(" %14lf B/Sec", x); \
167 else if (x < K * K) \
168 printf(" %14lfd KB/Sec", x / K); \
169 else if (x < K * K * K) \
170 printf(" %14lf MB/Sec", x / K / K); \
171 else \
172 printf(" %14lf GB/Sec", x / K / K / K); \
173 } while (0)
174
175int bench_mem_memset(int argc, const char **argv,
176 const char *prefix __maybe_unused)
177{
178 int i;
179 size_t len;
180 double result_bps[2];
181 u64 result_cycle[2];
182
183 argc = parse_options(argc, argv, options,
184 bench_mem_memset_usage, 0);
185
186 if (no_prefault && only_prefault) {
187 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
188 return 1;
189 }
190
191 if (use_cycle)
192 init_cycle();
193
194 len = (size_t)perf_atoll((char *)length_str);
195
196 result_cycle[0] = result_cycle[1] = 0ULL;
197 result_bps[0] = result_bps[1] = 0.0;
198
199 if ((s64)len <= 0) {
200 fprintf(stderr, "Invalid length:%s\n", length_str);
201 return 1;
202 }
203
204 /* same to without specifying either of prefault and no-prefault */
205 if (only_prefault && no_prefault)
206 only_prefault = no_prefault = false;
207
208 for (i = 0; routines[i].name; i++) {
209 if (!strcmp(routines[i].name, routine))
210 break;
211 }
212 if (!routines[i].name) {
213 printf("Unknown routine:%s\n", routine);
214 printf("Available routines...\n");
215 for (i = 0; routines[i].name; i++) {
216 printf("\t%s ... %s\n",
217 routines[i].name, routines[i].desc);
218 }
219 return 1;
220 }
221
222 if (bench_format == BENCH_FORMAT_DEFAULT)
223 printf("# Copying %s Bytes ...\n\n", length_str);
224
225 if (!only_prefault && !no_prefault) {
226 /* show both of results */
227 if (use_cycle) {
228 result_cycle[0] =
229 do_memset_cycle(routines[i].fn, len, false);
230 result_cycle[1] =
231 do_memset_cycle(routines[i].fn, len, true);
232 } else {
233 result_bps[0] =
234 do_memset_gettimeofday(routines[i].fn,
235 len, false);
236 result_bps[1] =
237 do_memset_gettimeofday(routines[i].fn,
238 len, true);
239 }
240 } else {
241 if (use_cycle) {
242 result_cycle[pf] =
243 do_memset_cycle(routines[i].fn,
244 len, only_prefault);
245 } else {
246 result_bps[pf] =
247 do_memset_gettimeofday(routines[i].fn,
248 len, only_prefault);
249 }
250 }
251
252 switch (bench_format) {
253 case BENCH_FORMAT_DEFAULT:
254 if (!only_prefault && !no_prefault) {
255 if (use_cycle) {
256 printf(" %14lf Cycle/Byte\n",
257 (double)result_cycle[0]
258 / (double)len);
259 printf(" %14lf Cycle/Byte (with prefault)\n ",
260 (double)result_cycle[1]
261 / (double)len);
262 } else {
263 print_bps(result_bps[0]);
264 printf("\n");
265 print_bps(result_bps[1]);
266 printf(" (with prefault)\n");
267 }
268 } else {
269 if (use_cycle) {
270 printf(" %14lf Cycle/Byte",
271 (double)result_cycle[pf]
272 / (double)len);
273 } else
274 print_bps(result_bps[pf]);
275
276 printf("%s\n", only_prefault ? " (with prefault)" : "");
277 }
278 break;
279 case BENCH_FORMAT_SIMPLE:
280 if (!only_prefault && !no_prefault) {
281 if (use_cycle) {
282 printf("%lf %lf\n",
283 (double)result_cycle[0] / (double)len,
284 (double)result_cycle[1] / (double)len);
285 } else {
286 printf("%lf %lf\n",
287 result_bps[0], result_bps[1]);
288 }
289 } else {
290 if (use_cycle) {
291 printf("%lf\n", (double)result_cycle[pf]
292 / (double)len);
293 } else
294 printf("%lf\n", result_bps[pf]);
295 }
296 break;
297 default:
298 /* reaching this means there's some disaster: */
299 die("unknown format: %d\n", bench_format);
300 break;
301 }
302
303 return 0;
304}