aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorDavidlohr Bueso <davidlohr@hp.com>2013-12-14 23:31:55 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2014-03-14 10:20:43 -0400
commita043971141f163f9845324a2f83502d15011485d (patch)
tree83be443556d7d7f5d2b650623fd18de0fd2cdc67 /tools/perf
parent81827ed8d85e892311965dc9ec4120b2b2e745bd (diff)
perf bench: Add futex-hash microbenchmark
Introduce futexes to perf-bench and add a program that stresses and measures the kernel's implementation of the hash table.

This is a multi-threaded program that simply measures the amount of failed futex wait calls - we only want to deal with the hashing overhead, so a negative return of futex_wait_setup() is enough to do the trick.

An example run:

    $ perf bench futex hash -t 32
    Run summary [PID 10989]: 32 threads, each operating on 1024 [private] futexes for 10 secs.
    [thread 0] futexes: 0x19d9b10 ... 0x19dab0c [ 418713 ops/sec ]
    [thread 1] futexes: 0x19daca0 ... 0x19dbc9c [ 469913 ops/sec ]
    [thread 2] futexes: 0x19dbe30 ... 0x19dce2c [ 479744 ops/sec ]
    ...
    [thread 31] futexes: 0x19fbb80 ... 0x19fcb7c [ 464179 ops/sec ]
    Averaged 454310 operations/sec (+- 0.84%), total secs = 10

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1387081917-9102-2-git-send-email-davidlohr@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Makefile.perf1
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/futex-hash.c212
-rw-r--r--tools/perf/bench/futex.h48
-rw-r--r--tools/perf/builtin-bench.c10
-rw-r--r--tools/perf/perf.h6
6 files changed, 277 insertions, 1 deletions
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 1f7ec48ac959..54ae547b4bc4 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -426,6 +426,7 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
426endif 426endif
427BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 427BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
428BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o 428BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
429BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
429 430
430BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 431BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
431BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o 432BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 0fdc85269c4d..34edb5c34db3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -31,6 +31,7 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
31extern int bench_mem_memcpy(int argc, const char **argv, 31extern int bench_mem_memcpy(int argc, const char **argv,
32 const char *prefix __maybe_unused); 32 const char *prefix __maybe_unused);
33extern int bench_mem_memset(int argc, const char **argv, const char *prefix); 33extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
34extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
34 35
35#define BENCH_FORMAT_DEFAULT_STR "default" 36#define BENCH_FORMAT_DEFAULT_STR "default"
36#define BENCH_FORMAT_DEFAULT 0 37#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
new file mode 100644
index 000000000000..a84206e9c4aa
--- /dev/null
+++ b/tools/perf/bench/futex-hash.c
@@ -0,0 +1,212 @@
1/*
2 * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
3 *
4 * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
5 *
6 * This program is particularly useful for measuring the kernel's futex hash
7 * table/function implementation. In order for it to make sense, use with as
8 * many threads and futexes as possible.
9 */
10
11#include "../perf.h"
12#include "../util/util.h"
13#include "../util/stat.h"
14#include "../util/parse-options.h"
15#include "../util/header.h"
16#include "bench.h"
17#include "futex.h"
18
19#include <err.h>
20#include <stdlib.h>
21#include <sys/time.h>
22#include <pthread.h>
23
/* number of worker threads; 0 means "default to the number of CPUs" */
24static unsigned int nthreads = 0;
/* how long the parent sleeps before stopping the workers, in seconds */
25static unsigned int nsecs = 10;
26/* amount of futexes per thread */
27static unsigned int nfutexes = 1024;
/*
 * fshared: wait without FUTEX_PRIVATE_FLAG; done: set by toggle_done() and
 * polled by the workers; silent: suppress per-thread output and warnings.
 */
28static bool fshared = false, done = false, silent = false;
29
/* start is sampled before thread creation; end/runtime are filled in by toggle_done() */
30struct timeval start, end, runtime;
/* held around updates of threads_starting and around both condition waits */
31static pthread_mutex_t thread_lock;
/* workers that have not checked in yet; set to nthreads, decremented by each worker */
32static unsigned int threads_starting;
/* accumulates the per-thread ops/sec figures for the final summary */
33static struct stats throughput_stats;
/* thread_parent: signalled by the last worker to check in; thread_worker: broadcast by the parent to start the run */
34static pthread_cond_t thread_parent, thread_worker;
35
/* Per-thread benchmark state; one entry per worker in a calloc()ed array. */
36struct worker {
37 int tid; /* index of the worker in the array */
38 u_int32_t *futex; /* calloc()ed array of nfutexes futex words */
39 pthread_t thread; /* handle filled in by pthread_create() */
40 unsigned long ops; /* incremented once per futex_wait() call issued */
41};
42
/* Command line options; each entry stores its value in the file-scope variable it names. */
43static const struct option options[] = {
44 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
45 OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
46 OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
47 OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
48 OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
49 OPT_END()
50};
51
/* NULL-terminated usage strings handed to parse_options()/usage_with_options(). */
52static const char * const bench_futex_hash_usage[] = {
53 "perf bench futex hash <options>",
54 NULL
55};
56
57static void *workerfn(void *arg)
58{
59 int ret;
60 unsigned int i;
61 struct worker *w = (struct worker *) arg;
62
63 pthread_mutex_lock(&thread_lock);
64 threads_starting--;
65 if (!threads_starting)
66 pthread_cond_signal(&thread_parent);
67 pthread_cond_wait(&thread_worker, &thread_lock);
68 pthread_mutex_unlock(&thread_lock);
69
70 do {
71 for (i = 0; i < nfutexes; i++, w->ops++) {
72 /*
73 * We want the futex calls to fail in order to stress
74 * the hashing of uaddr and not measure other steps,
75 * such as internal waitqueue handling, thus enlarging
76 * the critical region protected by hb->lock.
77 */
78 ret = futex_wait(&w->futex[i], 1234, NULL,
79 fshared ? 0 : FUTEX_PRIVATE_FLAG);
80 if (!silent &&
81 (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
82 warn("Non-expected futex return call");
83 }
84 } while (!done);
85
86 return NULL;
87}
88
89static void toggle_done(int sig __maybe_unused,
90 siginfo_t *info __maybe_unused,
91 void *uc __maybe_unused)
92{
93 /* inform all threads that we're done for the day */
94 done = true;
95 gettimeofday(&end, NULL);
96 timersub(&end, &start, &runtime);
97}
98
99static void print_summary(void)
100{
101 unsigned long avg = avg_stats(&throughput_stats);
102 double stddev = stddev_stats(&throughput_stats);
103
104 printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
105 !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
106 (int) runtime.tv_sec);
107}
108
109int bench_futex_hash(int argc, const char **argv,
110 const char *prefix __maybe_unused)
111{
112 int ret = 0;
113 cpu_set_t cpu;
114 struct sigaction act;
115 unsigned int i, ncpus;
116 pthread_attr_t thread_attr;
117 struct worker *worker = NULL;
118
119 argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
120 if (argc) {
121 usage_with_options(bench_futex_hash_usage, options);
122 exit(EXIT_FAILURE);
123 }
124
125 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
126
127 sigfillset(&act.sa_mask);
128 act.sa_sigaction = toggle_done;
129 sigaction(SIGINT, &act, NULL);
130
131 if (!nthreads) /* default to the number of CPUs */
132 nthreads = ncpus;
133
134 worker = calloc(nthreads, sizeof(*worker));
135 if (!worker)
136 goto errmem;
137
138 printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
139 getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
140
141 init_stats(&throughput_stats);
142 pthread_mutex_init(&thread_lock, NULL);
143 pthread_cond_init(&thread_parent, NULL);
144 pthread_cond_init(&thread_worker, NULL);
145
146 threads_starting = nthreads;
147 pthread_attr_init(&thread_attr);
148 gettimeofday(&start, NULL);
149 for (i = 0; i < nthreads; i++) {
150 worker[i].tid = i;
151 worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
152 if (!worker[i].futex)
153 goto errmem;
154
155 CPU_ZERO(&cpu);
156 CPU_SET(i % ncpus, &cpu);
157
158 ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
159 if (ret)
160 err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
161
162 ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
163 (void *)(struct worker *) &worker[i]);
164 if (ret)
165 err(EXIT_FAILURE, "pthread_create");
166
167 }
168 pthread_attr_destroy(&thread_attr);
169
170 pthread_mutex_lock(&thread_lock);
171 while (threads_starting)
172 pthread_cond_wait(&thread_parent, &thread_lock);
173 pthread_cond_broadcast(&thread_worker);
174 pthread_mutex_unlock(&thread_lock);
175
176 sleep(nsecs);
177 toggle_done(0, NULL, NULL);
178
179 for (i = 0; i < nthreads; i++) {
180 ret = pthread_join(worker[i].thread, NULL);
181 if (ret)
182 err(EXIT_FAILURE, "pthread_join");
183 }
184
185 /* cleanup & report results */
186 pthread_cond_destroy(&thread_parent);
187 pthread_cond_destroy(&thread_worker);
188 pthread_mutex_destroy(&thread_lock);
189
190 for (i = 0; i < nthreads; i++) {
191 unsigned long t = worker[i].ops/runtime.tv_sec;
192 update_stats(&throughput_stats, t);
193 if (!silent) {
194 if (nfutexes == 1)
195 printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
196 worker[i].tid, &worker[i].futex[0], t);
197 else
198 printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
199 worker[i].tid, &worker[i].futex[0],
200 &worker[i].futex[nfutexes-1], t);
201 }
202
203 free(worker[i].futex);
204 }
205
206 print_summary();
207
208 free(worker);
209 return ret;
210errmem:
211 err(EXIT_FAILURE, "calloc");
212}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
new file mode 100644
index 000000000000..7d0bda543e3d
--- /dev/null
+++ b/tools/perf/bench/futex.h
@@ -0,0 +1,48 @@
1/*
2 * Glibc independent futex library for testing kernel functionality.
3 * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com>
4 * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/
5 */
6
7#ifndef _FUTEX_H
8#define _FUTEX_H
9
10#include <unistd.h>
11#include <sys/syscall.h>
12#include <sys/types.h>
13#include <linux/futex.h>
14
/**
 * futex() - SYS_futex syscall wrapper
 * @uaddr:	address of first futex
 * @op:		futex op code
 * @val:	typically expected value of uaddr, but varies by op
 * @timeout:	typically an absolute struct timespec (except where noted
 *		otherwise). Overloaded by some ops
 * @uaddr2:	address of second futex for some ops
 * @val3:	varies by op
 * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
 *
 * futex() is used by all the following futex op wrappers. It can also be
 * used for misuse and abuse testing. Generally, the specific op wrappers
 * should be used instead. It is a macro instead of a static inline function
 * as some of the types are overloaded (timeout is used for nr_requeue, for
 * example).
 *
 * @op and @opflags are parenthesized in the expansion so that an expression
 * argument (e.g. "shared ? 0 : FUTEX_PRIVATE_FLAG") is OR'd as a whole.
 *
 * These argument descriptions are the defaults for all
 * like-named arguments in the following wrappers except where noted below.
 */
#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
	syscall(SYS_futex, uaddr, (op) | (opflags), val, timeout, uaddr2, val3)
37
/**
 * futex_wait() - block on uaddr with optional timeout
 * @uaddr:	futex word to wait on
 * @val:	value passed along as the futex() @val argument
 * @timeout:	relative timeout
 * @opflags:	flags OR'd into FUTEX_WAIT, such as FUTEX_PRIVATE_FLAG
 *
 * Returns the result of the underlying futex() call, narrowed to int.
 */
static inline int
futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
{
	long rc = futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);

	return (int)rc;
}
47
48#endif /* _FUTEX_H */
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index e47f90cc7b98..a8b0138fc6a0 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
12 * sched ... scheduler and IPC performance 12 * sched ... scheduler and IPC performance
13 * mem ... memory access performance 13 * mem ... memory access performance
14 * numa ... NUMA scheduling and MM performance 14 * numa ... NUMA scheduling and MM performance
15 * futex ... Futex performance
15 */ 16 */
16#include "perf.h" 17#include "perf.h"
17#include "util/util.h" 18#include "util/util.h"
@@ -54,6 +55,12 @@ static struct bench mem_benchmarks[] = {
54 { NULL, NULL, NULL } 55 { NULL, NULL, NULL }
55}; 56};
56 57
58static struct bench futex_benchmarks[] = {
59 { "hash", "Benchmark for futex hash table", bench_futex_hash },
60 { "all", "Test all futex benchmarks", NULL },
61 { NULL, NULL, NULL }
62};
63
57struct collection { 64struct collection {
58 const char *name; 65 const char *name;
59 const char *summary; 66 const char *summary;
@@ -61,11 +68,12 @@ struct collection {
61}; 68};
62 69
63static struct collection collections[] = { 70static struct collection collections[] = {
64 { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, 71 { "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
65 { "mem", "Memory access benchmarks", mem_benchmarks }, 72 { "mem", "Memory access benchmarks", mem_benchmarks },
66#ifdef HAVE_LIBNUMA_SUPPORT 73#ifdef HAVE_LIBNUMA_SUPPORT
67 { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, 74 { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
68#endif 75#endif
76 {"futex", "Futex stressing benchmarks", futex_benchmarks },
69 { "all", "All benchmarks", NULL }, 77 { "all", "All benchmarks", NULL },
70 { NULL, NULL, NULL } 78 { NULL, NULL, NULL }
71}; 79};
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 6898ad0e199f..e18a8b5e6953 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -12,6 +12,9 @@
12#ifndef __NR_perf_event_open 12#ifndef __NR_perf_event_open
13# define __NR_perf_event_open 336 13# define __NR_perf_event_open 336
14#endif 14#endif
15#ifndef __NR_futex
16# define __NR_futex 240
17#endif
15#endif 18#endif
16 19
17#if defined(__x86_64__) 20#if defined(__x86_64__)
@@ -23,6 +26,9 @@
23#ifndef __NR_perf_event_open 26#ifndef __NR_perf_event_open
24# define __NR_perf_event_open 298 27# define __NR_perf_event_open 298
25#endif 28#endif
29#ifndef __NR_futex
30# define __NR_futex 202
31#endif
26#endif 32#endif
27 33
28#ifdef __powerpc__ 34#ifdef __powerpc__