aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Davidlohr Bueso <davidlohr@hp.com> 2013-12-14 23:31:56 -0500
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2014-03-14 10:20:43 -0400
commit 27db78307481dbba68c5f3563c6cb694b25521d9 (patch)
tree c83a241df6bae6c30fec37c6e26dcc7f3a4a9520
parent a043971141f163f9845324a2f83502d15011485d (diff)
perf bench: Add futex-wake microbenchmark
Block a bunch of threads on a futex and wake them up, N at a time.

This program is particularly useful to measure the latency of nthread wakeups in non-error situations: all waiters are queued and all wake calls wake up one or more tasks.

An example run:

  $ perf bench futex wake -t 512 -r 100
  Run summary [PID 27823]: blocking on 512 threads (at futex 0x7e10d4), waking up 1 at a time.

  [Run 1]: Wokeup 512 of 512 threads in 6.0080 ms
  [Run 2]: Wokeup 512 of 512 threads in 5.2280 ms
  [Run 3]: Wokeup 512 of 512 threads in 4.8300 ms
  ...
  [Run 100]: Wokeup 512 of 512 threads in 5.0100 ms

  Wokeup 512 of 512 threads in 5.0109 ms (+-2.25%)

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1387081917-9102-3-git-send-email-davidlohr@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Makefile.perf1
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/futex-wake.c201
-rw-r--r--tools/perf/bench/futex.h10
-rw-r--r--tools/perf/builtin-bench.c1
5 files changed, 214 insertions, 0 deletions
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 54ae547b4bc4..6fa5d8b74635 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -427,6 +427,7 @@ endif
427BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 427BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
428BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o 428BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
429BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o 429BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
430BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
430 431
431BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 432BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
432BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o 433BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 34edb5c34db3..6ac3f1d083cc 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -32,6 +32,7 @@ extern int bench_mem_memcpy(int argc, const char **argv,
32 const char *prefix __maybe_unused); 32 const char *prefix __maybe_unused);
33extern int bench_mem_memset(int argc, const char **argv, const char *prefix); 33extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
34extern int bench_futex_hash(int argc, const char **argv, const char *prefix); 34extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
35extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
35 36
36#define BENCH_FORMAT_DEFAULT_STR "default" 37#define BENCH_FORMAT_DEFAULT_STR "default"
37#define BENCH_FORMAT_DEFAULT 0 38#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
new file mode 100644
index 000000000000..d096169b161e
--- /dev/null
+++ b/tools/perf/bench/futex-wake.c
@@ -0,0 +1,201 @@
1/*
2 * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
3 *
4 * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
5 *
6 * This program is particularly useful to measure the latency of nthread wakeups
7 * in non-error situations: all waiters are queued and all wake calls wakeup
8 * one or more tasks, and thus the waitqueue is never empty.
9 */
10
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
#include "../util/parse-options.h"
#include "../util/header.h"
#include "bench.h"
#include "futex.h"

#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
23
24/* all threads will block on the same futex */
25static u_int32_t futex1 = 0;
26
27/*
28 * How many wakeups to do at a time.
29 * Default to 1 in order to make the kernel work more.
30 */
31static unsigned int nwakes = 1;
32
33/*
34 * There can be significant variance from run to run,
35 * the more repeats, the more exact the overall avg and
36 * the better idea of the futex latency.
37 */
38static unsigned int repeat = 10;
39
40pthread_t *worker;
41static bool done = 0, silent = 0;
42static pthread_mutex_t thread_lock;
43static pthread_cond_t thread_parent, thread_worker;
44static struct stats waketime_stats, wakeup_stats;
45static unsigned int ncpus, threads_starting, nthreads = 0;
46
47static const struct option options[] = {
48 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
49 OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
50 OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
51 OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
52 OPT_END()
53};
54
55static const char * const bench_futex_wake_usage[] = {
56 "perf bench futex wake <options>",
57 NULL
58};
59
60static void *workerfn(void *arg __maybe_unused)
61{
62 pthread_mutex_lock(&thread_lock);
63 threads_starting--;
64 if (!threads_starting)
65 pthread_cond_signal(&thread_parent);
66 pthread_cond_wait(&thread_worker, &thread_lock);
67 pthread_mutex_unlock(&thread_lock);
68
69 futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
70 return NULL;
71}
72
73static void print_summary(void)
74{
75 double waketime_avg = avg_stats(&waketime_stats);
76 double waketime_stddev = stddev_stats(&waketime_stats);
77 unsigned int wakeup_avg = avg_stats(&wakeup_stats);
78
79 printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
80 wakeup_avg,
81 nthreads,
82 waketime_avg/1e3,
83 rel_stddev_stats(waketime_stddev, waketime_avg));
84}
85
86static void block_threads(pthread_t *w,
87 pthread_attr_t thread_attr)
88{
89 cpu_set_t cpu;
90 unsigned int i;
91
92 threads_starting = nthreads;
93
94 /* create and block all threads */
95 for (i = 0; i < nthreads; i++) {
96 CPU_ZERO(&cpu);
97 CPU_SET(i % ncpus, &cpu);
98
99 if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
100 err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
101
102 if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
103 err(EXIT_FAILURE, "pthread_create");
104 }
105}
106
107static void toggle_done(int sig __maybe_unused,
108 siginfo_t *info __maybe_unused,
109 void *uc __maybe_unused)
110{
111 done = true;
112}
113
114int bench_futex_wake(int argc, const char **argv,
115 const char *prefix __maybe_unused)
116{
117 int ret = 0;
118 unsigned int i, j;
119 struct sigaction act;
120 pthread_attr_t thread_attr;
121
122 argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
123 if (argc) {
124 usage_with_options(bench_futex_wake_usage, options);
125 exit(EXIT_FAILURE);
126 }
127
128 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
129
130 sigfillset(&act.sa_mask);
131 act.sa_sigaction = toggle_done;
132 sigaction(SIGINT, &act, NULL);
133
134 if (!nthreads)
135 nthreads = ncpus;
136
137 worker = calloc(nthreads, sizeof(*worker));
138 if (!worker)
139 err(EXIT_FAILURE, "calloc");
140
141 printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
142 "waking up %d at a time.\n\n",
143 getpid(), nthreads, &futex1, nwakes);
144
145 init_stats(&wakeup_stats);
146 init_stats(&waketime_stats);
147 pthread_attr_init(&thread_attr);
148 pthread_mutex_init(&thread_lock, NULL);
149 pthread_cond_init(&thread_parent, NULL);
150 pthread_cond_init(&thread_worker, NULL);
151
152 for (j = 0; j < repeat && !done; j++) {
153 unsigned int nwoken = 0;
154 struct timeval start, end, runtime;
155
156 /* create, launch & block all threads */
157 block_threads(worker, thread_attr);
158
159 /* make sure all threads are already blocked */
160 pthread_mutex_lock(&thread_lock);
161 while (threads_starting)
162 pthread_cond_wait(&thread_parent, &thread_lock);
163 pthread_cond_broadcast(&thread_worker);
164 pthread_mutex_unlock(&thread_lock);
165
166 usleep(100000);
167
168 /* Ok, all threads are patiently blocked, start waking folks up */
169 gettimeofday(&start, NULL);
170 while (nwoken != nthreads)
171 nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
172 gettimeofday(&end, NULL);
173 timersub(&end, &start, &runtime);
174
175 update_stats(&wakeup_stats, nwoken);
176 update_stats(&waketime_stats, runtime.tv_usec);
177
178 if (!silent) {
179 printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
180 j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
181 }
182
183 for (i = 0; i < nthreads; i++) {
184 ret = pthread_join(worker[i], NULL);
185 if (ret)
186 err(EXIT_FAILURE, "pthread_join");
187 }
188
189 }
190
191 /* cleanup & report results */
192 pthread_cond_destroy(&thread_parent);
193 pthread_cond_destroy(&thread_worker);
194 pthread_mutex_destroy(&thread_lock);
195 pthread_attr_destroy(&thread_attr);
196
197 print_summary();
198
199 free(worker);
200 return ret;
201}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 7d0bda543e3d..6ac45093fac4 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -45,4 +45,14 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag
45 return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); 45 return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
46} 46}
47 47
48/**
49 * futex_wake() - wake one or more tasks blocked on uaddr
50 * @nr_wake: wake up to this many tasks
51 */
52static inline int
53futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
54{
55 return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
56}
57
48#endif /* _FUTEX_H */ 58#endif /* _FUTEX_H */
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index a8b0138fc6a0..743a30a8baa0 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -57,6 +57,7 @@ static struct bench mem_benchmarks[] = {
57 57
58static struct bench futex_benchmarks[] = { 58static struct bench futex_benchmarks[] = {
59 { "hash", "Benchmark for futex hash table", bench_futex_hash }, 59 { "hash", "Benchmark for futex hash table", bench_futex_hash },
60 { "wake", "Benchmark for futex wake calls", bench_futex_wake },
60 { "all", "Test all futex benchmarks", NULL }, 61 { "all", "Test all futex benchmarks", NULL },
61 { NULL, NULL, NULL } 62 { NULL, NULL, NULL }
62}; 63};