aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavidlohr Bueso <dave@stgolabs.net>2015-07-07 04:55:53 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2015-07-20 16:49:51 -0400
commitd2f3f5d2e9cae6e73f9642a5ddc8c8a07c35e79b (patch)
tree76994d160a27adaab0a7263c2697afdcae4f5df5
parent52c0a18b9010fb19d10889e8a00aa784197d357c (diff)
perf bench futex: Add lock_pi stresser
Allows a way of measuring low level kernel implementation of FUTEX_LOCK_PI and FUTEX_UNLOCK_PI. The program comes in two flavors: (i) single futex (default), all threads contend on the same uaddr. For the sake of the benchmark, we call into kernel space even when the lock is uncontended. The kernel will set it to TID, any waiters that come in and contend for the pi futex will be handled respectively by the kernel. (ii) -M option for multiple futexes, each thread deals with its own futex. This is a trivial scenario and only measures kernel handling of 0->TID transition. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Cc: Mel Gorman <mgorman@suse.de> Link: http://lkml.kernel.org/r/1436259353.12255.78.camel@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Documentation/perf-bench.txt4
-rw-r--r--tools/perf/bench/Build1
-rw-r--r--tools/perf/bench/bench.h2
-rw-r--r--tools/perf/bench/futex-lock-pi.c219
-rw-r--r--tools/perf/bench/futex.h20
-rw-r--r--tools/perf/builtin-bench.c2
6 files changed, 248 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bf3d0644bf10..ab632d9fbd7d 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -216,6 +216,10 @@ Suite for evaluating parallel wake calls.
216*requeue*:: 216*requeue*::
217Suite for evaluating requeue calls. 217Suite for evaluating requeue calls.
218 218
219*lock-pi*::
220Suite for evaluating futex lock_pi calls.
221
222
219SEE ALSO 223SEE ALSO
220-------- 224--------
221linkperf:perf[1] 225linkperf:perf[1]
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index c3ab760e06b4..573e28896038 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -5,6 +5,7 @@ perf-y += futex-hash.o
5perf-y += futex-wake.o 5perf-y += futex-wake.o
6perf-y += futex-wake-parallel.o 6perf-y += futex-wake-parallel.o
7perf-y += futex-requeue.o 7perf-y += futex-requeue.o
8perf-y += futex-lock-pi.o
8 9
9perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o 10perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
10perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o 11perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 70b2f718cc21..a50df86f2b9b 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
36extern int bench_futex_wake_parallel(int argc, const char **argv, 36extern int bench_futex_wake_parallel(int argc, const char **argv,
37 const char *prefix); 37 const char *prefix);
38extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); 38extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
39/* pi futexes */
40extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
39 41
40#define BENCH_FORMAT_DEFAULT_STR "default" 42#define BENCH_FORMAT_DEFAULT_STR "default"
41#define BENCH_FORMAT_DEFAULT 0 43#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
new file mode 100644
index 000000000000..bc6a16adbca8
--- /dev/null
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -0,0 +1,219 @@
1/*
2 * Copyright (C) 2015 Davidlohr Bueso.
3 */
4
5#include "../perf.h"
6#include "../util/util.h"
7#include "../util/stat.h"
8#include "../util/parse-options.h"
9#include "../util/header.h"
10#include "bench.h"
11#include "futex.h"
12
13#include <err.h>
14#include <stdlib.h>
15#include <sys/time.h>
16#include <pthread.h>
17
/*
 * Per-thread bookkeeping for the lock_pi benchmark: one entry per
 * benchmark thread.
 */
struct worker {
	int		tid;	/* index of this thread, assigned at creation */
	u_int32_t	*futex;	/* futex word this thread locks and unlocks */
	pthread_t	thread;	/* handle later passed to pthread_join() */
	unsigned long	ops;	/* number of lock/unlock rounds performed */
};
24
25static u_int32_t global_futex = 0;
26static struct worker *worker;
27static unsigned int nsecs = 10;
28static bool silent = false, multi = false;
29static bool done = false, fshared = false;
30static unsigned int ncpus, nthreads = 0;
31static int futex_flag = 0;
32struct timeval start, end, runtime;
33static pthread_mutex_t thread_lock;
34static unsigned int threads_starting;
35static struct stats throughput_stats;
36static pthread_cond_t thread_parent, thread_worker;
37
38static const struct option options[] = {
39 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
40 OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
41 OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"),
42 OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
43 OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
44 OPT_END()
45};
46
/*
 * Usage line shown by usage_with_options(); must name this benchmark
 * ("lock-pi"), not the requeue one it was copied from.
 */
static const char * const bench_futex_lock_pi_usage[] = {
	"perf bench futex lock-pi <options>",
	NULL
};
51
52static void print_summary(void)
53{
54 unsigned long avg = avg_stats(&throughput_stats);
55 double stddev = stddev_stats(&throughput_stats);
56
57 printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
58 !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
59 (int) runtime.tv_sec);
60}
61
62static void toggle_done(int sig __maybe_unused,
63 siginfo_t *info __maybe_unused,
64 void *uc __maybe_unused)
65{
66 /* inform all threads that we're done for the day */
67 done = true;
68 gettimeofday(&end, NULL);
69 timersub(&end, &start, &runtime);
70}
71
72static void *workerfn(void *arg)
73{
74 struct worker *w = (struct worker *) arg;
75
76 pthread_mutex_lock(&thread_lock);
77 threads_starting--;
78 if (!threads_starting)
79 pthread_cond_signal(&thread_parent);
80 pthread_cond_wait(&thread_worker, &thread_lock);
81 pthread_mutex_unlock(&thread_lock);
82
83 do {
84 int ret;
85 again:
86 ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
87
88 if (ret) { /* handle lock acquisition */
89 if (!silent)
90 warn("thread %d: Could not lock pi-lock for %p (%d)",
91 w->tid, w->futex, ret);
92 if (done)
93 break;
94
95 goto again;
96 }
97
98 usleep(1);
99 ret = futex_unlock_pi(w->futex, futex_flag);
100 if (ret && !silent)
101 warn("thread %d: Could not unlock pi-lock for %p (%d)",
102 w->tid, w->futex, ret);
103 w->ops++; /* account for thread's share of work */
104 } while (!done);
105
106 return NULL;
107}
108
109static void create_threads(struct worker *w, pthread_attr_t thread_attr)
110{
111 cpu_set_t cpu;
112 unsigned int i;
113
114 threads_starting = nthreads;
115
116 for (i = 0; i < nthreads; i++) {
117 worker[i].tid = i;
118
119 if (multi) {
120 worker[i].futex = calloc(1, sizeof(u_int32_t));
121 if (!worker[i].futex)
122 err(EXIT_FAILURE, "calloc");
123 } else
124 worker[i].futex = &global_futex;
125
126 CPU_ZERO(&cpu);
127 CPU_SET(i % ncpus, &cpu);
128
129 if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
130 err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
131
132 if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
133 err(EXIT_FAILURE, "pthread_create");
134 }
135}
136
137int bench_futex_lock_pi(int argc, const char **argv,
138 const char *prefix __maybe_unused)
139{
140 int ret = 0;
141 unsigned int i;
142 struct sigaction act;
143 pthread_attr_t thread_attr;
144
145 argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
146 if (argc)
147 goto err;
148
149 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
150
151 sigfillset(&act.sa_mask);
152 act.sa_sigaction = toggle_done;
153 sigaction(SIGINT, &act, NULL);
154
155 if (!nthreads)
156 nthreads = ncpus;
157
158 worker = calloc(nthreads, sizeof(*worker));
159 if (!worker)
160 err(EXIT_FAILURE, "calloc");
161
162 if (!fshared)
163 futex_flag = FUTEX_PRIVATE_FLAG;
164
165 printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
166 getpid(), nthreads, nsecs);
167
168 init_stats(&throughput_stats);
169 pthread_mutex_init(&thread_lock, NULL);
170 pthread_cond_init(&thread_parent, NULL);
171 pthread_cond_init(&thread_worker, NULL);
172
173 threads_starting = nthreads;
174 pthread_attr_init(&thread_attr);
175 gettimeofday(&start, NULL);
176
177 create_threads(worker, thread_attr);
178 pthread_attr_destroy(&thread_attr);
179
180 pthread_mutex_lock(&thread_lock);
181 while (threads_starting)
182 pthread_cond_wait(&thread_parent, &thread_lock);
183 pthread_cond_broadcast(&thread_worker);
184 pthread_mutex_unlock(&thread_lock);
185
186 sleep(nsecs);
187 toggle_done(0, NULL, NULL);
188
189 for (i = 0; i < nthreads; i++) {
190 ret = pthread_join(worker[i].thread, NULL);
191 if (ret)
192 err(EXIT_FAILURE, "pthread_join");
193 }
194
195 /* cleanup & report results */
196 pthread_cond_destroy(&thread_parent);
197 pthread_cond_destroy(&thread_worker);
198 pthread_mutex_destroy(&thread_lock);
199
200 for (i = 0; i < nthreads; i++) {
201 unsigned long t = worker[i].ops/runtime.tv_sec;
202
203 update_stats(&throughput_stats, t);
204 if (!silent)
205 printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
206 worker[i].tid, worker[i].futex, t);
207
208 if (multi)
209 free(worker[i].futex);
210 }
211
212 print_summary();
213
214 free(worker);
215 return ret;
216err:
217 usage_with_options(bench_futex_lock_pi_usage, options);
218 exit(EXIT_FAILURE);
219}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 7ed22ff1e1ac..d44de9f44281 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
56} 56}
57 57
58/** 58/**
59 * futex_lock_pi() - block on uaddr as a PI mutex
60 * @detect: whether (1) or not (0) to perform deadlock detection
61 */
62static inline int
63futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
64 int opflags)
65{
66 return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
67}
68
69/**
70 * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
71 */
72static inline int
73futex_unlock_pi(u_int32_t *uaddr, int opflags)
74{
75 return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
76}
77
78/**
59* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 79* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
60* @nr_wake: wake up to this many tasks 80* @nr_wake: wake up to this many tasks
61* @nr_requeue: requeue up to this many tasks 81* @nr_requeue: requeue up to this many tasks
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b5314e452ec7..f67934d46d40 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -60,6 +60,8 @@ static struct bench futex_benchmarks[] = {
60 { "wake", "Benchmark for futex wake calls", bench_futex_wake }, 60 { "wake", "Benchmark for futex wake calls", bench_futex_wake },
61 { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, 61 { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
62 { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, 62 { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
63 /* pi-futexes */
64 { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
63 { "all", "Test all futex benchmarks", NULL }, 65 { "all", "Test all futex benchmarks", NULL },
64 { NULL, NULL, NULL } 66 { NULL, NULL, NULL }
65}; 67};