author     Frederic Weisbecker <fweisbec@gmail.com>    2009-01-12 17:15:46 -0500
committer  Ingo Molnar <mingo@elte.hu>                 2009-01-14 06:11:43 -0500
commit     e1d8aa9f1dd655a3534b22fcfbecb70cdb125766 (patch)
tree       eb01b005a1d378515d5839bf1c7ea9836355ec9c /kernel/trace/trace_workqueue.c
parent     002bb86d8d42f18937aef396c3ecd65c7e02e21a (diff)
tracing: add a new workqueue tracer
Impact: new tracer

The workqueue tracer provides statistical information about each cpu
workqueue thread, such as the number of works inserted and executed
since its creation. It helps evaluate the amount of work each thread
has to perform; for example, it can help a developer decide whether to
choose a per-cpu workqueue instead of a singlethreaded one.

It only traces statistical information for now, but it will probably
provide event tracing later too.

Such a tracer could also help, and be improved, to support rt-priority
sorted workqueue development.

To take a snapshot of the workqueues' state at any time, just do:

  cat /debugfs/tracing/trace_stat/workqueues

I.e.:

  1    125        125       reiserfs/1
  1      0          0       scsi_tgtd/1
  1      0          0       aio/1
  1      0          0       ata/1
  1    114        114       kblockd/1
  1      0          0       kintegrityd/1
  1   2147       2147       events/1
  0      0          0       kpsmoused
  0    105        105       reiserfs/0
  0      0          0       scsi_tgtd/0
  0      0          0       aio/0
  0      0          0       ata_aux
  0      0          0       ata/0
  0      0          0       cqueue
  0      0          0       kacpi_notify
  0      0          0       kacpid
  0    149        149       kblockd/0
  0      0          0       kintegrityd/0
  0   1000       1000       khelper
  0   2270       2270       events/0

Changes in V2:

- Drop the static array based on NR_CPUS and dynamically allocate the
  stat array with num_possible_cpus() and other cpu mask facilities....
- Trace workqueue insertion at a slightly lower level (insert_work
  instead of queue_work) to handle even the workqueue barriers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
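Note that /debugfs is a non-standard mount point; on most systems debugfs
is mounted at /sys/kernel/debug (an assumption that varies by setup), so
the equivalent read is:

  cat /sys/kernel/debug/tracing/trace_stat/workqueues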
Diffstat (limited to 'kernel/trace/trace_workqueue.c')
-rw-r--r--  kernel/trace/trace_workqueue.c  287
1 file changed, 287 insertions(+), 0 deletions(-)
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..f8118d39ca9b
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,287 @@
/*
 * Workqueue statistical tracer.
 *
 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
 *
 */

#include <trace/workqueue.h>
#include <linux/list.h>
#include "trace_stat.h"
#include "trace.h"

/* A cpu workqueue thread */
struct cpu_workqueue_stats {
	struct list_head list;
	/* Useful to know if we print the cpu headers */
	bool first_entry;
	int cpu;
	pid_t pid;
	/* Can be inserted from interrupt or user context, needs to be atomic */
	atomic_t inserted;
	/*
	 * Doesn't need to be atomic: works are serialized in a single
	 * workqueue thread on a single CPU.
	 */
	unsigned int executed;
};

/* List of workqueue threads on one cpu */
struct workqueue_global_stats {
	struct list_head list;
	spinlock_t lock;
};

/*
 * No global lock needed: this array is allocated before any workqueue is
 * created, and never freed.
 */
static struct workqueue_global_stats *all_workqueue_stat;

/* Insertion of a work */
static void
probe_workqueue_insertion(struct task_struct *wq_thread,
			  struct work_struct *work)
{
	int cpu = cpumask_first(&wq_thread->cpus_allowed);
	struct cpu_workqueue_stats *node, *next;
	unsigned long flags;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
				 list) {
		if (node->pid == wq_thread->pid) {
			atomic_inc(&node->inserted);
			goto found;
		}
	}
	pr_debug("trace_workqueue: entry not found\n");
found:
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

/* Execution of a work */
static void
probe_workqueue_execution(struct task_struct *wq_thread,
			  struct work_struct *work)
{
	int cpu = cpumask_first(&wq_thread->cpus_allowed);
	struct cpu_workqueue_stats *node, *next;
	unsigned long flags;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
				 list) {
		if (node->pid == wq_thread->pid) {
			node->executed++;
			goto found;
		}
	}
	pr_debug("trace_workqueue: entry not found\n");
found:
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

/* Creation of a cpu workqueue thread */
static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
{
	struct cpu_workqueue_stats *cws;
	unsigned long flags;

	WARN_ON(cpu < 0 || cpu >= num_possible_cpus());

	/* Workqueues are sometimes created in atomic context */
	cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
	if (!cws) {
		pr_warning("trace_workqueue: not enough memory\n");
		return;
	}
	tracing_record_cmdline(wq_thread);

	INIT_LIST_HEAD(&cws->list);
	cws->cpu = cpu;
	cws->pid = wq_thread->pid;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	if (list_empty(&all_workqueue_stat[cpu].list))
		cws->first_entry = true;
	list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

/* Destruction of a cpu workqueue thread */
static void probe_workqueue_destruction(struct task_struct *wq_thread)
{
	/* Workqueues only execute on one cpu */
	int cpu = cpumask_first(&wq_thread->cpus_allowed);
	struct cpu_workqueue_stats *node, *next;
	unsigned long flags;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
				 list) {
		if (node->pid == wq_thread->pid) {
			list_del(&node->list);
			kfree(node);
			goto found;
		}
	}

	pr_debug("trace_workqueue: couldn't find workqueue to destroy\n");
found:
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

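/*
 * The three callbacks below implement the trace_stat iterator contract
 * (see trace_stat.h): stat_start() returns the first entry, stat_next()
 * is called repeatedly until it returns NULL, and stat_show() prints one
 * line per entry.  workqueue_stat_start_cpu() is the per-cpu helper they
 * share.
 */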
static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
{
	unsigned long flags;
	struct cpu_workqueue_stats *ret = NULL;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);

	if (!list_empty(&all_workqueue_stat[cpu].list))
		ret = list_entry(all_workqueue_stat[cpu].list.next,
				 struct cpu_workqueue_stats, list);

	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

	return ret;
}

static void *workqueue_stat_start(void)
{
	int cpu;
	void *ret = NULL;

	for_each_possible_cpu(cpu) {
		ret = workqueue_stat_start_cpu(cpu);
		if (ret)
			return ret;
	}
	return NULL;
}

static void *workqueue_stat_next(void *prev, int idx)
{
	struct cpu_workqueue_stats *prev_cws = prev;
	int cpu = prev_cws->cpu;
	unsigned long flags;
	void *ret = NULL;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
		spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
		for (++cpu; cpu < num_possible_cpus(); cpu++) {
			ret = workqueue_stat_start_cpu(cpu);
			if (ret)
				return ret;
		}
		return NULL;
	}
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
			  list);
}

static int workqueue_stat_show(struct seq_file *s, void *p)
{
	struct cpu_workqueue_stats *cws = p;
	unsigned long flags;
	int cpu = cws->cpu;

	seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
		   atomic_read(&cws->inserted),
		   cws->executed,
		   trace_find_cmdline(cws->pid));

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	if (&cws->list == all_workqueue_stat[cpu].list.next)
		seq_printf(s, "\n");
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

	return 0;
}

static int workqueue_stat_headers(struct seq_file *s)
{
	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
	seq_printf(s, "# |      |         |          |\n\n");
	return 0;
}

struct tracer_stat workqueue_stats __read_mostly = {
	.name = "workqueues",
	.stat_start = workqueue_stat_start,
	.stat_next = workqueue_stat_next,
	.stat_show = workqueue_stat_show,
	.stat_headers = workqueue_stat_headers
};

int __init stat_workqueue_init(void)
{
	if (register_stat_tracer(&workqueue_stats)) {
		pr_warning("Unable to register workqueue stat tracer\n");
		return 1;
	}

	return 0;
}
fs_initcall(stat_workqueue_init);

/*
 * Workqueues are created very early, just after pre-smp initcalls.
 * So we must register our tracepoints at this stage.
 */
int __init trace_workqueue_early_init(void)
{
	int ret, cpu;

	ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
	if (ret)
		goto out;

	ret = register_trace_workqueue_execution(probe_workqueue_execution);
	if (ret)
		goto no_insertion;

	ret = register_trace_workqueue_creation(probe_workqueue_creation);
	if (ret)
		goto no_execution;

	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
	if (ret)
		goto no_creation;

	all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
				     * num_possible_cpus(), GFP_KERNEL);

	if (!all_workqueue_stat) {
		pr_warning("trace_workqueue: not enough memory\n");
		/* The destruction probe was registered above; drop it too */
		unregister_trace_workqueue_destruction(probe_workqueue_destruction);
		goto no_creation;
	}

	for_each_possible_cpu(cpu) {
		spin_lock_init(&all_workqueue_stat[cpu].lock);
		INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
	}

	return 0;

no_creation:
	unregister_trace_workqueue_creation(probe_workqueue_creation);
no_execution:
	unregister_trace_workqueue_execution(probe_workqueue_execution);
no_insertion:
	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
out:
	pr_warning("trace_workqueue: unable to trace workqueues\n");

	return 1;
}
early_initcall(trace_workqueue_early_init);
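
For context, the register_trace_workqueue_*() calls above come from
tracepoint declarations in the companion <trace/workqueue.h> header added
alongside this file. The header itself is not part of this diff; below is
a minimal sketch of what it plausibly contains, assuming the
DECLARE_TRACE()/TPPROTO()/TPARGS() tracepoint macros of this kernel era:

  #ifndef __TRACE_WORKQUEUE_H
  #define __TRACE_WORKQUEUE_H

  #include <linux/tracepoint.h>
  #include <linux/workqueue.h>
  #include <linux/sched.h>

  /* One declaration per probe registered in trace_workqueue.c */
  DECLARE_TRACE(workqueue_insertion,
	     TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
	     TPARGS(wq_thread, work));

  DECLARE_TRACE(workqueue_execution,
	     TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
	     TPARGS(wq_thread, work));

  DECLARE_TRACE(workqueue_creation,
	     TPPROTO(struct task_struct *wq_thread, int cpu),
	     TPARGS(wq_thread, cpu));

  DECLARE_TRACE(workqueue_destruction,
	     TPPROTO(struct task_struct *wq_thread),
	     TPARGS(wq_thread));

  #endif /* __TRACE_WORKQUEUE_H */

Each DECLARE_TRACE() generates the trace_workqueue_*() hooks called from
kernel/workqueue.c and the register/unregister pairs used in the early
init function above.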