aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorArjan van de Ven <arjan@linux.intel.com>2008-01-25 15:08:34 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-25 15:08:34 -0500
commit9745512ce79de686df354dc70a8d1a74d801892d (patch)
tree9b64e2b2e6d2ae534beef136922082f21701c7b9 /kernel
parent326587b840785c60f5dc18557235a23bafefd620 (diff)
sched: latencytop support
LatencyTOP kernel infrastructure; it measures latencies in the scheduler and tracks it system wide and per process. Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/latencytop.c239
-rw-r--r--kernel/sched_fair.c8
-rw-r--r--kernel/sysctl.c10
5 files changed, 258 insertions, 1 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 68755cd9a7e..390d4214626 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
62obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 62obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
63obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 63obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
64obj-$(CONFIG_MARKERS) += marker.o 64obj-$(CONFIG_MARKERS) += marker.o
65obj-$(CONFIG_LATENCYTOP) += latencytop.o
65 66
66ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 67ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
67# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 68# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/fork.c b/kernel/fork.c
index 0c969f4fade..39d22b3357d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1205,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1205#ifdef TIF_SYSCALL_EMU 1205#ifdef TIF_SYSCALL_EMU
1206 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 1206 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
1207#endif 1207#endif
1208 clear_all_latency_tracing(p);
1208 1209
1209 /* Our parent execution domain becomes current domain 1210 /* Our parent execution domain becomes current domain
1210 These must match for thread signalling to apply */ 1211 These must match for thread signalling to apply */
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
new file mode 100644
index 00000000000..b4e3c85abe7
--- /dev/null
+++ b/kernel/latencytop.c
@@ -0,0 +1,239 @@
1/*
2 * latencytop.c: Latency display infrastructure
3 *
4 * (C) Copyright 2008 Intel Corporation
5 * Author: Arjan van de Ven <arjan@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12#include <linux/latencytop.h>
13#include <linux/kallsyms.h>
14#include <linux/seq_file.h>
15#include <linux/notifier.h>
16#include <linux/spinlock.h>
17#include <linux/proc_fs.h>
18#include <linux/module.h>
19#include <linux/sched.h>
20#include <linux/list.h>
21#include <linux/slab.h>
22#include <linux/stacktrace.h>
23
24static DEFINE_SPINLOCK(latency_lock);
25
26#define MAXLR 128
27static struct latency_record latency_record[MAXLR];
28
29int latencytop_enabled;
30
31void clear_all_latency_tracing(struct task_struct *p)
32{
33 unsigned long flags;
34
35 if (!latencytop_enabled)
36 return;
37
38 spin_lock_irqsave(&latency_lock, flags);
39 memset(&p->latency_record, 0, sizeof(p->latency_record));
40 p->latency_record_count = 0;
41 spin_unlock_irqrestore(&latency_lock, flags);
42}
43
44static void clear_global_latency_tracing(void)
45{
46 unsigned long flags;
47
48 spin_lock_irqsave(&latency_lock, flags);
49 memset(&latency_record, 0, sizeof(latency_record));
50 spin_unlock_irqrestore(&latency_lock, flags);
51}
52
53static void __sched
54account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
55{
56 int firstnonnull = MAXLR + 1;
57 int i;
58
59 if (!latencytop_enabled)
60 return;
61
62 /* skip kernel threads for now */
63 if (!tsk->mm)
64 return;
65
66 for (i = 0; i < MAXLR; i++) {
67 int q;
68 int same = 1;
69 /* Nothing stored: */
70 if (!latency_record[i].backtrace[0]) {
71 if (firstnonnull > i)
72 firstnonnull = i;
73 continue;
74 }
75 for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
76 if (latency_record[i].backtrace[q] !=
77 lat->backtrace[q])
78 same = 0;
79 if (same && lat->backtrace[q] == 0)
80 break;
81 if (same && lat->backtrace[q] == ULONG_MAX)
82 break;
83 }
84 if (same) {
85 latency_record[i].count++;
86 latency_record[i].time += lat->time;
87 if (lat->time > latency_record[i].max)
88 latency_record[i].max = lat->time;
89 return;
90 }
91 }
92
93 i = firstnonnull;
94 if (i >= MAXLR - 1)
95 return;
96
97 /* Allocted a new one: */
98 memcpy(&latency_record[i], lat, sizeof(struct latency_record));
99}
100
101static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
102{
103 struct stack_trace trace;
104
105 memset(&trace, 0, sizeof(trace));
106 trace.max_entries = LT_BACKTRACEDEPTH;
107 trace.entries = &lat->backtrace[0];
108 trace.skip = 0;
109 save_stack_trace_tsk(tsk, &trace);
110}
111
112void __sched
113account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
114{
115 unsigned long flags;
116 int i, q;
117 struct latency_record lat;
118
119 if (!latencytop_enabled)
120 return;
121
122 /* Long interruptible waits are generally user requested... */
123 if (inter && usecs > 5000)
124 return;
125
126 memset(&lat, 0, sizeof(lat));
127 lat.count = 1;
128 lat.time = usecs;
129 lat.max = usecs;
130 store_stacktrace(tsk, &lat);
131
132 spin_lock_irqsave(&latency_lock, flags);
133
134 account_global_scheduler_latency(tsk, &lat);
135
136 /*
137 * short term hack; if we're > 32 we stop; future we recycle:
138 */
139 tsk->latency_record_count++;
140 if (tsk->latency_record_count >= LT_SAVECOUNT)
141 goto out_unlock;
142
143 for (i = 0; i < LT_SAVECOUNT ; i++) {
144 struct latency_record *mylat;
145 int same = 1;
146 mylat = &tsk->latency_record[i];
147 for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
148 if (mylat->backtrace[q] !=
149 lat.backtrace[q])
150 same = 0;
151 if (same && lat.backtrace[q] == 0)
152 break;
153 if (same && lat.backtrace[q] == ULONG_MAX)
154 break;
155 }
156 if (same) {
157 mylat->count++;
158 mylat->time += lat.time;
159 if (lat.time > mylat->max)
160 mylat->max = lat.time;
161 goto out_unlock;
162 }
163 }
164
165 /* Allocated a new one: */
166 i = tsk->latency_record_count;
167 memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
168
169out_unlock:
170 spin_unlock_irqrestore(&latency_lock, flags);
171}
172
173static int lstats_show(struct seq_file *m, void *v)
174{
175 int i;
176
177 seq_puts(m, "Latency Top version : v0.1\n");
178
179 for (i = 0; i < MAXLR; i++) {
180 if (latency_record[i].backtrace[0]) {
181 int q;
182 seq_printf(m, "%i %li %li ",
183 latency_record[i].count,
184 latency_record[i].time,
185 latency_record[i].max);
186 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
187 char sym[KSYM_NAME_LEN];
188 char *c;
189 if (!latency_record[i].backtrace[q])
190 break;
191 if (latency_record[i].backtrace[q] == ULONG_MAX)
192 break;
193 sprint_symbol(sym, latency_record[i].backtrace[q]);
194 c = strchr(sym, '+');
195 if (c)
196 *c = 0;
197 seq_printf(m, "%s ", sym);
198 }
199 seq_printf(m, "\n");
200 }
201 }
202 return 0;
203}
204
205static ssize_t
206lstats_write(struct file *file, const char __user *buf, size_t count,
207 loff_t *offs)
208{
209 clear_global_latency_tracing();
210
211 return count;
212}
213
214static int lstats_open(struct inode *inode, struct file *filp)
215{
216 return single_open(filp, lstats_show, NULL);
217}
218
219static struct file_operations lstats_fops = {
220 .open = lstats_open,
221 .read = seq_read,
222 .write = lstats_write,
223 .llseek = seq_lseek,
224 .release = single_release,
225};
226
227static int __init init_lstats_procfs(void)
228{
229 struct proc_dir_entry *pe;
230
231 pe = create_proc_entry("latency_stats", 0644, NULL);
232 if (!pe)
233 return -ENOMEM;
234
235 pe->proc_fops = &lstats_fops;
236
237 return 0;
238}
239__initcall(init_lstats_procfs);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3dab1ff83c4..1b3b40ad7c5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -20,6 +20,8 @@
20 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> 20 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
21 */ 21 */
22 22
23#include <linux/latencytop.h>
24
23/* 25/*
24 * Targeted preemption latency for CPU-bound tasks: 26 * Targeted preemption latency for CPU-bound tasks:
25 * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds) 27 * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
@@ -434,6 +436,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
434#ifdef CONFIG_SCHEDSTATS 436#ifdef CONFIG_SCHEDSTATS
435 if (se->sleep_start) { 437 if (se->sleep_start) {
436 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start; 438 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
439 struct task_struct *tsk = task_of(se);
437 440
438 if ((s64)delta < 0) 441 if ((s64)delta < 0)
439 delta = 0; 442 delta = 0;
@@ -443,9 +446,12 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
443 446
444 se->sleep_start = 0; 447 se->sleep_start = 0;
445 se->sum_sleep_runtime += delta; 448 se->sum_sleep_runtime += delta;
449
450 account_scheduler_latency(tsk, delta >> 10, 1);
446 } 451 }
447 if (se->block_start) { 452 if (se->block_start) {
448 u64 delta = rq_of(cfs_rq)->clock - se->block_start; 453 u64 delta = rq_of(cfs_rq)->clock - se->block_start;
454 struct task_struct *tsk = task_of(se);
449 455
450 if ((s64)delta < 0) 456 if ((s64)delta < 0)
451 delta = 0; 457 delta = 0;
@@ -462,11 +468,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
462 * time that the task spent sleeping: 468 * time that the task spent sleeping:
463 */ 469 */
464 if (unlikely(prof_on == SLEEP_PROFILING)) { 470 if (unlikely(prof_on == SLEEP_PROFILING)) {
465 struct task_struct *tsk = task_of(se);
466 471
467 profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk), 472 profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
468 delta >> 20); 473 delta >> 20);
469 } 474 }
475 account_scheduler_latency(tsk, delta >> 10, 0);
470 } 476 }
471#endif 477#endif
472} 478}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3afbd25f43e..5418ef61e16 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,6 +81,7 @@ extern int compat_log;
81extern int maps_protect; 81extern int maps_protect;
82extern int sysctl_stat_interval; 82extern int sysctl_stat_interval;
83extern int audit_argv_kb; 83extern int audit_argv_kb;
84extern int latencytop_enabled;
84 85
85/* Constants used for minimum and maximum */ 86/* Constants used for minimum and maximum */
86#ifdef CONFIG_DETECT_SOFTLOCKUP 87#ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -416,6 +417,15 @@ static struct ctl_table kern_table[] = {
416 .proc_handler = &proc_dointvec_taint, 417 .proc_handler = &proc_dointvec_taint,
417 }, 418 },
418#endif 419#endif
420#ifdef CONFIG_LATENCYTOP
421 {
422 .procname = "latencytop",
423 .data = &latencytop_enabled,
424 .maxlen = sizeof(int),
425 .mode = 0644,
426 .proc_handler = &proc_dointvec,
427 },
428#endif
419#ifdef CONFIG_SECURITY_CAPABILITIES 429#ifdef CONFIG_SECURITY_CAPABILITIES
420 { 430 {
421 .procname = "cap-bound", 431 .procname = "cap-bound",