author     Ingo Molnar <mingo@elte.hu>  2008-01-25 15:08:02 -0500
committer  Ingo Molnar <mingo@elte.hu>  2008-01-25 15:08:02 -0500
commit     82a1fcb90287052aabfa235e7ffc693ea003fe69
tree       826b464a248bebe259fe787f7b8d17d5626cf2c5
parent     d0d23b5432fe61229dd3641c5e94d4130bc4e61b
softlockup: automatically detect hung TASK_UNINTERRUPTIBLE tasks
this patch extends the soft-lockup detector to automatically
detect hung TASK_UNINTERRUPTIBLE tasks. Such hung tasks are
printed the following way:

 ------------------>
 INFO: task prctl:3042 blocked for more than 120 seconds.
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message
 prctl         D fd5e3793     0  3042   2997
        f6050f38 00000046 00000001 fd5e3793 00000009 c06d8264 c06dae80 00000286
        f6050f40 f6050f00 f7d34d90 f7d34fc8 c1e1be80 00000001 f6050000 00000000
        f7e92d00 00000286 f6050f18 c0489d1a f6050f40 00006605 00000000 c0133a5b
 Call Trace:
  [<c04883a5>] schedule_timeout+0x6d/0x8b
  [<c04883d8>] schedule_timeout_uninterruptible+0x15/0x17
  [<c0133a76>] msleep+0x10/0x16
  [<c0138974>] sys_prctl+0x30/0x1e2
  [<c0104c52>] sysenter_past_esp+0x5f/0xa5
 =======================
 2 locks held by prctl/3042:
 #0:  (&sb->s_type->i_mutex_key#5){--..}, at: [<c0197d11>] do_fsync+0x38/0x7a
 #1:  (jbd_handle){--..}, at: [<c01ca3d2>] journal_start+0xc7/0xe9
 <------------------

the current default timeout is 120 seconds. Such messages are printed
up to 10 times per bootup. If the system has crashed already then the
messages are not printed.

if lockdep is enabled then all held locks are printed as well.

this feature is a natural extension to the softlockup-detector (kernel
locked up without scheduling) and to the NMI watchdog (kernel locked up
with IRQs disabled).

[ Gautham R Shenoy <ego@in.ibm.com>: CPU hotplug fixes. ]
[ Andrew Morton <akpm@linux-foundation.org>: build warning fix. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
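A note on the new knobs: the tuning the message demonstrates with echo can equally be driven programmatically. A minimal userspace sketch (it only relies on the /proc/sys/kernel files this patch adds in kernel/sysctl.c, and needs root):

#include <stdio.h>

/*
 * Disable hung-task checking by writing 0 to the timeout, the
 * programmatic equivalent of the "echo 0 > ..." hint above.
 * 0 means infinite timeout, i.e. no checking is done.
 */
int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/hung_task_timeout_secs", "w");

	if (!f) {
		perror("hung_task_timeout_secs");
		return 1;
	}
	fprintf(f, "0\n");
	fclose(f);
	return 0;
}

The same pattern works for hung_task_check_count and hung_task_warnings.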
 include/linux/debug_locks.h |   5
 include/linux/sched.h       |  10
 kernel/fork.c               |   5
 kernel/lockdep.c            |  12
 kernel/sched.c              |   4
 kernel/softlockup.c         | 114
 kernel/sysctl.c             |  27
 7 files changed, 164 insertions(+), 13 deletions(-)
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 1678a5de7013..f4a5871767f5 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -47,6 +47,7 @@ struct task_struct;
 
 #ifdef CONFIG_LOCKDEP
 extern void debug_show_all_locks(void);
+extern void __debug_show_held_locks(struct task_struct *task);
 extern void debug_show_held_locks(struct task_struct *task);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
 extern void debug_check_no_locks_held(struct task_struct *task);
@@ -55,6 +56,10 @@ static inline void debug_show_all_locks(void)
 {
 }
 
+static inline void __debug_show_held_locks(struct task_struct *task)
+{
+}
+
 static inline void debug_show_held_locks(struct task_struct *task)
 {
 }
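The !CONFIG_LOCKDEP half mirrors the new extern with an empty static inline, the usual header idiom for keeping callers free of #ifdefs. A sketch of the pattern, with a hypothetical helper name:

/* Idiom sketch (hypothetical name, not part of this patch): */
#ifdef CONFIG_LOCKDEP
extern void debug_helper(struct task_struct *task);
#else
static inline void debug_helper(struct task_struct *task)
{
}
#endif

With the stub in place, the hung-task code below can call __debug_show_held_locks() unconditionally.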
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 288245f83bd4..0846f1f9e196 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -258,12 +258,17 @@ extern void account_process_tick(struct task_struct *task, int user);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
+extern void sched_show_task(struct task_struct *p);
+
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int softlockup_thresh;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern long sysctl_hung_task_warnings;
 #else
 static inline void softlockup_tick(void)
 {
@@ -1041,6 +1046,11 @@ struct task_struct {
 /* ipc stuff */
 	struct sysv_sem sysvsem;
 #endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+/* hung task detection */
+	unsigned long last_switch_timestamp;
+	unsigned long last_switch_count;
+#endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /* filesystem information */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8dd8ff281009..09c0b90a69cc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1059,6 +1059,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
 
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+	p->last_switch_count = 0;
+	p->last_switch_timestamp = 0;
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 	p->rchar = 0;		/* I/O counter: bytes read */
 	p->wchar = 0;		/* I/O counter: bytes written */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e2c07ece367d..3574379f4d62 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3206,7 +3206,11 @@ retry:
 
 EXPORT_SYMBOL_GPL(debug_show_all_locks);
 
-void debug_show_held_locks(struct task_struct *task)
+/*
+ * Careful: only use this function if you are sure that
+ * the task cannot run in parallel!
+ */
+void __debug_show_held_locks(struct task_struct *task)
 {
 	if (unlikely(!debug_locks)) {
 		printk("INFO: lockdep is turned off.\n");
@@ -3214,6 +3218,12 @@ void debug_show_held_locks(struct task_struct *task)
 	}
 	lockdep_print_held_locks(task);
 }
+EXPORT_SYMBOL_GPL(__debug_show_held_locks);
+
+void debug_show_held_locks(struct task_struct *task)
+{
+	__debug_show_held_locks(task);
+}
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
 
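The comment above __debug_show_held_locks() is the whole contract: the caller must ensure the task cannot run concurrently, since the held-lock list is walked without synchronization against the task. In this patch that guarantee comes from the hung-task scan itself; a condensed sketch of the calling pattern (simplified from check_hung_uninterruptible_tasks() in kernel/softlockup.c below):

/*
 * t is blocked in TASK_UNINTERRUPTIBLE and tasklist_lock is read-held,
 * so t is not expected to run while its held locks are printed:
 */
read_lock(&tasklist_lock);
do_each_thread(g, t) {
	if (t->state & TASK_UNINTERRUPTIBLE)
		__debug_show_held_locks(t);
} while_each_thread(g, t);
read_unlock(&tasklist_lock);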
diff --git a/kernel/sched.c b/kernel/sched.c
index c0e2db683e29..5b3d46574eeb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4945,7 +4945,7 @@ out_unlock:
 
 static const char stat_nam[] = "RSDTtZX";
 
-static void show_task(struct task_struct *p)
+void sched_show_task(struct task_struct *p)
 {
 	unsigned long free = 0;
 	unsigned state;
@@ -4998,7 +4998,7 @@ void show_state_filter(unsigned long state_filter)
 		 */
 		touch_nmi_watchdog();
 		if (!state_filter || (p->state & state_filter))
-			show_task(p);
+			sched_show_task(p);
 	} while_each_thread(g, p);
 
 	touch_all_softlockup_watchdogs();
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 11df812263c8..02f0ad534441 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -8,6 +8,7 @@
  */
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
@@ -24,7 +25,7 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 
 static int did_panic;
-int softlockup_thresh = 10;
+int softlockup_thresh = 60;
 
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -45,7 +46,7 @@ static struct notifier_block panic_block = {
  */
 static unsigned long get_timestamp(int this_cpu)
 {
-	return cpu_clock(this_cpu) >> 30;	/* 2^30 ~= 10^9 */
+	return cpu_clock(this_cpu) >> 30LL;	/* 2^30 ~= 10^9 */
 }
 
 void touch_softlockup_watchdog(void)
@@ -100,11 +101,7 @@ void softlockup_tick(void)
 
 	now = get_timestamp(this_cpu);
 
-	/* Wake up the high-prio watchdog task every second: */
-	if (now > (touch_timestamp + 1))
-		wake_up_process(per_cpu(watchdog_task, this_cpu));
-
-	/* Warn about unreasonable 10+ seconds delays: */
+	/* Warn about unreasonable delays: */
 	if (now <= (touch_timestamp + softlockup_thresh))
 		return;
 
@@ -122,11 +119,93 @@ void softlockup_tick(void)
 }
 
 /*
+ * Have a reasonable limit on the number of tasks checked:
+ */
+unsigned long sysctl_hung_task_check_count = 1024;
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long sysctl_hung_task_timeout_secs = 120;
+
+long sysctl_hung_task_warnings = 10;
+
+/*
+ * Only do the hung-tasks check on one CPU:
+ */
+static int check_cpu __read_mostly = -1;
+
+static void check_hung_task(struct task_struct *t, unsigned long now)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+	if (t->flags & PF_FROZEN)
+		return;
+
+	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
+		t->last_switch_count = switch_count;
+		t->last_switch_timestamp = now;
+		return;
+	}
+	if ((long)(now - t->last_switch_timestamp) <
+						sysctl_hung_task_timeout_secs)
+		return;
+	if (sysctl_hung_task_warnings < 0)
+		return;
+	sysctl_hung_task_warnings--;
+
+	/*
+	 * Ok, the task did not get scheduled for more than 2 minutes,
+	 * complain:
+	 */
+	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+			"%ld seconds.\n", t->comm, t->pid,
+			sysctl_hung_task_timeout_secs);
+	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			" disables this message.\n");
+	sched_show_task(t);
+	__debug_show_held_locks(t);
+
+	t->last_switch_timestamp = now;
+	touch_nmi_watchdog();
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(int this_cpu)
+{
+	int max_count = sysctl_hung_task_check_count;
+	unsigned long now = get_timestamp(this_cpu);
+	struct task_struct *g, *t;
+
+	/*
+	 * If the system crashed already then all bets are off,
+	 * do not report extra hung tasks:
+	 */
+	if ((tainted & TAINT_DIE) || did_panic)
+		return;
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, t) {
+		if (!--max_count)
+			break;
+		if (t->state & TASK_UNINTERRUPTIBLE)
+			check_hung_task(t, now);
+	} while_each_thread(g, t);
+
+	read_unlock(&tasklist_lock);
+}
+
+/*
  * The watchdog thread - runs every second and touches the timestamp.
  */
 static int watchdog(void *__bind_cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	int this_cpu = (long)__bind_cpu;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
@@ -135,13 +214,18 @@ static int watchdog(void *__bind_cpu)
 
 	/*
 	 * Run briefly once per second to reset the softlockup timestamp.
-	 * If this gets delayed for more than 10 seconds then the
+	 * If this gets delayed for more than 60 seconds then the
 	 * debug-printout triggers in softlockup_tick().
 	 */
 	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
 		touch_softlockup_watchdog();
-		schedule();
+		msleep_interruptible(10000);
+
+		if (this_cpu != check_cpu)
+			continue;
+
+		if (sysctl_hung_task_timeout_secs)
+			check_hung_uninterruptible_tasks(this_cpu);
 	}
 
 	return 0;
@@ -171,6 +255,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
+		check_cpu = any_online_cpu(cpu_online_map);
 		wake_up_process(per_cpu(watchdog_task, hotcpu));
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -181,6 +266,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		/* Unbind so it can run.  Fall thru. */
 		kthread_bind(per_cpu(watchdog_task, hotcpu),
 			     any_online_cpu(cpu_online_map));
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		if (hotcpu == check_cpu) {
+			cpumask_t temp_cpu_online_map = cpu_online_map;
+
+			cpu_clear(hotcpu, temp_cpu_online_map);
+			check_cpu = any_online_cpu(temp_cpu_online_map);
+		}
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		p = per_cpu(watchdog_task, hotcpu);
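A note on the arithmetic in get_timestamp(): shifting nanoseconds right by 30 divides by 2^30 = 1073741824 rather than 10^9, so each counted "second" is about 1.074 real seconds and the default 120-second timeout fires after roughly 129 wall-clock seconds. A standalone check of the approximation:

#include <stdio.h>

/*
 * Demonstrate the ns >> 30 approximation used by get_timestamp():
 * 120 real seconds' worth of nanoseconds only count as ~111 shifted
 * "seconds", i.e. the shifted clock runs ~7% slow.
 */
int main(void)
{
	unsigned long long ns = 120ULL * 1000000000ULL;

	printf("exact:   %llu s\n", ns / 1000000000ULL);	/* 120 */
	printf("shifted: %llu s\n", ns >> 30);			/* 111 */
	return 0;
}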
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c95f3ed34474..96f31c1bc4f0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -753,6 +753,33 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &one,
 		.extra2		= &sixty,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hung_task_check_count",
+		.data		= &sysctl_hung_task_check_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hung_task_timeout_secs",
+		.data		= &sysctl_hung_task_timeout_secs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hung_task_warnings",
+		.data		= &sysctl_hung_task_warnings,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+	},
 #endif
 #ifdef CONFIG_COMPAT
 	{
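To see the new message fire without patching the kernel, one hedged userspace test: the vfork() parent waits for its child uninterruptibly (an assumption about this kernel's CLONE_VFORK wait; strictly, POSIX permits the child only _exit()/exec), so a child that sleeps past hung_task_timeout_secs should get the parent reported in dmesg:

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Hypothetical test, not part of the patch: block the vfork() parent
 * in the kernel for longer than the 120s default timeout; the parent
 * should then show up as "INFO: task ...: blocked for more than 120
 * seconds." in the kernel log.
 */
int main(void)
{
	pid_t pid = vfork();

	if (pid == 0) {			/* child: keeps the parent blocked */
		sleep(180);
		_exit(0);
	}
	printf("parent unblocked\n");	/* reached after ~180 seconds */
	return 0;
}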