aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/debug_locks.h5
-rw-r--r--include/linux/sched.h10
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/lockdep.c12
-rw-r--r--kernel/sched.c4
-rw-r--r--kernel/softlockup.c114
-rw-r--r--kernel/sysctl.c27
7 files changed, 164 insertions, 13 deletions
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 1678a5de7013..f4a5871767f5 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -47,6 +47,7 @@ struct task_struct;
47 47
48#ifdef CONFIG_LOCKDEP 48#ifdef CONFIG_LOCKDEP
49extern void debug_show_all_locks(void); 49extern void debug_show_all_locks(void);
50extern void __debug_show_held_locks(struct task_struct *task);
50extern void debug_show_held_locks(struct task_struct *task); 51extern void debug_show_held_locks(struct task_struct *task);
51extern void debug_check_no_locks_freed(const void *from, unsigned long len); 52extern void debug_check_no_locks_freed(const void *from, unsigned long len);
52extern void debug_check_no_locks_held(struct task_struct *task); 53extern void debug_check_no_locks_held(struct task_struct *task);
@@ -55,6 +56,10 @@ static inline void debug_show_all_locks(void)
55{ 56{
56} 57}
57 58
59static inline void __debug_show_held_locks(struct task_struct *task)
60{
61}
62
58static inline void debug_show_held_locks(struct task_struct *task) 63static inline void debug_show_held_locks(struct task_struct *task)
59{ 64{
60} 65}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 288245f83bd4..0846f1f9e196 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -258,12 +258,17 @@ extern void account_process_tick(struct task_struct *task, int user);
258extern void update_process_times(int user); 258extern void update_process_times(int user);
259extern void scheduler_tick(void); 259extern void scheduler_tick(void);
260 260
261extern void sched_show_task(struct task_struct *p);
262
261#ifdef CONFIG_DETECT_SOFTLOCKUP 263#ifdef CONFIG_DETECT_SOFTLOCKUP
262extern void softlockup_tick(void); 264extern void softlockup_tick(void);
263extern void spawn_softlockup_task(void); 265extern void spawn_softlockup_task(void);
264extern void touch_softlockup_watchdog(void); 266extern void touch_softlockup_watchdog(void);
265extern void touch_all_softlockup_watchdogs(void); 267extern void touch_all_softlockup_watchdogs(void);
266extern int softlockup_thresh; 268extern int softlockup_thresh;
269extern unsigned long sysctl_hung_task_check_count;
270extern unsigned long sysctl_hung_task_timeout_secs;
271extern long sysctl_hung_task_warnings;
267#else 272#else
268static inline void softlockup_tick(void) 273static inline void softlockup_tick(void)
269{ 274{
@@ -1041,6 +1046,11 @@ struct task_struct {
1041/* ipc stuff */ 1046/* ipc stuff */
1042 struct sysv_sem sysvsem; 1047 struct sysv_sem sysvsem;
1043#endif 1048#endif
1049#ifdef CONFIG_DETECT_SOFTLOCKUP
1050/* hung task detection */
1051 unsigned long last_switch_timestamp;
1052 unsigned long last_switch_count;
1053#endif
1044/* CPU-specific state of this task */ 1054/* CPU-specific state of this task */
1045 struct thread_struct thread; 1055 struct thread_struct thread;
1046/* filesystem information */ 1056/* filesystem information */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8dd8ff281009..09c0b90a69cc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1059,6 +1059,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1059 p->prev_utime = cputime_zero; 1059 p->prev_utime = cputime_zero;
1060 p->prev_stime = cputime_zero; 1060 p->prev_stime = cputime_zero;
1061 1061
1062#ifdef CONFIG_DETECT_SOFTLOCKUP
1063 p->last_switch_count = 0;
1064 p->last_switch_timestamp = 0;
1065#endif
1066
1062#ifdef CONFIG_TASK_XACCT 1067#ifdef CONFIG_TASK_XACCT
1063 p->rchar = 0; /* I/O counter: bytes read */ 1068 p->rchar = 0; /* I/O counter: bytes read */
1064 p->wchar = 0; /* I/O counter: bytes written */ 1069 p->wchar = 0; /* I/O counter: bytes written */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e2c07ece367d..3574379f4d62 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3206,7 +3206,11 @@ retry:
3206 3206
3207EXPORT_SYMBOL_GPL(debug_show_all_locks); 3207EXPORT_SYMBOL_GPL(debug_show_all_locks);
3208 3208
3209void debug_show_held_locks(struct task_struct *task) 3209/*
3210 * Careful: only use this function if you are sure that
3211 * the task cannot run in parallel!
3212 */
3213void __debug_show_held_locks(struct task_struct *task)
3210{ 3214{
3211 if (unlikely(!debug_locks)) { 3215 if (unlikely(!debug_locks)) {
3212 printk("INFO: lockdep is turned off.\n"); 3216 printk("INFO: lockdep is turned off.\n");
@@ -3214,6 +3218,12 @@ void debug_show_held_locks(struct task_struct *task)
3214 } 3218 }
3215 lockdep_print_held_locks(task); 3219 lockdep_print_held_locks(task);
3216} 3220}
3221EXPORT_SYMBOL_GPL(__debug_show_held_locks);
3222
3223void debug_show_held_locks(struct task_struct *task)
3224{
3225 __debug_show_held_locks(task);
3226}
3217 3227
3218EXPORT_SYMBOL_GPL(debug_show_held_locks); 3228EXPORT_SYMBOL_GPL(debug_show_held_locks);
3219 3229
diff --git a/kernel/sched.c b/kernel/sched.c
index c0e2db683e29..5b3d46574eeb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4945,7 +4945,7 @@ out_unlock:
4945 4945
4946static const char stat_nam[] = "RSDTtZX"; 4946static const char stat_nam[] = "RSDTtZX";
4947 4947
4948static void show_task(struct task_struct *p) 4948void sched_show_task(struct task_struct *p)
4949{ 4949{
4950 unsigned long free = 0; 4950 unsigned long free = 0;
4951 unsigned state; 4951 unsigned state;
@@ -4998,7 +4998,7 @@ void show_state_filter(unsigned long state_filter)
4998 */ 4998 */
4999 touch_nmi_watchdog(); 4999 touch_nmi_watchdog();
5000 if (!state_filter || (p->state & state_filter)) 5000 if (!state_filter || (p->state & state_filter))
5001 show_task(p); 5001 sched_show_task(p);
5002 } while_each_thread(g, p); 5002 } while_each_thread(g, p);
5003 5003
5004 touch_all_softlockup_watchdogs(); 5004 touch_all_softlockup_watchdogs();
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 11df812263c8..02f0ad534441 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -8,6 +8,7 @@
8 */ 8 */
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/cpu.h> 10#include <linux/cpu.h>
11#include <linux/nmi.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/delay.h> 13#include <linux/delay.h>
13#include <linux/freezer.h> 14#include <linux/freezer.h>
@@ -24,7 +25,7 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
24static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 25static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
25 26
26static int did_panic; 27static int did_panic;
27int softlockup_thresh = 10; 28int softlockup_thresh = 60;
28 29
29static int 30static int
30softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) 31softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -45,7 +46,7 @@ static struct notifier_block panic_block = {
45 */ 46 */
46static unsigned long get_timestamp(int this_cpu) 47static unsigned long get_timestamp(int this_cpu)
47{ 48{
48 return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */ 49 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
49} 50}
50 51
51void touch_softlockup_watchdog(void) 52void touch_softlockup_watchdog(void)
@@ -100,11 +101,7 @@ void softlockup_tick(void)
100 101
101 now = get_timestamp(this_cpu); 102 now = get_timestamp(this_cpu);
102 103
103 /* Wake up the high-prio watchdog task every second: */ 104 /* Warn about unreasonable delays: */
104 if (now > (touch_timestamp + 1))
105 wake_up_process(per_cpu(watchdog_task, this_cpu));
106
107 /* Warn about unreasonable 10+ seconds delays: */
108 if (now <= (touch_timestamp + softlockup_thresh)) 105 if (now <= (touch_timestamp + softlockup_thresh))
109 return; 106 return;
110 107
@@ -122,11 +119,93 @@ void softlockup_tick(void)
122} 119}
123 120
124/* 121/*
122 * Have a reasonable limit on the number of tasks checked:
123 */
124unsigned long sysctl_hung_task_check_count = 1024;
125
126/*
127 * Zero means infinite timeout - no checking done:
128 */
129unsigned long sysctl_hung_task_timeout_secs = 120;
130
131long sysctl_hung_task_warnings = 10;
132
133/*
134 * Only do the hung-tasks check on one CPU:
135 */
136static int check_cpu __read_mostly = -1;
137
138static void check_hung_task(struct task_struct *t, unsigned long now)
139{
140 unsigned long switch_count = t->nvcsw + t->nivcsw;
141
142 if (t->flags & PF_FROZEN)
143 return;
144
145 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
146 t->last_switch_count = switch_count;
147 t->last_switch_timestamp = now;
148 return;
149 }
150 if ((long)(now - t->last_switch_timestamp) <
151 sysctl_hung_task_timeout_secs)
152 return;
153 if (sysctl_hung_task_warnings < 0)
154 return;
155 sysctl_hung_task_warnings--;
156
157 /*
158 * Ok, the task did not get scheduled for more than 2 minutes,
159 * complain:
160 */
161 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
162 "%ld seconds.\n", t->comm, t->pid,
163 sysctl_hung_task_timeout_secs);
164 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
165 " disables this message.\n");
166 sched_show_task(t);
167 __debug_show_held_locks(t);
168
169 t->last_switch_timestamp = now;
170 touch_nmi_watchdog();
171}
172
173/*
174 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
175 * a really long time (120 seconds). If that happens, print out
176 * a warning.
177 */
178static void check_hung_uninterruptible_tasks(int this_cpu)
179{
180 int max_count = sysctl_hung_task_check_count;
181 unsigned long now = get_timestamp(this_cpu);
182 struct task_struct *g, *t;
183
184 /*
185 * If the system crashed already then all bets are off,
186 * do not report extra hung tasks:
187 */
188 if ((tainted & TAINT_DIE) || did_panic)
189 return;
190
191 read_lock(&tasklist_lock);
192 do_each_thread(g, t) {
193 if (!--max_count)
194 break;
195 if (t->state & TASK_UNINTERRUPTIBLE)
196 check_hung_task(t, now);
197 } while_each_thread(g, t);
198
199 read_unlock(&tasklist_lock);
200}
201
202/*
125 * The watchdog thread - runs every second and touches the timestamp. 203 * The watchdog thread - runs every second and touches the timestamp.
126 */ 204 */
127static int watchdog(void *__bind_cpu) 205static int watchdog(void *__bind_cpu)
128{ 206{
129 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 207 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
208 int this_cpu = (long)__bind_cpu;
130 209
131 sched_setscheduler(current, SCHED_FIFO, &param); 210 sched_setscheduler(current, SCHED_FIFO, &param);
132 211
@@ -135,13 +214,18 @@ static int watchdog(void *__bind_cpu)
135 214
136 /* 215 /*
137 * Run briefly once per second to reset the softlockup timestamp. 216 * Run briefly once per second to reset the softlockup timestamp.
138 * If this gets delayed for more than 10 seconds then the 217 * If this gets delayed for more than 60 seconds then the
139 * debug-printout triggers in softlockup_tick(). 218 * debug-printout triggers in softlockup_tick().
140 */ 219 */
141 while (!kthread_should_stop()) { 220 while (!kthread_should_stop()) {
142 set_current_state(TASK_INTERRUPTIBLE);
143 touch_softlockup_watchdog(); 221 touch_softlockup_watchdog();
144 schedule(); 222 msleep_interruptible(10000);
223
224 if (this_cpu != check_cpu)
225 continue;
226
227 if (sysctl_hung_task_timeout_secs)
228 check_hung_uninterruptible_tasks(this_cpu);
145 } 229 }
146 230
147 return 0; 231 return 0;
@@ -171,6 +255,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
171 break; 255 break;
172 case CPU_ONLINE: 256 case CPU_ONLINE:
173 case CPU_ONLINE_FROZEN: 257 case CPU_ONLINE_FROZEN:
258 check_cpu = any_online_cpu(cpu_online_map);
174 wake_up_process(per_cpu(watchdog_task, hotcpu)); 259 wake_up_process(per_cpu(watchdog_task, hotcpu));
175 break; 260 break;
176#ifdef CONFIG_HOTPLUG_CPU 261#ifdef CONFIG_HOTPLUG_CPU
@@ -181,6 +266,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
181 /* Unbind so it can run. Fall thru. */ 266 /* Unbind so it can run. Fall thru. */
182 kthread_bind(per_cpu(watchdog_task, hotcpu), 267 kthread_bind(per_cpu(watchdog_task, hotcpu),
183 any_online_cpu(cpu_online_map)); 268 any_online_cpu(cpu_online_map));
269 case CPU_DOWN_PREPARE:
270 case CPU_DOWN_PREPARE_FROZEN:
271 if (hotcpu == check_cpu) {
272 cpumask_t temp_cpu_online_map = cpu_online_map;
273
274 cpu_clear(hotcpu, temp_cpu_online_map);
275 check_cpu = any_online_cpu(temp_cpu_online_map);
276 }
277 break;
184 case CPU_DEAD: 278 case CPU_DEAD:
185 case CPU_DEAD_FROZEN: 279 case CPU_DEAD_FROZEN:
186 p = per_cpu(watchdog_task, hotcpu); 280 p = per_cpu(watchdog_task, hotcpu);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c95f3ed34474..96f31c1bc4f0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -753,6 +753,33 @@ static struct ctl_table kern_table[] = {
753 .extra1 = &one, 753 .extra1 = &one,
754 .extra2 = &sixty, 754 .extra2 = &sixty,
755 }, 755 },
756 {
757 .ctl_name = CTL_UNNUMBERED,
758 .procname = "hung_task_check_count",
759 .data = &sysctl_hung_task_check_count,
760 .maxlen = sizeof(int),
761 .mode = 0644,
762 .proc_handler = &proc_dointvec_minmax,
763 .strategy = &sysctl_intvec,
764 },
765 {
766 .ctl_name = CTL_UNNUMBERED,
767 .procname = "hung_task_timeout_secs",
768 .data = &sysctl_hung_task_timeout_secs,
769 .maxlen = sizeof(int),
770 .mode = 0644,
771 .proc_handler = &proc_dointvec_minmax,
772 .strategy = &sysctl_intvec,
773 },
774 {
775 .ctl_name = CTL_UNNUMBERED,
776 .procname = "hung_task_warnings",
777 .data = &sysctl_hung_task_warnings,
778 .maxlen = sizeof(int),
779 .mode = 0644,
780 .proc_handler = &proc_dointvec_minmax,
781 .strategy = &sysctl_intvec,
782 },
756#endif 783#endif
757#ifdef CONFIG_COMPAT 784#ifdef CONFIG_COMPAT
758 { 785 {