diff options
-rw-r--r-- | include/linux/debug_locks.h | 5 | ||||
-rw-r--r-- | include/linux/sched.h | 10 | ||||
-rw-r--r-- | kernel/fork.c | 5 | ||||
-rw-r--r-- | kernel/lockdep.c | 12 | ||||
-rw-r--r-- | kernel/sched.c | 4 | ||||
-rw-r--r-- | kernel/softlockup.c | 114 | ||||
-rw-r--r-- | kernel/sysctl.c | 27 |
7 files changed, 164 insertions, 13 deletions
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 1678a5de7013..f4a5871767f5 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h | |||
@@ -47,6 +47,7 @@ struct task_struct; | |||
47 | 47 | ||
48 | #ifdef CONFIG_LOCKDEP | 48 | #ifdef CONFIG_LOCKDEP |
49 | extern void debug_show_all_locks(void); | 49 | extern void debug_show_all_locks(void); |
50 | extern void __debug_show_held_locks(struct task_struct *task); | ||
50 | extern void debug_show_held_locks(struct task_struct *task); | 51 | extern void debug_show_held_locks(struct task_struct *task); |
51 | extern void debug_check_no_locks_freed(const void *from, unsigned long len); | 52 | extern void debug_check_no_locks_freed(const void *from, unsigned long len); |
52 | extern void debug_check_no_locks_held(struct task_struct *task); | 53 | extern void debug_check_no_locks_held(struct task_struct *task); |
@@ -55,6 +56,10 @@ static inline void debug_show_all_locks(void) | |||
55 | { | 56 | { |
56 | } | 57 | } |
57 | 58 | ||
59 | static inline void __debug_show_held_locks(struct task_struct *task) | ||
60 | { | ||
61 | } | ||
62 | |||
58 | static inline void debug_show_held_locks(struct task_struct *task) | 63 | static inline void debug_show_held_locks(struct task_struct *task) |
59 | { | 64 | { |
60 | } | 65 | } |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 288245f83bd4..0846f1f9e196 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -258,12 +258,17 @@ extern void account_process_tick(struct task_struct *task, int user); | |||
258 | extern void update_process_times(int user); | 258 | extern void update_process_times(int user); |
259 | extern void scheduler_tick(void); | 259 | extern void scheduler_tick(void); |
260 | 260 | ||
261 | extern void sched_show_task(struct task_struct *p); | ||
262 | |||
261 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 263 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
262 | extern void softlockup_tick(void); | 264 | extern void softlockup_tick(void); |
263 | extern void spawn_softlockup_task(void); | 265 | extern void spawn_softlockup_task(void); |
264 | extern void touch_softlockup_watchdog(void); | 266 | extern void touch_softlockup_watchdog(void); |
265 | extern void touch_all_softlockup_watchdogs(void); | 267 | extern void touch_all_softlockup_watchdogs(void); |
266 | extern int softlockup_thresh; | 268 | extern int softlockup_thresh; |
269 | extern unsigned long sysctl_hung_task_check_count; | ||
270 | extern unsigned long sysctl_hung_task_timeout_secs; | ||
271 | extern long sysctl_hung_task_warnings; | ||
267 | #else | 272 | #else |
268 | static inline void softlockup_tick(void) | 273 | static inline void softlockup_tick(void) |
269 | { | 274 | { |
@@ -1041,6 +1046,11 @@ struct task_struct { | |||
1041 | /* ipc stuff */ | 1046 | /* ipc stuff */ |
1042 | struct sysv_sem sysvsem; | 1047 | struct sysv_sem sysvsem; |
1043 | #endif | 1048 | #endif |
1049 | #ifdef CONFIG_DETECT_SOFTLOCKUP | ||
1050 | /* hung task detection */ | ||
1051 | unsigned long last_switch_timestamp; | ||
1052 | unsigned long last_switch_count; | ||
1053 | #endif | ||
1044 | /* CPU-specific state of this task */ | 1054 | /* CPU-specific state of this task */ |
1045 | struct thread_struct thread; | 1055 | struct thread_struct thread; |
1046 | /* filesystem information */ | 1056 | /* filesystem information */ |
diff --git a/kernel/fork.c b/kernel/fork.c index 8dd8ff281009..09c0b90a69cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1059,6 +1059,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1059 | p->prev_utime = cputime_zero; | 1059 | p->prev_utime = cputime_zero; |
1060 | p->prev_stime = cputime_zero; | 1060 | p->prev_stime = cputime_zero; |
1061 | 1061 | ||
1062 | #ifdef CONFIG_DETECT_SOFTLOCKUP | ||
1063 | p->last_switch_count = 0; | ||
1064 | p->last_switch_timestamp = 0; | ||
1065 | #endif | ||
1066 | |||
1062 | #ifdef CONFIG_TASK_XACCT | 1067 | #ifdef CONFIG_TASK_XACCT |
1063 | p->rchar = 0; /* I/O counter: bytes read */ | 1068 | p->rchar = 0; /* I/O counter: bytes read */ |
1064 | p->wchar = 0; /* I/O counter: bytes written */ | 1069 | p->wchar = 0; /* I/O counter: bytes written */ |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e2c07ece367d..3574379f4d62 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -3206,7 +3206,11 @@ retry: | |||
3206 | 3206 | ||
3207 | EXPORT_SYMBOL_GPL(debug_show_all_locks); | 3207 | EXPORT_SYMBOL_GPL(debug_show_all_locks); |
3208 | 3208 | ||
3209 | void debug_show_held_locks(struct task_struct *task) | 3209 | /* |
3210 | * Careful: only use this function if you are sure that | ||
3211 | * the task cannot run in parallel! | ||
3212 | */ | ||
3213 | void __debug_show_held_locks(struct task_struct *task) | ||
3210 | { | 3214 | { |
3211 | if (unlikely(!debug_locks)) { | 3215 | if (unlikely(!debug_locks)) { |
3212 | printk("INFO: lockdep is turned off.\n"); | 3216 | printk("INFO: lockdep is turned off.\n"); |
@@ -3214,6 +3218,12 @@ void debug_show_held_locks(struct task_struct *task) | |||
3214 | } | 3218 | } |
3215 | lockdep_print_held_locks(task); | 3219 | lockdep_print_held_locks(task); |
3216 | } | 3220 | } |
3221 | EXPORT_SYMBOL_GPL(__debug_show_held_locks); | ||
3222 | |||
3223 | void debug_show_held_locks(struct task_struct *task) | ||
3224 | { | ||
3225 | __debug_show_held_locks(task); | ||
3226 | } | ||
3217 | 3227 | ||
3218 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 3228 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
3219 | 3229 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index c0e2db683e29..5b3d46574eeb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4945,7 +4945,7 @@ out_unlock: | |||
4945 | 4945 | ||
4946 | static const char stat_nam[] = "RSDTtZX"; | 4946 | static const char stat_nam[] = "RSDTtZX"; |
4947 | 4947 | ||
4948 | static void show_task(struct task_struct *p) | 4948 | void sched_show_task(struct task_struct *p) |
4949 | { | 4949 | { |
4950 | unsigned long free = 0; | 4950 | unsigned long free = 0; |
4951 | unsigned state; | 4951 | unsigned state; |
@@ -4998,7 +4998,7 @@ void show_state_filter(unsigned long state_filter) | |||
4998 | */ | 4998 | */ |
4999 | touch_nmi_watchdog(); | 4999 | touch_nmi_watchdog(); |
5000 | if (!state_filter || (p->state & state_filter)) | 5000 | if (!state_filter || (p->state & state_filter)) |
5001 | show_task(p); | 5001 | sched_show_task(p); |
5002 | } while_each_thread(g, p); | 5002 | } while_each_thread(g, p); |
5003 | 5003 | ||
5004 | touch_all_softlockup_watchdogs(); | 5004 | touch_all_softlockup_watchdogs(); |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 11df812263c8..02f0ad534441 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
11 | #include <linux/nmi.h> | ||
11 | #include <linux/init.h> | 12 | #include <linux/init.h> |
12 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
13 | #include <linux/freezer.h> | 14 | #include <linux/freezer.h> |
@@ -24,7 +25,7 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); | |||
24 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); | 25 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); |
25 | 26 | ||
26 | static int did_panic; | 27 | static int did_panic; |
27 | int softlockup_thresh = 10; | 28 | int softlockup_thresh = 60; |
28 | 29 | ||
29 | static int | 30 | static int |
30 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) | 31 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) |
@@ -45,7 +46,7 @@ static struct notifier_block panic_block = { | |||
45 | */ | 46 | */ |
46 | static unsigned long get_timestamp(int this_cpu) | 47 | static unsigned long get_timestamp(int this_cpu) |
47 | { | 48 | { |
48 | return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */ | 49 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ |
49 | } | 50 | } |
50 | 51 | ||
51 | void touch_softlockup_watchdog(void) | 52 | void touch_softlockup_watchdog(void) |
@@ -100,11 +101,7 @@ void softlockup_tick(void) | |||
100 | 101 | ||
101 | now = get_timestamp(this_cpu); | 102 | now = get_timestamp(this_cpu); |
102 | 103 | ||
103 | /* Wake up the high-prio watchdog task every second: */ | 104 | /* Warn about unreasonable delays: */ |
104 | if (now > (touch_timestamp + 1)) | ||
105 | wake_up_process(per_cpu(watchdog_task, this_cpu)); | ||
106 | |||
107 | /* Warn about unreasonable 10+ seconds delays: */ | ||
108 | if (now <= (touch_timestamp + softlockup_thresh)) | 105 | if (now <= (touch_timestamp + softlockup_thresh)) |
109 | return; | 106 | return; |
110 | 107 | ||
@@ -122,11 +119,93 @@ void softlockup_tick(void) | |||
122 | } | 119 | } |
123 | 120 | ||
124 | /* | 121 | /* |
122 | * Have a reasonable limit on the number of tasks checked: | ||
123 | */ | ||
124 | unsigned long sysctl_hung_task_check_count = 1024; | ||
125 | |||
126 | /* | ||
127 | * Zero means infinite timeout - no checking done: | ||
128 | */ | ||
129 | unsigned long sysctl_hung_task_timeout_secs = 120; | ||
130 | |||
131 | long sysctl_hung_task_warnings = 10; | ||
132 | |||
133 | /* | ||
134 | * Only do the hung-tasks check on one CPU: | ||
135 | */ | ||
136 | static int check_cpu __read_mostly = -1; | ||
137 | |||
138 | static void check_hung_task(struct task_struct *t, unsigned long now) | ||
139 | { | ||
140 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
141 | |||
142 | if (t->flags & PF_FROZEN) | ||
143 | return; | ||
144 | |||
145 | if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { | ||
146 | t->last_switch_count = switch_count; | ||
147 | t->last_switch_timestamp = now; | ||
148 | return; | ||
149 | } | ||
150 | if ((long)(now - t->last_switch_timestamp) < | ||
151 | sysctl_hung_task_timeout_secs) | ||
152 | return; | ||
153 | if (sysctl_hung_task_warnings < 0) | ||
154 | return; | ||
155 | sysctl_hung_task_warnings--; | ||
156 | |||
157 | /* | ||
158 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
159 | * complain: | ||
160 | */ | ||
161 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
162 | "%ld seconds.\n", t->comm, t->pid, | ||
163 | sysctl_hung_task_timeout_secs); | ||
164 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
165 | " disables this message.\n"); | ||
166 | sched_show_task(t); | ||
167 | __debug_show_held_locks(t); | ||
168 | |||
169 | t->last_switch_timestamp = now; | ||
170 | touch_nmi_watchdog(); | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
175 | * a really long time (120 seconds). If that happens, print out | ||
176 | * a warning. | ||
177 | */ | ||
178 | static void check_hung_uninterruptible_tasks(int this_cpu) | ||
179 | { | ||
180 | int max_count = sysctl_hung_task_check_count; | ||
181 | unsigned long now = get_timestamp(this_cpu); | ||
182 | struct task_struct *g, *t; | ||
183 | |||
184 | /* | ||
185 | * If the system crashed already then all bets are off, | ||
186 | * do not report extra hung tasks: | ||
187 | */ | ||
188 | if ((tainted & TAINT_DIE) || did_panic) | ||
189 | return; | ||
190 | |||
191 | read_lock(&tasklist_lock); | ||
192 | do_each_thread(g, t) { | ||
193 | if (!--max_count) | ||
194 | break; | ||
195 | if (t->state & TASK_UNINTERRUPTIBLE) | ||
196 | check_hung_task(t, now); | ||
197 | } while_each_thread(g, t); | ||
198 | |||
199 | read_unlock(&tasklist_lock); | ||
200 | } | ||
201 | |||
202 | /* | ||
125 | * The watchdog thread - runs every second and touches the timestamp. | 203 | * The watchdog thread - runs every second and touches the timestamp. |
126 | */ | 204 | */ |
127 | static int watchdog(void *__bind_cpu) | 205 | static int watchdog(void *__bind_cpu) |
128 | { | 206 | { |
129 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 207 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
208 | int this_cpu = (long)__bind_cpu; | ||
130 | 209 | ||
131 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 210 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
132 | 211 | ||
@@ -135,13 +214,18 @@ static int watchdog(void *__bind_cpu) | |||
135 | 214 | ||
136 | /* | 215 | /* |
137 | * Run briefly once per second to reset the softlockup timestamp. | 216 | * Run briefly once per second to reset the softlockup timestamp. |
138 | * If this gets delayed for more than 10 seconds then the | 217 | * If this gets delayed for more than 60 seconds then the |
139 | * debug-printout triggers in softlockup_tick(). | 218 | * debug-printout triggers in softlockup_tick(). |
140 | */ | 219 | */ |
141 | while (!kthread_should_stop()) { | 220 | while (!kthread_should_stop()) { |
142 | set_current_state(TASK_INTERRUPTIBLE); | ||
143 | touch_softlockup_watchdog(); | 221 | touch_softlockup_watchdog(); |
144 | schedule(); | 222 | msleep_interruptible(10000); |
223 | |||
224 | if (this_cpu != check_cpu) | ||
225 | continue; | ||
226 | |||
227 | if (sysctl_hung_task_timeout_secs) | ||
228 | check_hung_uninterruptible_tasks(this_cpu); | ||
145 | } | 229 | } |
146 | 230 | ||
147 | return 0; | 231 | return 0; |
@@ -171,6 +255,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
171 | break; | 255 | break; |
172 | case CPU_ONLINE: | 256 | case CPU_ONLINE: |
173 | case CPU_ONLINE_FROZEN: | 257 | case CPU_ONLINE_FROZEN: |
258 | check_cpu = any_online_cpu(cpu_online_map); | ||
174 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 259 | wake_up_process(per_cpu(watchdog_task, hotcpu)); |
175 | break; | 260 | break; |
176 | #ifdef CONFIG_HOTPLUG_CPU | 261 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -181,6 +266,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
181 | /* Unbind so it can run. Fall thru. */ | 266 | /* Unbind so it can run. Fall thru. */ |
182 | kthread_bind(per_cpu(watchdog_task, hotcpu), | 267 | kthread_bind(per_cpu(watchdog_task, hotcpu), |
183 | any_online_cpu(cpu_online_map)); | 268 | any_online_cpu(cpu_online_map)); |
269 | case CPU_DOWN_PREPARE: | ||
270 | case CPU_DOWN_PREPARE_FROZEN: | ||
271 | if (hotcpu == check_cpu) { | ||
272 | cpumask_t temp_cpu_online_map = cpu_online_map; | ||
273 | |||
274 | cpu_clear(hotcpu, temp_cpu_online_map); | ||
275 | check_cpu = any_online_cpu(temp_cpu_online_map); | ||
276 | } | ||
277 | break; | ||
184 | case CPU_DEAD: | 278 | case CPU_DEAD: |
185 | case CPU_DEAD_FROZEN: | 279 | case CPU_DEAD_FROZEN: |
186 | p = per_cpu(watchdog_task, hotcpu); | 280 | p = per_cpu(watchdog_task, hotcpu); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c95f3ed34474..96f31c1bc4f0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -753,6 +753,33 @@ static struct ctl_table kern_table[] = { | |||
753 | .extra1 = &one, | 753 | .extra1 = &one, |
754 | .extra2 = &sixty, | 754 | .extra2 = &sixty, |
755 | }, | 755 | }, |
756 | { | ||
757 | .ctl_name = CTL_UNNUMBERED, | ||
758 | .procname = "hung_task_check_count", | ||
759 | .data = &sysctl_hung_task_check_count, | ||
760 | .maxlen = sizeof(int), | ||
761 | .mode = 0644, | ||
762 | .proc_handler = &proc_dointvec_minmax, | ||
763 | .strategy = &sysctl_intvec, | ||
764 | }, | ||
765 | { | ||
766 | .ctl_name = CTL_UNNUMBERED, | ||
767 | .procname = "hung_task_timeout_secs", | ||
768 | .data = &sysctl_hung_task_timeout_secs, | ||
769 | .maxlen = sizeof(int), | ||
770 | .mode = 0644, | ||
771 | .proc_handler = &proc_dointvec_minmax, | ||
772 | .strategy = &sysctl_intvec, | ||
773 | }, | ||
774 | { | ||
775 | .ctl_name = CTL_UNNUMBERED, | ||
776 | .procname = "hung_task_warnings", | ||
777 | .data = &sysctl_hung_task_warnings, | ||
778 | .maxlen = sizeof(int), | ||
779 | .mode = 0644, | ||
780 | .proc_handler = &proc_dointvec_minmax, | ||
781 | .strategy = &sysctl_intvec, | ||
782 | }, | ||
756 | #endif | 783 | #endif |
757 | #ifdef CONFIG_COMPAT | 784 | #ifdef CONFIG_COMPAT |
758 | { | 785 | { |