Merge branch 'master' into next

author: James Morris <jmorris@namei.org> 2009-05-08 03:56:47 -0400
committer: James Morris <jmorris@namei.org> 2009-05-08 03:56:47 -0400
commit: d254117099d711f215e62427f55dfb8ebd5ad011 (patch)
tree: 0848ff8dd74314fec14a86497f8d288c86ba7c65 /kernel/hung_task.c
parent: 07ff7a0b187f3951788f64ae1f30e8109bc8e9eb (diff)
parent: 8c9ed899b44c19e81859fbb0e9d659fe2f8630fc (diff)
1 files changed, 217 insertions, 0 deletions
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+/*
+ * The number of tasks checked:
+ *
+ * check_hung_uninterruptible_tasks() copies this value into its local
+ * max_count at the start of every scan and stops walking the thread list
+ * once that counter runs out.
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period. So it needs to be upper-bounded.
+ *
+ * After this many threads have been visited, the scan calls
+ * rcu_lock_break() to leave and re-enter the RCU read-side section.
+ */
+#define HUNG_TASK_BATCHING 1024
+/*
+ * sysctl_hung_task_timeout_secs: seconds the watchdog thread sleeps between
+ * scans. Zero means infinite timeout - no checking done (timeout_jiffies()
+ * maps 0 to MAX_SCHEDULE_TIMEOUT).
+ *
+ * sysctl_hung_task_warnings: number of reports still allowed;
+ * check_hung_task() decrements it for every report and returns without
+ * reporting once it reaches zero.
+ *
+ * did_panic: set to 1 by the hung_task_panic() notifier callback; while it
+ * is set, check_hung_uninterruptible_tasks() returns without scanning.
+ *
+ * watchdog_task: result of kthread_run() in hung_task_init(); woken by
+ * proc_dohung_task_timeout_secs() after a successful sysctl write.
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+static int __read_mostly did_panic;
+static struct task_struct *watchdog_task;
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ *
+ * The compile-time default can be overridden at boot with the
+ * "hung_task_panic=" parameter (see hung_task_panic_setup()).
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+                                CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+/*
+ * Handler for the "hung_task_panic=" boot parameter.
+ *
+ * @str is converted with simple_strtoul() (base argument 0) and the result
+ * is stored in sysctl_hung_task_panic. The function always returns 1.
+ */
+static int __init hung_task_panic_setup(char *str)
+{
+        unsigned long value = simple_strtoul(str, NULL, 0);
+        sysctl_hung_task_panic = value;
+        return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+/*
+ * Panic notifier callback: remember that a panic has happened so that
+ * check_hung_uninterruptible_tasks() stops scanning. None of the
+ * arguments are used; always returns NOTIFY_DONE.
+ */
+static int hung_task_panic(struct notifier_block *this,
+                           unsigned long event, void *ptr)
+{
+        did_panic = 1;
+        return NOTIFY_DONE;
+}
+/* Notifier block that hung_task_init() adds to panic_notifier_list. */
+static struct notifier_block panic_block = {
+        .notifier_call  = hung_task_panic,
+};
+/*
+ * Decide whether task @t has been stuck since the previous scan and, if so,
+ * report it.
+ *
+ * @t:       task currently in TASK_UNINTERRUPTIBLE state (caller checks this)
+ * @timeout: scan period in seconds; only used in the log message
+ *
+ * A task counts as hung when its total context-switch count
+ * (nvcsw + nivcsw) is unchanged since the value saved in
+ * t->last_switch_count by an earlier call. Each report consumes one unit of
+ * sysctl_hung_task_warnings; once that budget is zero the function returns
+ * early, so neither the report nor the optional panic below happens.
+ */
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+        unsigned long switch_count = t->nvcsw + t->nivcsw;
+        /*
+         * Ensure the task is not frozen.
+         * Also, when a freshly created task is scheduled once and changes
+         * its state to TASK_UNINTERRUPTIBLE without having ever been
+         * switched out, it mustn't be checked.
+         */
+        if (unlikely((t->flags & PF_FROZEN) || !switch_count))
+                return;
+        /* The task ran since the last scan: record the new count, no report. */
+        if (switch_count != t->last_switch_count) {
+                t->last_switch_count = switch_count;
+                return;
+        }
+        /* Report budget exhausted. */
+        if (!sysctl_hung_task_warnings)
+                return;
+        sysctl_hung_task_warnings--;
+        /*
+         * Ok, the task did not get scheduled for more than @timeout
+         * seconds, complain. @timeout is unsigned long, hence %lu.
+         */
+        printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+                        "%lu seconds.\n", t->comm, t->pid, timeout);
+        printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+                        " disables this message.\n");
+        sched_show_task(t);
+        __debug_show_held_locks(t);
+        touch_nmi_watchdog();
+        if (sysctl_hung_task_panic)
+                panic("hung_task: blocked tasks");
+}
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * to exit the grace period. For classic RCU, a reschedule is required.
+ *
+ * References are taken on @g and @t so that both task_structs stay
+ * allocated while the RCU read lock is dropped. Whether they are still
+ * alive is sampled with pid_alive() *before* those references are released:
+ * after put_task_struct() nothing protects the structures any more, so the
+ * caller must not inspect them itself to make that decision.
+ *
+ * Returns true when both tasks are still alive and the caller may keep
+ * iterating from them, false when the walk has to be abandoned.
+ */
+static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+        bool can_cont;
+        get_task_struct(g);
+        get_task_struct(t);
+        rcu_read_unlock();
+        cond_resched();
+        rcu_read_lock();
+        can_cont = pid_alive(g) && pid_alive(t);
+        put_task_struct(t);
+        put_task_struct(g);
+        return can_cont;
+}
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
+ * a really long time (@timeout seconds, 120 by default). If that happens,
+ * print out a warning.
+ *
+ * At most sysctl_hung_task_check_count threads are examined per call (a
+ * value of 0 examines none), and every HUNG_TASK_BATCHING threads the RCU
+ * read-side section is broken up via rcu_lock_break().
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+        /* Same type as the sysctl, so large values are not truncated. */
+        unsigned long max_count = sysctl_hung_task_check_count;
+        int batch_count = HUNG_TASK_BATCHING;
+        struct task_struct *g, *t;
+        /*
+         * If the system crashed already then all bets are off,
+         * do not report extra hung tasks:
+         */
+        if (test_taint(TAINT_DIE) || did_panic)
+                return;
+        rcu_read_lock();
+        do_each_thread(g, t) {
+                /* Post-decrement: exactly max_count threads get checked. */
+                if (!max_count--)
+                        goto unlock;
+                if (!--batch_count) {
+                        batch_count = HUNG_TASK_BATCHING;
+                        /* Stop if t or g went away while RCU was dropped. */
+                        if (!rcu_lock_break(g, t))
+                                goto unlock;
+                }
+                /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+                if (t->state == TASK_UNINTERRUPTIBLE)
+                        check_hung_task(t, timeout);
+        } while_each_thread(g, t);
+ unlock:
+        rcu_read_unlock();
+}
+/*
+ * Convert a timeout given in seconds into a sleep length in jiffies.
+ * A timeout of 0 disables the watchdog: it maps to MAX_SCHEDULE_TIMEOUT.
+ */
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+        if (!timeout)
+                return MAX_SCHEDULE_TIMEOUT;
+        return timeout * HZ;
+}
+/*
+ * Process updating of timeout sysctl
+ *
+ * The read/write itself is delegated to proc_doulongvec_minmax(). After a
+ * successful write the watchdog thread is woken so that it restarts its
+ * sleep with the new timeout instead of finishing the old one.
+ *
+ * Returns the result of proc_doulongvec_minmax().
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+                                  struct file *filp, void __user *buffer,
+                                  size_t *lenp, loff_t *ppos)
+{
+        int ret;
+        ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+        /* Nothing more to do for reads or failed writes. */
+        if (ret || !write)
+                return ret;
+        /*
+         * watchdog_task comes straight from kthread_run(), which can fail;
+         * only wake it when it refers to a real thread.
+         */
+        if (watchdog_task && !IS_ERR(watchdog_task))
+                wake_up_process(watchdog_task);
+        return ret;
+}
+/*
+ * kthread which checks for tasks stuck in D state
+ *
+ * Runs forever: sleep for the configured timeout, then scan all threads.
+ * @dummy is unused.
+ */
+static int watchdog(void *dummy)
+{
+        unsigned long secs;
+        set_user_nice(current, 0);
+        while (1) {
+                /*
+                 * Re-read the sysctl before every sleep. A non-zero return
+                 * from schedule_timeout_interruptible() restarts the sleep
+                 * with the freshly read value (the sysctl handler wakes this
+                 * thread after a write).
+                 */
+                do {
+                        secs = sysctl_hung_task_timeout_secs;
+                } while (schedule_timeout_interruptible(timeout_jiffies(secs)));
+                check_hung_uninterruptible_tasks(secs);
+        }
+        return 0;
+}
+/*
+ * Register the panic notifier and start the "khungtaskd" watchdog thread.
+ *
+ * Returns 0 on success. If the thread cannot be created, the notifier is
+ * removed again and the error from kthread_run() is returned; watchdog_task
+ * is only assigned a valid task pointer, never an ERR_PTR value.
+ */
+static int __init hung_task_init(void)
+{
+        struct task_struct *task;
+        atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+        task = kthread_run(watchdog, NULL, "khungtaskd");
+        if (IS_ERR(task)) {
+                atomic_notifier_chain_unregister(&panic_notifier_list,
+                                                 &panic_block);
+                return PTR_ERR(task);
+        }
+        watchdog_task = task;
+        return 0;
+}
+module_init(hung_task_init);
author	James Morris <jmorris@namei.org>	2009-05-08 03:56:47 -0400
committer	James Morris <jmorris@namei.org>	2009-05-08 03:56:47 -0400
commit	d254117099d711f215e62427f55dfb8ebd5ad011 (patch)
tree	0848ff8dd74314fec14a86497f8d288c86ba7c65 /kernel/hung_task.c
parent	07ff7a0b187f3951788f64ae1f30e8109bc8e9eb (diff)
parent	8c9ed899b44c19e81859fbb0e9d659fe2f8630fc (diff)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
	1	/*
	2	* Detect Hung Task
	3	*
	4	* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
	5	*
	6	*/
	7
	8	#include <linux/mm.h>
	9	#include <linux/cpu.h>
	10	#include <linux/nmi.h>
	11	#include <linux/init.h>
	12	#include <linux/delay.h>
	13	#include <linux/freezer.h>
	14	#include <linux/kthread.h>
	15	#include <linux/lockdep.h>
	16	#include <linux/module.h>
	17	#include <linux/sysctl.h>
	18
	19	/*
	20	* The number of tasks checked:
	21	*/
	22	unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
	23
	24	/*
	25	* Limit number of tasks checked in a batch.
	26	*
	27	* This value controls the preemptibility of khungtaskd since preemption
	28	* is disabled during the critical section. It also controls the size of
	29	* the RCU grace period. So it needs to be upper-bound.
	30	*/
	31	#define HUNG_TASK_BATCHING 1024
	32
	33	/*
	34	* Zero means infinite timeout - no checking done:
	35	*/
	36	unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
	37
	38	unsigned long __read_mostly sysctl_hung_task_warnings = 10;
	39
	40	static int __read_mostly did_panic;
	41
	42	static struct task_struct *watchdog_task;
	43
	44	/*
	45	* Should we panic (and reboot, if panic_timeout= is set) when a
	46	* hung task is detected:
	47	*/
	48	unsigned int __read_mostly sysctl_hung_task_panic =
	49	CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
	50
	51	static int __init hung_task_panic_setup(char *str)
	52	{
	53	sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
	54
	55	return 1;
	56	}
	57	__setup("hung_task_panic=", hung_task_panic_setup);
	58
	59	static int
	60	hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
	61	{
	62	did_panic = 1;
	63
	64	return NOTIFY_DONE;
	65	}
	66
	67	static struct notifier_block panic_block = {
	68	.notifier_call = hung_task_panic,
	69	};
	70
	71	static void check_hung_task(struct task_struct *t, unsigned long timeout)
	72	{
	73	unsigned long switch_count = t->nvcsw + t->nivcsw;
	74
	75	/*
	76	* Ensure the task is not frozen.
	77	* Also, when a freshly created task is scheduled once, changes
	78	* its state to TASK_UNINTERRUPTIBLE without having ever been
	79	* switched out once, it mustn't be checked.
	80	*/
	81	if (unlikely(t->flags & PF_FROZEN || !switch_count))
	82	return;
	83
	84	if (switch_count != t->last_switch_count) {
	85	t->last_switch_count = switch_count;
	86	return;
	87	}
	88	if (!sysctl_hung_task_warnings)
	89	return;
	90	sysctl_hung_task_warnings--;
	91
	92	/*
	93	* Ok, the task did not get scheduled for more than 2 minutes,
	94	* complain:
	95	*/
	96	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
	97	"%ld seconds.\n", t->comm, t->pid, timeout);
	98	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
	99	" disables this message.\n");
	100	sched_show_task(t);
	101	__debug_show_held_locks(t);
	102
	103	touch_nmi_watchdog();
	104
	105	if (sysctl_hung_task_panic)
	106	panic("hung_task: blocked tasks");
	107	}
	108
	109	/*
	110	* To avoid extending the RCU grace period for an unbounded amount of time,
	111	* periodically exit the critical section and enter a new one.
	112	*
	113	* For preemptible RCU it is sufficient to call rcu_read_unlock in order
	114	* to exit the grace period. For classic RCU, a reschedule is required.
	115	*/
	116	static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
	117	{
	118	get_task_struct(g);
	119	get_task_struct(t);
	120	rcu_read_unlock();
	121	cond_resched();
	122	rcu_read_lock();
	123	put_task_struct(t);
	124	put_task_struct(g);
	125	}
	126
	127	/*
	128	* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
	129	* a really long time (120 seconds). If that happens, print out
	130	* a warning.
	131	*/
	132	static void check_hung_uninterruptible_tasks(unsigned long timeout)
	133	{
	134	int max_count = sysctl_hung_task_check_count;
	135	int batch_count = HUNG_TASK_BATCHING;
	136	struct task_struct *g, *t;
	137
	138	/*
	139	* If the system crashed already then all bets are off,
	140	* do not report extra hung tasks:
	141	*/
	142	if (test_taint(TAINT_DIE) || did_panic)
	143	return;
	144
	145	rcu_read_lock();
	146	do_each_thread(g, t) {
	147	if (!--max_count)
	148	goto unlock;
	149	if (!--batch_count) {
	150	batch_count = HUNG_TASK_BATCHING;
	151	rcu_lock_break(g, t);
	152	/* Exit if t or g was unhashed during refresh. */
	153	if (t->state == TASK_DEAD || g->state == TASK_DEAD)
	154	goto unlock;
	155	}
	156	/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
	157	if (t->state == TASK_UNINTERRUPTIBLE)
	158	check_hung_task(t, timeout);
	159	} while_each_thread(g, t);
	160	unlock:
	161	rcu_read_unlock();
	162	}
	163
	164	static unsigned long timeout_jiffies(unsigned long timeout)
	165	{
	166	/* timeout of 0 will disable the watchdog */
	167	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
	168	}
	169
	170	/*
	171	* Process updating of timeout sysctl
	172	*/
	173	int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
	174	struct file *filp, void __user *buffer,
	175	size_t *lenp, loff_t *ppos)
	176	{
	177	int ret;
	178
	179	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
	180
	181	if (ret || !write)
	182	goto out;
	183
	184	wake_up_process(watchdog_task);
	185
	186	out:
	187	return ret;
	188	}
	189
	190	/*
	191	* kthread which checks for tasks stuck in D state
	192	*/
	193	static int watchdog(void *dummy)
	194	{
	195	set_user_nice(current, 0);
	196
	197	for ( ; ; ) {
	198	unsigned long timeout = sysctl_hung_task_timeout_secs;
	199
	200	while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
	201	timeout = sysctl_hung_task_timeout_secs;
	202
	203	check_hung_uninterruptible_tasks(timeout);
	204	}
	205
	206	return 0;
	207	}
	208
	209	static int __init hung_task_init(void)
	210	{
	211	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
	212	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	213
	214	return 0;
	215	}
	216
	217	module_init(hung_task_init);