rcu, debug: detect stalled grace periods

This is a diagnostic patch for Classic RCU. The approach is to record a timestamp at the beginning of the grace period (in rcu_start_batch()), then have rcu_check_callbacks() complain if: 1. it is running on a CPU that has been holding up grace periods for a long time (say one second). This will identify the culprit assuming that the culprit has not disabled hardware irqs, instruction execution, or some such. 2. it is running on a CPU that is not holding up grace periods, but grace periods have been held up for an even longer time (say two seconds). It is enabled via the default-off CONFIG_DEBUG_RCU_STALL kernel configuration option. Rather than exponential backoff, it backs off to once per 30 seconds. My feeling upon thinking on it was that if you have stalled RCU grace periods for that long, a few extra printk() messages are probably the least of your worries... Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Yinghai Lu <yhlu.kernel@gmail.com> Cc: David Witbrodt <dawitbro@sbcglobal.net> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2008-08-10 21:35:38 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-08-11 07:35:18 -0400
commit: 67182ae1c42206e516f7efb292b745e826497b24 (patch)
tree: d2d402550a0432489090264df95a8154597dc989
parent: c4c0c56a7a85ed5725786219e4fbca7e840b1531 (diff)
3 files changed, 96 insertions, 0 deletions
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 04c728147be0..16589958b40e 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -46,6 +46,9 @@ struct rcu_ctrlblk {
        long    cur;            /* Current batch number.                      */
        long    completed;      /* Number of the last completed batch         */
        long    pending;        /* Number of the last pending batch           */
+#ifdef CONFIG_DEBUG_RCU_STALL
+        unsigned long gp_check; /* Time grace period should end, in seconds.  */
+#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
        int     signaled;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index d4271146a9bd..d7ec731de75c 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -47,6 +47,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/time.h>
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -286,6 +287,81 @@ static void rcu_do_batch(struct rcu_data *rdp)
 *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
 *   period (if necessary).
 */
+#ifdef CONFIG_DEBUG_RCU_STALL
+/*
+ * Stamp the control block with the deadline for the grace period that
+ * rcu_start_batch() has just started: three seconds from the current
+ * wall-clock second.  check_cpu_stall() compares get_seconds() against
+ * this value to decide whether to complain.
+ */
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+        unsigned long deadline = get_seconds() + 3;
+
+        rcp->gp_check = deadline;
+}
+/*
+ * Complain about the CPUs that are still set in rcp->cpumask.  Called from
+ * check_cpu_stall() on a CPU that is not itself in the mask once the
+ * deadline in rcp->gp_check is at least two seconds in the past.
+ *
+ * The deadline is re-checked under rcp->lock and, if the report goes
+ * ahead, pushed 30 seconds into the future before the lock is dropped.
+ * That both stops other CPUs from printing the same report and limits
+ * repeat reports to roughly one per 30 seconds.
+ */
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+        int cpu;
+        long delta;
+        /* Only let one CPU complain about others per time interval. */
+        spin_lock(&rcp->lock);
+        delta = get_seconds() - rcp->gp_check;
+        if (delta < 2L ||
+            cpus_empty(rcp->cpumask)) {
+                /* Someone else already reported, or nobody is stalling. */
+                spin_unlock(&rcp->lock);
+                return;
+        }
+        /*
+         * Back off.  This must sit after the early-return branch: placed
+         * inside it (after the return) it is dead code and the report
+         * would be repeated on every check.
+         */
+        rcp->gp_check = get_seconds() + 30;
+        spin_unlock(&rcp->lock);
+        /* OK, time to rat on our buddy... */
+        printk(KERN_ERR "RCU detected CPU stalls:");
+        for_each_cpu_mask(cpu, rcp->cpumask)
+                printk(" %d", cpu);
+        printk(" (detected by %d, t=%lu/%lu)\n",
+               smp_processor_id(), get_seconds(), rcp->gp_check);
+}
+/*
+ * Complain that the CPU running this code is itself still set in
+ * rcp->cpumask past the deadline: log the CPU number with the current
+ * time and the deadline, then dump this CPU's stack.
+ *
+ * Afterwards, under rcp->lock, push the deadline 30 seconds out, but
+ * only if it is still in the past (NOTE(review): presumably because
+ * another CPU may have moved it in the meantime -- confirm).
+ */
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+        long overdue;
+
+        printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
+               smp_processor_id(), get_seconds(), rcp->gp_check);
+        dump_stack();
+
+        spin_lock(&rcp->lock);
+        overdue = (long)(get_seconds() - rcp->gp_check);
+        if (overdue >= 0L)
+                rcp->gp_check = get_seconds() + 30;
+        spin_unlock(&rcp->lock);
+}
+/*
+ * Stall check, invoked from __rcu_pending().
+ *
+ * Compares the current wall-clock second with the deadline stored in
+ * rcp->gp_check (read here without holding rcp->lock; the print helpers
+ * re-check under the lock):
+ *  - this CPU is still in rcp->cpumask and the deadline has been
+ *    reached: report this CPU and dump its stack;
+ *  - otherwise, the mask is non-empty and the deadline passed at least
+ *    two seconds ago: report the CPUs that are in the mask.
+ *
+ * @rdp is not used.
+ */
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
+                                   struct rcu_data *rdp)
+{
+        long overdue = get_seconds() - rcp->gp_check;
+
+        if (cpu_isset(smp_processor_id(), rcp->cpumask) && overdue >= 0L) {
+                /* We haven't checked in, so go dump stack. */
+                print_cpu_stall(rcp);
+                return;
+        }
+
+        if (cpus_empty(rcp->cpumask) || overdue < 2L)
+                return;
+
+        /* They had two seconds to dump stack, so complain. */
+        print_other_cpu_stall(rcp);
+}
+#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
+/*
+ * Stall detection is compiled out (CONFIG_DEBUG_RCU_STALL unset): struct
+ * rcu_ctrlblk has no gp_check field, so there is nothing to record.
+ */
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+/*
+ * Stall detection is compiled out (CONFIG_DEBUG_RCU_STALL unset): the
+ * check performed from __rcu_pending() is a no-op.
+ */
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
+                                   struct rcu_data *rdp)
+{
+}
+#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
 /*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
@@ -296,6 +372,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
        if (rcp->cur != rcp->pending &&
                        rcp->completed == rcp->cur) {
                rcp->cur++;
+                record_gp_check_time(rcp);
                /*
                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -489,6 +566,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
+        /* Check for CPU stalls, if enabled. */
+        check_cpu_stall(rcp, rdp);
        if (rdp->nxtlist) {
                /*
                 * This cpu has pending rcu entries and the grace period
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e1d4764435ed..2fb6d90bf1e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
          Say N here if you want the RCU torture tests to start only
          after being manually enabled via /proc.
+# The symbol name must match the CONFIG_DEBUG_RCU_STALL tests in
+# include/linux/rcuclassic.h and kernel/rcuclassic.c.
+config DEBUG_RCU_STALL
+        bool "Check for stalled CPUs delaying RCU grace periods"
+        depends on CLASSIC_RCU
+        default n
+        help
+          This option causes RCU to printk information on which
+          CPUs are delaying the current grace period, but only when
+          the grace period extends for excessive time periods.
+          Say Y if you want RCU to perform such checks.
+          Say N if you are unsure.
 config KPROBES_SANITY_TEST
        bool "Kprobes sanity tests"
        depends on DEBUG_KERNEL
author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2008-08-10 21:35:38 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-11 07:35:18 -0400
commit	67182ae1c42206e516f7efb292b745e826497b24 (patch)
tree	d2d402550a0432489090264df95a8154597dc989
parent	c4c0c56a7a85ed5725786219e4fbca7e840b1531 (diff)

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 04c728147be0..16589958b40e 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h
@@ -46,6 +46,9 @@ struct rcu_ctrlblk {
46	long cur; /* Current batch number. */	46	long cur; /* Current batch number. */
47	long completed; /* Number of the last completed batch */	47	long completed; /* Number of the last completed batch */
48	long pending; /* Number of the last pending batch */	48	long pending; /* Number of the last pending batch */
		49	#ifdef CONFIG_DEBUG_RCU_STALL
		50	unsigned long gp_check; /* Time grace period should end, in seconds. */
		51	#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
49		52
50	int signaled;	53	int signaled;
51		54


diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index d4271146a9bd..d7ec731de75c 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c
@@ -47,6 +47,7 @@
47	#include <linux/notifier.h>	47	#include <linux/notifier.h>
48	#include <linux/cpu.h>	48	#include <linux/cpu.h>
49	#include <linux/mutex.h>	49	#include <linux/mutex.h>
		50	#include <linux/time.h>
50		51
51	#ifdef CONFIG_DEBUG_LOCK_ALLOC	52	#ifdef CONFIG_DEBUG_LOCK_ALLOC
52	static struct lock_class_key rcu_lock_key;	53	static struct lock_class_key rcu_lock_key;
@@ -286,6 +287,81 @@ static void rcu_do_batch(struct rcu_data *rdp)
286	* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace	287	* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
287	* period (if necessary).	288	* period (if necessary).
288	*/	289	*/
		290
		291	#ifdef CONFIG_DEBUG_RCU_STALL
		292
		293	static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
		294	{ /* Record the deadline that check_cpu_stall() tests against. */
		295	rcp->gp_check = get_seconds() + 3; /* three seconds from the current second */
		296	}
		297	static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
		298	{ /* List the CPUs still set in rcp->cpumask; rate-limited through rcp->gp_check. */
		299	int cpu;
		300	long delta;
		301
		302	/* Only let one CPU complain about others per time interval. */
		303
		304	spin_lock(&rcp->lock);
		305	delta = get_seconds() - rcp->gp_check;
		306	if (delta < 2L ||
		307	cpus_empty(rcp->cpumask)) {
		308	spin_unlock(&rcp->lock);
		309	return;
		310	}
		311	rcp->gp_check = get_seconds() + 30; /* back off; must follow the early-return branch to be reachable */
		312	spin_unlock(&rcp->lock);
		313
		314	/* OK, time to rat on our buddy... */
		315
		316	printk(KERN_ERR "RCU detected CPU stalls:");
		317	for_each_cpu_mask(cpu, rcp->cpumask)
		318	printk(" %d", cpu);
		319	printk(" (detected by %d, t=%lu/%lu)\n",
		320	smp_processor_id(), get_seconds(), rcp->gp_check);
		321	}
		322	static void print_cpu_stall(struct rcu_ctrlblk *rcp)
		323	{ /* Report that the current CPU is past the deadline, then back off. */
		324	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
		325	smp_processor_id(), get_seconds(), rcp->gp_check);
		326	dump_stack(); /* show what this CPU is doing */
		327	spin_lock(&rcp->lock);
		328	if ((long)(get_seconds() - rcp->gp_check) >= 0L) /* deadline still in the past? */
		329	rcp->gp_check = get_seconds() + 30; /* next complaint no sooner than 30 seconds away */
		330	spin_unlock(&rcp->lock);
		331	}
		332	static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
		333	struct rcu_data *rdp)
		334	{ /* Called from __rcu_pending(); rdp is not used. */
		335	long delta;
		336
		337	delta = get_seconds() - rcp->gp_check; /* seconds past the deadline; read without rcp->lock */
		338	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
		339
		340	/* We haven't checked in, so go dump stack. */
		341
		342	print_cpu_stall(rcp);
		343
		344	} else if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
		345
		346	/* They had two seconds to dump stack, so complain. */
		347
		348	print_other_cpu_stall(rcp);
		349
		350	}
		351	}
		352
		353	#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
		354
		355	static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
		356	{ /* Stall detection compiled out: nothing to record. */
		357	}
		358	static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
		359	struct rcu_data *rdp)
		360	{ /* Stall detection compiled out: nothing to check. */
		361	}
		362
		363	#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
		364
289	/*	365	/*
290	* Register a new batch of callbacks, and start it up if there is currently no	366	* Register a new batch of callbacks, and start it up if there is currently no
291	* active batch and the batch to be registered has not already occurred.	367	* active batch and the batch to be registered has not already occurred.
@@ -296,6 +372,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
296	if (rcp->cur != rcp->pending &&	372	if (rcp->cur != rcp->pending &&
297	rcp->completed == rcp->cur) {	373	rcp->completed == rcp->cur) {
298	rcp->cur++;	374	rcp->cur++;
		375	record_gp_check_time(rcp);
299		376
300	/*	377	/*
301	* Accessing nohz_cpu_mask before incrementing rcp->cur needs a	378	* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -489,6 +566,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
489		566
490	static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)	567	static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
491	{	568	{
		569	/* Check for CPU stalls, if enabled. */
		570	check_cpu_stall(rcp, rdp);
		571
492	if (rdp->nxtlist) {	572	if (rdp->nxtlist) {
493	/*	573	/*
494	* This cpu has pending rcu entries and the grace period	574	* This cpu has pending rcu entries and the grace period


diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e1d4764435ed..2fb6d90bf1e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
597	Say N here if you want the RCU torture tests to start only	597	Say N here if you want the RCU torture tests to start only
598	after being manually enabled via /proc.	598	after being manually enabled via /proc.
599		599
		600	config DEBUG_RCU_STALL
		601	bool "Check for stalled CPUs delaying RCU grace periods"
		602	depends on CLASSIC_RCU
		603	default n
		604	help
		605	This option causes RCU to printk information on which
		606	CPUs are delaying the current grace period, but only when
		607	the grace period extends for excessive time periods.
		608
		609	Say Y if you want RCU to perform such checks.
		610
		611	Say N if you are unsure.
		612
600	config KPROBES_SANITY_TEST	613	config KPROBES_SANITY_TEST
601	bool "Kprobes sanity tests"	614	bool "Kprobes sanity tests"
602	depends on DEBUG_KERNEL	615	depends on DEBUG_KERNEL