about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/rcu
diff options
context:
space:
mode:
author: Daniel Bristot de Oliveira <bristot@redhat.com> 2016-06-02 12:51:41 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2016-06-15 19:00:05 -0400
commit: 088e9d253d3a4ab7e058dd84bb532c32dadf1882 (patch)
tree: fd8423b8a89ba645d7a2820485309c49c58fbd07 /kernel/rcu
parent: aab057382cb9b16249552684c1ebd270f070ec02 (diff)
rcu: sysctl: Panic on RCU Stall
It is not always easy to determine the cause of an RCU stall just by analysing the RCU stall messages, mainly when the problem is caused by the indirect starvation of RCU threads. For example, when preempt_rcu is not awakened due to the starvation of a timer softirq.

We have been hard coding panic() in the RCU stall functions for some time while testing the kernel-rt. But this is not possible in some scenarios, like when supporting customers.

This patch implements the sysctl kernel.panic_on_rcu_stall. If set to 1, the system will panic() when an RCU stall takes place, enabling the capture of a vmcore. The vmcore provides a way to analyze all kernel/task states, helping to pinpoint the culprit and the solution for the stall.

The kernel.panic_on_rcu_stall sysctl is disabled by default.

Changes from v1:
- Fixed a typo in the git log
- The if (sysctl_panic_on_rcu_stall) panic() check is now in a static function
- Fixed the CONFIG_TINY_RCU compilation issue
- The variable sysctl_panic_on_rcu_stall is now __read_mostly

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Tested-by: "Luis Claudio R. Goncalves" <lgoncalv@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- kernel/rcu/tree.c 12
1 file changed, 12 insertions, 0 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c844b6142a86..e5ca15a461b9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -125,6 +125,8 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
125/* Number of rcu_nodes at specified level. */ 125/* Number of rcu_nodes at specified level. */
126static int num_rcu_lvl[] = NUM_RCU_LVL_INIT; 126static int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
127int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ 127int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
128/* panic() on RCU Stall sysctl. */
129int sysctl_panic_on_rcu_stall __read_mostly;
128 130
129/* 131/*
130 * The rcu_scheduler_active variable transitions from zero to one just 132 * The rcu_scheduler_active variable transitions from zero to one just
@@ -1312,6 +1314,12 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
1312 } 1314 }
1313} 1315}
1314 1316
1317static inline void panic_on_rcu_stall(void)
1318{
1319 if (sysctl_panic_on_rcu_stall)
1320 panic("RCU Stall\n");
1321}
1322
1315static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) 1323static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1316{ 1324{
1317 int cpu; 1325 int cpu;
@@ -1391,6 +1399,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1391 1399
1392 rcu_check_gp_kthread_starvation(rsp); 1400 rcu_check_gp_kthread_starvation(rsp);
1393 1401
1402 panic_on_rcu_stall();
1403
1394 force_quiescent_state(rsp); /* Kick them all. */ 1404 force_quiescent_state(rsp); /* Kick them all. */
1395} 1405}
1396 1406
@@ -1431,6 +1441,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
1431 jiffies + 3 * rcu_jiffies_till_stall_check() + 3); 1441 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1432 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1442 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1433 1443
1444 panic_on_rcu_stall();
1445
1434 /* 1446 /*
1435 * Attempt to revive the RCU machinery by forcing a context switch. 1447 * Attempt to revive the RCU machinery by forcing a context switch.
1436 * 1448 *