diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-06-13 15:36:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-06-13 15:36:42 -0400 |
commit | cb7e9704d58dab4b1b4284903e6bf973ade3863e (patch) | |
tree | bd66e94c4e34a9b89d4265591eb683a4fe8dad96 | |
parent | dcae7f2dfcc6c948c313d72df6a0d7e466c6707a (diff) | |
parent | 971394f389992f8462c4e5ae0e3b49a10a9534a3 (diff) |
Merge branch 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu
Pull RCU fixes from Paul McKenney:
"I must confess that this past merge window was not RCU's best showing.
This series contains three more fixes for RCU regressions:
1. A fix to __DECLARE_TRACE_RCU() that causes it to act as an
interrupt from idle rather than as a task switch from idle.
This change is needed due to the recent use of _rcuidle()
tracepoints that can be invoked from interrupt handlers as well
as from idle. Without this fix, invoking _rcuidle() tracepoints
from interrupt handlers results in splats and (more seriously)
confusion on RCU's part as to whether a given CPU is idle or not.
This confusion can in turn result in too-short grace periods and
therefore random memory corruption.
2. A fix to a subtle deadlock that could result due to RCU doing
a wakeup while holding one of its rcu_node structure's locks.
Although the probability of occurrence is low, it really
does happen. The fix, courtesy of Steven Rostedt, uses
irq_work_queue() to avoid the deadlock.
3. A fix to a silent deadlock (invisible to lockdep) due to the
interaction of timeouts posted by RCU debug code enabled by
CONFIG_PROVE_RCU_DELAY=y, grace-period initialization, and CPU
hotplug operations. This will not occur in production kernels,
but really does occur in randconfig testing. Diagnosis courtesy
of Steven Rostedt."
* 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu:
rcu: Fix deadlock with CPU hotplug, RCU GP init, and timer migration
rcu: Don't call wakeup() with rcu_node structure ->lock held
trace: Allow idle-safe tracepoints to be called from irq
-rw-r--r-- | include/linux/tracepoint.h | 4 | ||||
-rw-r--r-- | init/Kconfig | 1 | ||||
-rw-r--r-- | kernel/rcutree.c | 21 | ||||
-rw-r--r-- | kernel/rcutree.h | 2 |
4 files changed, 22 insertions, 6 deletions
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2f322c38bd4d..f8e084d0fc77 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
@@ -145,8 +145,8 @@ static inline void tracepoint_synchronize_unregister(void) | |||
145 | TP_PROTO(data_proto), \ | 145 | TP_PROTO(data_proto), \ |
146 | TP_ARGS(data_args), \ | 146 | TP_ARGS(data_args), \ |
147 | TP_CONDITION(cond), \ | 147 | TP_CONDITION(cond), \ |
148 | rcu_idle_exit(), \ | 148 | rcu_irq_enter(), \ |
149 | rcu_idle_enter()); \ | 149 | rcu_irq_exit()); \ |
150 | } | 150 | } |
151 | #else | 151 | #else |
152 | #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) | 152 | #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) |
diff --git a/init/Kconfig b/init/Kconfig index 9d3a7887a6d3..2d9b83104dcf 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -431,6 +431,7 @@ choice | |||
431 | config TREE_RCU | 431 | config TREE_RCU |
432 | bool "Tree-based hierarchical RCU" | 432 | bool "Tree-based hierarchical RCU" |
433 | depends on !PREEMPT && SMP | 433 | depends on !PREEMPT && SMP |
434 | select IRQ_WORK | ||
434 | help | 435 | help |
435 | This option selects the RCU implementation that is | 436 | This option selects the RCU implementation that is |
436 | designed for very large SMP systems with hundreds or | 437 | designed for very large SMP systems with hundreds or
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 16ea67925015..35380019f0fc 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -1451,9 +1451,9 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1451 | rnp->grphi, rnp->qsmask); | 1451 | rnp->grphi, rnp->qsmask); |
1452 | raw_spin_unlock_irq(&rnp->lock); | 1452 | raw_spin_unlock_irq(&rnp->lock); |
1453 | #ifdef CONFIG_PROVE_RCU_DELAY | 1453 | #ifdef CONFIG_PROVE_RCU_DELAY |
1454 | if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 && | 1454 | if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 && |
1455 | system_state == SYSTEM_RUNNING) | 1455 | system_state == SYSTEM_RUNNING) |
1456 | schedule_timeout_uninterruptible(2); | 1456 | udelay(200); |
1457 | #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ | 1457 | #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ |
1458 | cond_resched(); | 1458 | cond_resched(); |
1459 | } | 1459 | } |
@@ -1613,6 +1613,14 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
1613 | } | 1613 | } |
1614 | } | 1614 | } |
1615 | 1615 | ||
1616 | static void rsp_wakeup(struct irq_work *work) | ||
1617 | { | ||
1618 | struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work); | ||
1619 | |||
1620 | /* Wake up rcu_gp_kthread() to start the grace period. */ | ||
1621 | wake_up(&rsp->gp_wq); | ||
1622 | } | ||
1623 | |||
1616 | /* | 1624 | /* |
1617 | * Start a new RCU grace period if warranted, re-initializing the hierarchy | 1625 | * Start a new RCU grace period if warranted, re-initializing the hierarchy |
1618 | * in preparation for detecting the next grace period. The caller must hold | 1626 | * in preparation for detecting the next grace period. The caller must hold |
@@ -1637,8 +1645,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, | |||
1637 | } | 1645 | } |
1638 | rsp->gp_flags = RCU_GP_FLAG_INIT; | 1646 | rsp->gp_flags = RCU_GP_FLAG_INIT; |
1639 | 1647 | ||
1640 | /* Wake up rcu_gp_kthread() to start the grace period. */ | 1648 | /* |
1641 | wake_up(&rsp->gp_wq); | 1649 | * We can't do wakeups while holding the rnp->lock, as that |
1650 | * could cause possible deadlocks with the rq->lock. Defer | ||
1651 | * the wakeup to interrupt context. | ||
1652 | */ | ||
1653 | irq_work_queue(&rsp->wakeup_work); | ||
1642 | } | 1654 | } |
1643 | 1655 | ||
1644 | /* | 1656 | /* |
@@ -3235,6 +3247,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
3235 | 3247 | ||
3236 | rsp->rda = rda; | 3248 | rsp->rda = rda; |
3237 | init_waitqueue_head(&rsp->gp_wq); | 3249 | init_waitqueue_head(&rsp->gp_wq); |
3250 | init_irq_work(&rsp->wakeup_work, rsp_wakeup); | ||
3238 | rnp = rsp->level[rcu_num_lvls - 1]; | 3251 | rnp = rsp->level[rcu_num_lvls - 1]; |
3239 | for_each_possible_cpu(i) { | 3252 | for_each_possible_cpu(i) { |
3240 | while (i > rnp->grphi) | 3253 | while (i > rnp->grphi) |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index da77a8f57ff9..4df503470e42 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/threads.h> | 27 | #include <linux/threads.h> |
28 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
29 | #include <linux/seqlock.h> | 29 | #include <linux/seqlock.h> |
30 | #include <linux/irq_work.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and | 33 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and |
@@ -442,6 +443,7 @@ struct rcu_state { | |||
442 | char *name; /* Name of structure. */ | 443 | char *name; /* Name of structure. */ |
443 | char abbr; /* Abbreviated name. */ | 444 | char abbr; /* Abbreviated name. */ |
444 | struct list_head flavors; /* List of RCU flavors. */ | 445 | struct list_head flavors; /* List of RCU flavors. */ |
446 | struct irq_work wakeup_work; /* Postponed wakeups */ | ||
445 | }; | 447 | }; |
446 | 448 | ||
447 | /* Values for rcu_state structure's gp_flags field. */ | 449 | /* Values for rcu_state structure's gp_flags field. */ |