-rw-r--r--  init/Kconfig            |   2
-rw-r--r--  kernel/rcutree.c        | 115
-rw-r--r--  kernel/rcutree.h        |  31
-rw-r--r--  kernel/rcutree_plugin.h | 314
4 files changed, 411 insertions(+), 51 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index d886b1e9278e..2d964fa40f5b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -485,7 +485,7 @@ config TREE_RCU_TRACE
 
 config RCU_BOOST
 	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && TINY_PREEMPT_RCU
+	depends on RT_MUTEXES && PREEMPT_RCU
 	default n
 	help
 	  This option boosts the priority of preempted RCU readers that
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 18e33313873e..28fd92a9e0d0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -81,6 +81,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+static struct rcu_state *rcu_state;
+
 int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
@@ -94,7 +96,7 @@ static DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp);
-static void invoke_rcu_kthread(void);
+static void invoke_rcu_cpu_kthread(void);
 
 #define RCU_KTHREAD_PRIO 1	/* RT priority for per-CPU kthreads. */
 
@@ -791,6 +793,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->completed = rsp->completed;
 		rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
 		rcu_start_gp_per_cpu(rsp, rnp, rdp);
+		rcu_preempt_boost_start_gp(rnp);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
@@ -826,6 +829,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->completed = rsp->completed;
 		if (rnp == rdp->mynode)
 			rcu_start_gp_per_cpu(rsp, rnp, rdp);
+		rcu_preempt_boost_start_gp(rnp);
 		raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
 	}
 
@@ -882,7 +886,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 			return;
 		}
 		rnp->qsmask &= ~mask;
-		if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+		if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 
 			/* Other bits still set at this level, so done. */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1089,8 +1093,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	t = rnp->node_kthread_task;
 	if (t != NULL &&
 	    rnp->qsmaskinit == 0) {
-		kthread_stop(t);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->node_kthread_task = NULL;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		kthread_stop(t);
+		rcu_stop_boost_kthread(rnp);
 	} else
 		rcu_node_kthread_setaffinity(rnp);
 }
@@ -1190,7 +1197,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/* Re-raise the RCU softirq if there are callbacks remaining. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
-		invoke_rcu_kthread();
+		invoke_rcu_cpu_kthread();
 }
 
 /*
@@ -1236,7 +1243,7 @@ void rcu_check_callbacks(int cpu, int user)
 	}
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
-		invoke_rcu_kthread();
+		invoke_rcu_cpu_kthread();
 }
 
 #ifdef CONFIG_SMP
@@ -1244,6 +1251,8 @@ void rcu_check_callbacks(int cpu, int user)
 /*
  * Scan the leaf rcu_node structures, processing dyntick state for any that
  * have not yet encountered a quiescent state, using the function specified.
+ * Also initiate boosting for any threads blocked on the root rcu_node.
+ *
  * The caller must have suppressed start of new grace periods.
  */
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
@@ -1262,6 +1271,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 			return;
 		}
 		if (rnp->qsmask == 0) {
+			rcu_initiate_boost(rnp);
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			continue;
 		}
@@ -1280,6 +1290,11 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
+	rnp = rcu_get_root(rsp);
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	if (rnp->qsmask == 0)
+		rcu_initiate_boost(rnp);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
1285/* 1300/*
@@ -1417,7 +1432,7 @@ static void rcu_process_callbacks(void)
  * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
  * cannot disappear out from under us.
  */
-static void invoke_rcu_kthread(void)
+static void invoke_rcu_cpu_kthread(void)
 {
 	unsigned long flags;
 	wait_queue_head_t *q;
@@ -1436,24 +1451,33 @@ static void invoke_rcu_kthread(void)
 }
 
 /*
+ * Wake up the specified per-rcu_node-structure kthread.
+ * The caller must hold ->lock.
+ */
+static void invoke_rcu_node_kthread(struct rcu_node *rnp)
+{
+	struct task_struct *t;
+
+	t = rnp->node_kthread_task;
+	if (t != NULL)
+		wake_up_process(t);
+}
+
+/*
  * Timer handler to initiate the waking up of per-CPU kthreads that
  * have yielded the CPU due to excess numbers of RCU callbacks.
+ * We wake up the per-rcu_node kthread, which in turn will wake up
+ * the booster kthread.
  */
 static void rcu_cpu_kthread_timer(unsigned long arg)
 {
 	unsigned long flags;
-	struct rcu_data *rdp = (struct rcu_data *)arg;
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
 	struct rcu_node *rnp = rdp->mynode;
-	struct task_struct *t;
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rnp->wakemask |= rdp->grpmask;
-	t = rnp->node_kthread_task;
-	if (t == NULL) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-	wake_up_process(t);
+	invoke_rcu_node_kthread(rnp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
@@ -1463,13 +1487,12 @@ static void rcu_cpu_kthread_timer(unsigned long arg)
  * remain preempted. Either way, we restore our real-time priority
  * before returning.
  */
-static void rcu_yield(int cpu)
+static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
 {
-	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
 	struct sched_param sp;
 	struct timer_list yield_timer;
 
-	setup_timer_on_stack(&yield_timer, rcu_cpu_kthread_timer, (unsigned long)rdp);
+	setup_timer_on_stack(&yield_timer, f, arg);
 	mod_timer(&yield_timer, jiffies + 2);
 	sp.sched_priority = 0;
 	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
@@ -1540,7 +1563,7 @@ static int rcu_cpu_kthread(void *arg)
 		else
 			spincnt = 0;
 		if (spincnt > 10) {
-			rcu_yield(cpu);
+			rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
 			spincnt = 0;
 		}
 	}
@@ -1597,6 +1620,7 @@ static int rcu_node_kthread(void *arg)
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		mask = rnp->wakemask;
 		rnp->wakemask = 0;
+		rcu_initiate_boost(rnp);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
 			if ((mask & 0x1) == 0)
@@ -1618,7 +1642,8 @@ static int rcu_node_kthread(void *arg)
 
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question.
+ * served by the rcu_node in question. The CPU hotplug lock is still
+ * held, so the value of rnp->qsmaskinit will be stable.
  */
 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
 {
@@ -1626,8 +1651,7 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
 	int cpu;
 	unsigned long mask = rnp->qsmaskinit;
 
-	if (rnp->node_kthread_task == NULL ||
-	    rnp->qsmaskinit == 0)
+	if (rnp->node_kthread_task == NULL || mask == 0)
 		return;
 	if (!alloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
@@ -1636,31 +1660,40 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
 		if (mask & 0x1)
 			cpumask_set_cpu(cpu, cm);
 	set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
+	rcu_boost_kthread_setaffinity(rnp, cm);
 	free_cpumask_var(cm);
 }
 
 /*
  * Spawn a per-rcu_node kthread, setting priority and affinity.
+ * Called during boot before online/offline can happen, or, if
+ * during runtime, with the main CPU-hotplug locks held. So only
+ * one of these can be executing at a time.
  */
 static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
 						struct rcu_node *rnp)
 {
+	unsigned long flags;
 	int rnp_index = rnp - &rsp->node[0];
 	struct sched_param sp;
 	struct task_struct *t;
 
 	if (!rcu_kthreads_spawnable ||
-	    rnp->qsmaskinit == 0 ||
-	    rnp->node_kthread_task != NULL)
+	    rnp->qsmaskinit == 0)
 		return 0;
-	t = kthread_create(rcu_node_kthread, (void *)rnp, "rcun%d", rnp_index);
-	if (IS_ERR(t))
-		return PTR_ERR(t);
-	rnp->node_kthread_task = t;
-	wake_up_process(t);
-	sp.sched_priority = 99;
-	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-	return 0;
+	if (rnp->node_kthread_task == NULL) {
+		t = kthread_create(rcu_node_kthread, (void *)rnp,
+				   "rcun%d", rnp_index);
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		rnp->node_kthread_task = t;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		wake_up_process(t);
+		sp.sched_priority = 99;
+		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	}
+	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
 }
 
 /*
@@ -1678,10 +1711,16 @@ static int __init rcu_spawn_kthreads(void)
 		if (cpu_online(cpu))
 			(void)rcu_spawn_one_cpu_kthread(cpu);
 	}
-	rcu_for_each_leaf_node(&rcu_sched_state, rnp) {
-		init_waitqueue_head(&rnp->node_wq);
-		(void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp);
-	}
+	rnp = rcu_get_root(rcu_state);
+	init_waitqueue_head(&rnp->node_wq);
+	rcu_init_boost_waitqueue(rnp);
+	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+	if (NUM_RCU_NODES > 1)
+		rcu_for_each_leaf_node(rcu_state, rnp) {
+			init_waitqueue_head(&rnp->node_wq);
+			rcu_init_boost_waitqueue(rnp);
+			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+		}
 	return 0;
 }
 early_initcall(rcu_spawn_kthreads);
@@ -2087,14 +2126,14 @@ static void __cpuinit rcu_online_cpu(int cpu)
 
 static void __cpuinit rcu_online_kthreads(int cpu)
 {
-	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 
 	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
 	if (rcu_kthreads_spawnable) {
 		(void)rcu_spawn_one_cpu_kthread(cpu);
 		if (rnp->node_kthread_task == NULL)
-			(void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp);
+			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
 	}
 }
 
@@ -2105,7 +2144,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
-	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 
 	switch (action) {
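
[Editorial aside, not part of the patch: rcu_yield() above generalizes an existing pattern -- arm a short timer to guarantee a later wakeup, drop from SCHED_FIFO to SCHED_NORMAL so other work can run, schedule, then restore the real-time priority -- so the new boost kthreads can reuse it. A minimal user-space sketch of that priority-drop-and-restore idea (assumes CAP_SYS_NICE for SCHED_FIFO; a bounded nanosleep() stands in for the kernel's 2-jiffy timer):]

/* Hypothetical sketch; build with: gcc -O2 -o yield_sketch yield_sketch.c */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <time.h>

/* Drop to SCHED_OTHER briefly, then restore the given SCHED_FIFO priority. */
static void yield_from_rt(int rt_prio)
{
	struct sched_param sp = { .sched_priority = 0 };
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 2000000 };

	if (sched_setscheduler(0, SCHED_OTHER, &sp) != 0)
		perror("drop to SCHED_OTHER");
	nanosleep(&ts, NULL);		/* let lower-priority tasks run */
	sp.sched_priority = rt_prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0)
		perror("restore SCHED_FIFO");
}

int main(void)
{
	struct sched_param sp = { .sched_priority = 1 };

	if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0) {
		perror("sched_setscheduler(SCHED_FIFO)");	/* needs CAP_SYS_NICE */
		return 1;
	}
	yield_from_rt(1);
	puts("yielded and returned to SCHED_FIFO");
	return 0;
}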
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index c0213802d164..8db0cdc7f450 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -135,6 +135,24 @@ struct rcu_node {
 				/* if there is no such task. If there */
 				/* is no current expedited grace period, */
 				/* then there can cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+	struct list_head *boost_tasks;
+				/* Pointer to first task that needs to be */
+				/* priority boosted, or NULL if no priority */
+				/* boosting is needed for this rcu_node */
+				/* structure. If there are no tasks */
+				/* queued on this rcu_node structure that */
+				/* are blocking the current grace period, */
+				/* there can be no such task. */
+	unsigned long boost_time;
+				/* When to start boosting (jiffies). */
+	struct task_struct *boost_kthread_task;
+				/* kthread that takes care of priority */
+				/* boosting for this rcu_node structure. */
+	wait_queue_head_t boost_wq;
+				/* Wait queue on which to park the boost */
+				/* kthread. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
 	struct task_struct *node_kthread_task;
 				/* kthread that takes care of this rcu_node */
 				/* structure, for example, awakening the */
@@ -365,7 +383,7 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static void rcu_bootup_announce(void);
 long rcu_batches_completed(void);
 static void rcu_preempt_note_context_switch(int cpu);
-static int rcu_preempted_readers(struct rcu_node *rnp);
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
 				      unsigned long flags);
@@ -392,5 +410,16 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
+static void rcu_initiate_boost(struct rcu_node *rnp);
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm);
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index);
+#ifdef CONFIG_HOTPLUG_CPU
+static void rcu_stop_boost_kthread(struct rcu_node *rnp);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 #endif /* #ifndef RCU_TREE_NONCORE */
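
[Editorial aside, not part of the patch: the new ->boost_tasks field added to struct rcu_node above is a cursor into the same ->blkd_tasks list that ->gp_tasks and ->exp_tasks already index, marking the first blocked task that still needs boosting. A toy, self-contained sketch (hypothetical names, not kernel code) of how several cursors can share one list:]

/* Toy model; build with: gcc -O2 -o cursors cursors.c */
#include <stdio.h>

/* Simplified stand-in for a task queued on ->blkd_tasks. */
struct blocked_task {
	const char *name;
	struct blocked_task *next;
};

int main(void)
{
	struct blocked_task c = { "task-c", NULL };
	struct blocked_task b = { "task-b", &c };
	struct blocked_task a = { "task-a", &b };

	struct blocked_task *blkd_tasks = &a;		/* the whole list */
	struct blocked_task *gp_tasks = &b;		/* first task blocking the current GP */
	struct blocked_task *boost_tasks = gp_tasks;	/* first task still needing a boost */

	/* "Boost" one task at a time, advancing only the cursor. */
	while (boost_tasks != NULL) {
		printf("boosting %s\n", boost_tasks->name);
		boost_tasks = boost_tasks->next;
	}

	/* The list itself and the other cursor are untouched. */
	printf("gp_tasks still points at %s; list head is still %s\n",
	       gp_tasks->name, blkd_tasks->name);
	return 0;
}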
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index b9bd69a5a4fe..5964f82e2d96 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -66,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void)
 
 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
+static struct rcu_state *rcu_state = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
@@ -179,6 +180,10 @@ static void rcu_preempt_note_context_switch(int cpu)
 		if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
 			list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
 			rnp->gp_tasks = &t->rcu_node_entry;
+#ifdef CONFIG_RCU_BOOST
+			if (rnp->boost_tasks != NULL)
+				rnp->boost_tasks = rnp->gp_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 		} else {
 			list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
 			if (rnp->qsmask & rdp->grpmask)
@@ -218,7 +223,7 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  * for the specified rcu_node structure. If the caller needs a reliable
  * answer, it must hold the rcu_node's ->lock.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 {
 	return rnp->gp_tasks != NULL;
 }
@@ -236,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
-	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+	if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return; /* Still need more quiescent states! */
 	}
@@ -325,7 +330,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 				break;
 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		}
-		empty = !rcu_preempted_readers(rnp);
+		empty = !rcu_preempt_blocked_readers_cgp(rnp);
 		empty_exp = !rcu_preempted_readers_exp(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 		np = rcu_next_node_entry(t, rnp);
@@ -334,6 +339,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 			rnp->gp_tasks = np;
 		if (&t->rcu_node_entry == rnp->exp_tasks)
 			rnp->exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+		if (&t->rcu_node_entry == rnp->boost_tasks)
+			rnp->boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 		t->rcu_blocked_node = NULL;
 
 		/*
@@ -346,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		else
 			rcu_report_unblock_qs_rnp(rnp, flags);
 
+#ifdef CONFIG_RCU_BOOST
+		/* Unboost if we were boosted. */
+		if (special & RCU_READ_UNLOCK_BOOSTED) {
+			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+			rt_mutex_unlock(t->rcu_boost_mutex);
+			t->rcu_boost_mutex = NULL;
+		}
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
 		/*
 		 * If this was the last task on the expedited lists,
 		 * then we need to report up the rcu_node hierarchy.
@@ -391,7 +409,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 	unsigned long flags;
 	struct task_struct *t;
 
-	if (!rcu_preempted_readers(rnp))
+	if (!rcu_preempt_blocked_readers_cgp(rnp))
 		return;
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	t = list_entry(rnp->gp_tasks,
@@ -430,7 +448,7 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
 {
 	struct task_struct *t;
 
-	if (!rcu_preempted_readers(rnp))
+	if (!rcu_preempt_blocked_readers_cgp(rnp))
 		return;
 	t = list_entry(rnp->gp_tasks,
 		       struct task_struct, rcu_node_entry);
@@ -460,7 +478,7 @@ static void rcu_preempt_stall_reset(void)
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
-	WARN_ON_ONCE(rcu_preempted_readers(rnp));
+	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
 	if (!list_empty(&rnp->blkd_tasks))
 		rnp->gp_tasks = rnp->blkd_tasks.next;
 	WARN_ON_ONCE(rnp->qsmask);
@@ -509,7 +527,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	 * absolutely necessary, but this is a good performance/complexity
 	 * tradeoff.
 	 */
-	if (rcu_preempted_readers(rnp))
+	if (rcu_preempt_blocked_readers_cgp(rnp))
 		retval |= RCU_OFL_TASKS_NORM_GP;
 	if (rcu_preempted_readers_exp(rnp))
 		retval |= RCU_OFL_TASKS_EXP_GP;
@@ -525,8 +543,22 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 			rnp_root->gp_tasks = rnp->gp_tasks;
 		if (&t->rcu_node_entry == rnp->exp_tasks)
 			rnp_root->exp_tasks = rnp->exp_tasks;
+#ifdef CONFIG_RCU_BOOST
+		if (&t->rcu_node_entry == rnp->boost_tasks)
+			rnp_root->boost_tasks = rnp->boost_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
+
+#ifdef CONFIG_RCU_BOOST
+	/* In case root is being boosted and leaf is not. */
+	raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+	if (rnp_root->boost_tasks != NULL &&
+	    rnp_root->boost_tasks != rnp_root->gp_tasks)
+		rnp_root->boost_tasks = rnp_root->gp_tasks;
+	raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
 	rnp->gp_tasks = NULL;
 	rnp->exp_tasks = NULL;
 	return retval;
@@ -684,6 +716,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 	raw_spin_lock(&rnp->lock); /* irqs already disabled */
 	if (!list_empty(&rnp->blkd_tasks)) {
 		rnp->exp_tasks = rnp->blkd_tasks.next;
+		rcu_initiate_boost(rnp);
 		must_wait = 1;
 	}
 	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
@@ -830,6 +863,8 @@ void exit_rcu(void)
 
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+static struct rcu_state *rcu_state = &rcu_sched_state;
+
 /*
  * Tell them what RCU they are running.
  */
@@ -870,7 +905,7 @@ static void rcu_preempt_note_context_switch(int cpu)
  * Because preemptable RCU does not exist, there are never any preempted
  * RCU readers.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 {
 	return 0;
 }
@@ -1034,6 +1069,263 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->exp_tasks
+ * or ->boost_tasks, advancing the pointer to the next task in the
+ * ->blkd_tasks list.
+ *
+ * Note that irqs must be enabled: boosting the task can block.
+ * Returns 1 if there are more tasks needing to be boosted.
+ */
+static int rcu_boost(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct rt_mutex mtx;
+	struct task_struct *t;
+	struct list_head *tb;
+
+	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
+		return 0;  /* Nothing left to boost. */
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+
+	/*
+	 * Recheck under the lock: all tasks in need of boosting
+	 * might exit their RCU read-side critical sections on their own.
+	 */
+	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return 0;
+	}
+
+	/*
+	 * Preferentially boost tasks blocking expedited grace periods.
+	 * This cannot starve the normal grace periods because a second
+	 * expedited grace period must boost all blocked tasks, including
+	 * those blocking the pre-existing normal grace period.
+	 */
+	if (rnp->exp_tasks != NULL)
+		tb = rnp->exp_tasks;
+	else
+		tb = rnp->boost_tasks;
+
+	/*
+	 * We boost task t by manufacturing an rt_mutex that appears to
+	 * be held by task t. We leave a pointer to that rt_mutex where
+	 * task t can find it, and task t will release the mutex when it
+	 * exits its outermost RCU read-side critical section. Then
+	 * simply acquiring this artificial rt_mutex will boost task
+	 * t's priority. (Thanks to tglx for suggesting this approach!)
+	 *
+	 * Note that task t must acquire rnp->lock to remove itself from
+	 * the ->blkd_tasks list, which it will do from exit() if from
+	 * nowhere else. We therefore are guaranteed that task t will
+	 * stay around at least until we drop rnp->lock. Note that
+	 * rnp->lock also resolves races between our priority boosting
+	 * and task t's exiting its outermost RCU read-side critical
+	 * section.
+	 */
+	t = container_of(tb, struct task_struct, rcu_node_entry);
+	rt_mutex_init_proxy_locked(&mtx, t);
+	t->rcu_boost_mutex = &mtx;
+	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
+	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
+
+	return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
+}
+
+/*
+ * Timer handler to initiate waking up of boost kthreads that
+ * have yielded the CPU due to excessive numbers of tasks to
+ * boost. We wake up the per-rcu_node kthread, which in turn
+ * will wake up the booster kthread.
+ */
+static void rcu_boost_kthread_timer(unsigned long arg)
+{
+	unsigned long flags;
+	struct rcu_node *rnp = (struct rcu_node *)arg;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	invoke_rcu_node_kthread(rnp);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Priority-boosting kthread. One per leaf rcu_node and one for the
+ * root rcu_node.
+ */
+static int rcu_boost_kthread(void *arg)
+{
+	struct rcu_node *rnp = (struct rcu_node *)arg;
+	int spincnt = 0;
+	int more2boost;
+
+	for (;;) {
+		wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
+							rnp->exp_tasks ||
+							kthread_should_stop());
+		if (kthread_should_stop())
+			break;
+		more2boost = rcu_boost(rnp);
+		if (more2boost)
+			spincnt++;
+		else
+			spincnt = 0;
+		if (spincnt > 10) {
+			rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
+			spincnt = 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Check to see if it is time to start boosting RCU readers that are
+ * blocking the current grace period, and, if so, tell the per-rcu_node
+ * kthread to start boosting them. If there is an expedited grace
+ * period in progress, it is always time to boost.
+ *
+ * The caller must hold rnp->lock.
+ */
+static void rcu_initiate_boost(struct rcu_node *rnp)
+{
+	struct task_struct *t;
+
+	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL)
+		return;
+	if (rnp->exp_tasks != NULL ||
+	    (rnp->gp_tasks != NULL &&
+	     rnp->boost_tasks == NULL &&
+	     rnp->qsmask == 0 &&
+	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {
+		if (rnp->exp_tasks == NULL)
+			rnp->boost_tasks = rnp->gp_tasks;
+		t = rnp->boost_kthread_task;
+		if (t != NULL)
+			wake_up_process(t);
+	}
+}
+
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm)
+{
+	unsigned long flags;
+	struct task_struct *t;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	t = rnp->boost_kthread_task;
+	if (t != NULL)
+		set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+}
+
+/*
+ * Initialize the RCU-boost waitqueue.
+ */
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
+{
+	init_waitqueue_head(&rnp->boost_wq);
+}
+
+/*
+ * Create an RCU-boost kthread for the specified node if one does not
+ * already exist. We only create this kthread for preemptible RCU.
+ * Returns zero if all is well, a negated errno otherwise.
+ */
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index)
+{
+	unsigned long flags;
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (&rcu_preempt_state != rsp)
+		return 0;
+	if (rnp->boost_kthread_task != NULL)
+		return 0;
+	t = kthread_create(rcu_boost_kthread, (void *)rnp,
+			   "rcub%d", rnp_index);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	rnp->boost_kthread_task = t;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	wake_up_process(t);
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void rcu_stop_boost_kthread(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct task_struct *t;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	t = rnp->boost_kthread_task;
+	rnp->boost_kthread_task = NULL;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	if (t != NULL)
+		kthread_stop(t);
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_initiate_boost(struct rcu_node *rnp)
+{
+}
+
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm)
+{
+}
+
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+}
+
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
+{
+}
+
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index)
+{
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void rcu_stop_boost_kthread(struct rcu_node *rnp)
+{
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
 #ifndef CONFIG_SMP
 
 void synchronize_sched_expedited(void)
@@ -1206,8 +1498,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
  *
  * Because it is not legal to invoke rcu_process_callbacks() with irqs
  * disabled, we do one pass of force_quiescent_state(), then do a
- * invoke_rcu_kthread() to cause rcu_process_callbacks() to be invoked later.
- * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
+ * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
  */
 int rcu_needs_cpu(int cpu)
 {
@@ -1257,7 +1549,7 @@ int rcu_needs_cpu(int cpu)
 
 	/* If RCU callbacks are still pending, RCU still needs this CPU. */
 	if (c)
-		invoke_rcu_kthread();
+		invoke_rcu_cpu_kthread();
 	return c;
 }
 
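
[Editorial aside, not part of the patch: rcu_boost() above relies on rt_mutex priority inheritance, using rt_mutex_init_proxy_locked() to manufacture a mutex that appears to be held by the preempted reader, so that acquiring it boosts the reader. User space has no proxy-locking equivalent, but the underlying priority-inheritance effect can be sketched with a PTHREAD_PRIO_INHERIT mutex. Illustrative only: SCHED_FIFO needs CAP_SYS_NICE, and without it the program still runs but no boosting is visible.]

/* Illustrative sketch; build with: gcc -O2 -pthread -o pi_boost pi_boost.c */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t boost_mutex;	/* PTHREAD_PRIO_INHERIT mutex */
static pthread_barrier_t ready;

/* Low-priority "reader": holds the PI mutex for a while, like a preempted
 * RCU reader that has not yet reached rcu_read_unlock(). */
static void *reader(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&boost_mutex);
	pthread_barrier_wait(&ready);		/* tell the booster we hold the mutex */
	sleep(1);				/* long "read-side critical section" */
	pthread_mutex_unlock(&boost_mutex);	/* releasing ends any boost */
	return NULL;
}

int main(void)
{
	pthread_mutexattr_t attr;
	pthread_t t;
	struct sched_param sp = { .sched_priority = 10 };

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
	pthread_mutex_init(&boost_mutex, &attr);
	pthread_barrier_init(&ready, NULL, 2);

	pthread_create(&t, NULL, reader, NULL);
	pthread_barrier_wait(&ready);

	/* Become "high priority"; harmless no-op if not privileged. */
	if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &sp) != 0)
		fprintf(stderr, "SCHED_FIFO unavailable; demo runs without boosting\n");

	/* Blocking here lends our priority to the reader until it unlocks. */
	pthread_mutex_lock(&boost_mutex);
	pthread_mutex_unlock(&boost_mutex);
	pthread_join(t, NULL);
	puts("reader completed (boosted while we were blocked, if permitted)");
	return 0;
}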