path: root/kernel/rcutree.c
author	Paul E. McKenney <paul.mckenney@linaro.org>	2011-02-07 15:47:15 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2011-05-06 02:16:55 -0400
commit	27f4d28057adf98750cf863c40baefb12f5b6d21 (patch)
tree	a66584d2cb68cebda327313f78b8f4eb602ed4ef /kernel/rcutree.c
parent	a26ac2455ffcf3be5c6ef92bc6df7182700f2114 (diff)
rcu: priority boosting for TREE_PREEMPT_RCU
Add priority boosting for TREE_PREEMPT_RCU, similar to that for TINY_PREEMPT_RCU. This is enabled by the default-off RCU_BOOST kernel parameter.

The priority to which to boost preempted RCU readers is controlled by the RCU_BOOST_PRIO kernel parameter (defaulting to real-time priority 1) and the time to wait before boosting the readers who are blocking a given grace period is controlled by the RCU_BOOST_DELAY kernel parameter (defaulting to 500 milliseconds).

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
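As a rough illustration of the boost-delay policy described above, the sketch below shows how a 500-millisecond delay measured from the start of a grace period can be checked with standard kernel jiffies helpers. It is not code from this patch: the helper name, its parameters, and the RCU_BOOST_DELAY_JIFFIES macro are illustrative assumptions. The patch's own delay handling lives in the per-rcu_node boost code reached via rcu_preempt_boost_start_gp() and rcu_initiate_boost(), which is defined outside this file.

/*
 * Illustrative sketch only -- not part of this patch.  It approximates
 * the boost-delay policy: readers blocking the current grace period
 * become candidates for boosting only once the configured delay
 * (defaulting to 500 ms) has elapsed since the grace period began.
 */
#include <linux/jiffies.h>
#include <linux/types.h>

/* Hypothetical macro mirroring the 500 ms RCU_BOOST_DELAY default. */
#define RCU_BOOST_DELAY_JIFFIES	msecs_to_jiffies(500)

/*
 * Hypothetical helper: given the jiffies timestamp at which the current
 * grace period started and whether any readers are still blocking it,
 * decide whether boosting should be initiated now.
 */
static bool rcu_boost_delay_elapsed(unsigned long gp_start_jiffies,
				    bool readers_blocking_gp)
{
	return readers_blocking_gp &&
	       time_after_eq(jiffies,
			     gp_start_jiffies + RCU_BOOST_DELAY_JIFFIES);
}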
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c	115
1 file changed, 77 insertions, 38 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 18e33313873e..28fd92a9e0d0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -81,6 +81,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+static struct rcu_state *rcu_state;
+
 int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
@@ -94,7 +96,7 @@ static DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp);
-static void invoke_rcu_kthread(void);
+static void invoke_rcu_cpu_kthread(void);
 
 #define RCU_KTHREAD_PRIO 1	/* RT priority for per-CPU kthreads. */
 
@@ -791,6 +793,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->completed = rsp->completed;
 		rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
 		rcu_start_gp_per_cpu(rsp, rnp, rdp);
+		rcu_preempt_boost_start_gp(rnp);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
@@ -826,6 +829,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->completed = rsp->completed;
 		if (rnp == rdp->mynode)
 			rcu_start_gp_per_cpu(rsp, rnp, rdp);
+		rcu_preempt_boost_start_gp(rnp);
 		raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
 	}
 
@@ -882,7 +886,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 			return;
 		}
 		rnp->qsmask &= ~mask;
-		if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+		if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 
 			/* Other bits still set at this level, so done. */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1089,8 +1093,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	t = rnp->node_kthread_task;
 	if (t != NULL &&
 	    rnp->qsmaskinit == 0) {
-		kthread_stop(t);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->node_kthread_task = NULL;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		kthread_stop(t);
+		rcu_stop_boost_kthread(rnp);
 	} else
 		rcu_node_kthread_setaffinity(rnp);
 }
@@ -1190,7 +1197,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/* Re-raise the RCU softirq if there are callbacks remaining. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
-		invoke_rcu_kthread();
+		invoke_rcu_cpu_kthread();
 }
 
 /*
@@ -1236,7 +1243,7 @@ void rcu_check_callbacks(int cpu, int user)
 	}
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
-		invoke_rcu_kthread();
+		invoke_rcu_cpu_kthread();
 }
 
 #ifdef CONFIG_SMP
@@ -1244,6 +1251,8 @@ void rcu_check_callbacks(int cpu, int user)
 /*
  * Scan the leaf rcu_node structures, processing dyntick state for any that
  * have not yet encountered a quiescent state, using the function specified.
+ * Also initiate boosting for any threads blocked on the root rcu_node.
+ *
  * The caller must have suppressed start of new grace periods.
  */
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
@@ -1262,6 +1271,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 			return;
 		}
 		if (rnp->qsmask == 0) {
+			rcu_initiate_boost(rnp);
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			continue;
 		}
@@ -1280,6 +1290,11 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
+	rnp = rcu_get_root(rsp);
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	if (rnp->qsmask == 0)
+		rcu_initiate_boost(rnp);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -1417,7 +1432,7 @@ static void rcu_process_callbacks(void)
  * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
  * cannot disappear out from under us.
  */
-static void invoke_rcu_kthread(void)
+static void invoke_rcu_cpu_kthread(void)
 {
 	unsigned long flags;
 	wait_queue_head_t *q;
@@ -1436,24 +1451,33 @@ static void invoke_rcu_kthread(void)
 }
 
 /*
+ * Wake up the specified per-rcu_node-structure kthread.
+ * The caller must hold ->lock.
+ */
+static void invoke_rcu_node_kthread(struct rcu_node *rnp)
+{
+	struct task_struct *t;
+
+	t = rnp->node_kthread_task;
+	if (t != NULL)
+		wake_up_process(t);
+}
+
+/*
  * Timer handler to initiate the waking up of per-CPU kthreads that
  * have yielded the CPU due to excess numbers of RCU callbacks.
+ * We wake up the per-rcu_node kthread, which in turn will wake up
+ * the booster kthread.
  */
 static void rcu_cpu_kthread_timer(unsigned long arg)
 {
 	unsigned long flags;
-	struct rcu_data *rdp = (struct rcu_data *)arg;
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
 	struct rcu_node *rnp = rdp->mynode;
-	struct task_struct *t;
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rnp->wakemask |= rdp->grpmask;
-	t = rnp->node_kthread_task;
-	if (t == NULL) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-	wake_up_process(t);
+	invoke_rcu_node_kthread(rnp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
@@ -1463,13 +1487,12 @@ static void rcu_cpu_kthread_timer(unsigned long arg)
  * remain preempted. Either way, we restore our real-time priority
  * before returning.
  */
-static void rcu_yield(int cpu)
+static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
 {
-	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
 	struct sched_param sp;
 	struct timer_list yield_timer;
 
-	setup_timer_on_stack(&yield_timer, rcu_cpu_kthread_timer, (unsigned long)rdp);
+	setup_timer_on_stack(&yield_timer, f, arg);
 	mod_timer(&yield_timer, jiffies + 2);
 	sp.sched_priority = 0;
 	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
@@ -1540,7 +1563,7 @@ static int rcu_cpu_kthread(void *arg)
 		else
 			spincnt = 0;
 		if (spincnt > 10) {
-			rcu_yield(cpu);
+			rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
 			spincnt = 0;
 		}
 	}
@@ -1597,6 +1620,7 @@ static int rcu_node_kthread(void *arg)
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		mask = rnp->wakemask;
 		rnp->wakemask = 0;
+		rcu_initiate_boost(rnp);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
 			if ((mask & 0x1) == 0)
@@ -1618,7 +1642,8 @@ static int rcu_node_kthread(void *arg)
 
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question.
+ * served by the rcu_node in question. The CPU hotplug lock is still
+ * held, so the value of rnp->qsmaskinit will be stable.
  */
 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
 {
@@ -1626,8 +1651,7 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
 	int cpu;
 	unsigned long mask = rnp->qsmaskinit;
 
-	if (rnp->node_kthread_task == NULL ||
-	    rnp->qsmaskinit == 0)
+	if (rnp->node_kthread_task == NULL || mask == 0)
 		return;
 	if (!alloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
@@ -1636,31 +1660,40 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
 		if (mask & 0x1)
 			cpumask_set_cpu(cpu, cm);
 	set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
+	rcu_boost_kthread_setaffinity(rnp, cm);
 	free_cpumask_var(cm);
 }
 
 /*
  * Spawn a per-rcu_node kthread, setting priority and affinity.
+ * Called during boot before online/offline can happen, or, if
+ * during runtime, with the main CPU-hotplug locks held. So only
+ * one of these can be executing at a time.
  */
 static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
 						struct rcu_node *rnp)
 {
+	unsigned long flags;
 	int rnp_index = rnp - &rsp->node[0];
 	struct sched_param sp;
 	struct task_struct *t;
 
 	if (!rcu_kthreads_spawnable ||
-	    rnp->qsmaskinit == 0 ||
-	    rnp->node_kthread_task != NULL)
+	    rnp->qsmaskinit == 0)
 		return 0;
-	t = kthread_create(rcu_node_kthread, (void *)rnp, "rcun%d", rnp_index);
-	if (IS_ERR(t))
-		return PTR_ERR(t);
-	rnp->node_kthread_task = t;
-	wake_up_process(t);
-	sp.sched_priority = 99;
-	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-	return 0;
+	if (rnp->node_kthread_task == NULL) {
+		t = kthread_create(rcu_node_kthread, (void *)rnp,
+				   "rcun%d", rnp_index);
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		rnp->node_kthread_task = t;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		wake_up_process(t);
+		sp.sched_priority = 99;
+		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	}
+	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
 }
 
 /*
@@ -1678,10 +1711,16 @@ static int __init rcu_spawn_kthreads(void)
 		if (cpu_online(cpu))
 			(void)rcu_spawn_one_cpu_kthread(cpu);
 	}
-	rcu_for_each_leaf_node(&rcu_sched_state, rnp) {
-		init_waitqueue_head(&rnp->node_wq);
-		(void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp);
-	}
+	rnp = rcu_get_root(rcu_state);
+	init_waitqueue_head(&rnp->node_wq);
+	rcu_init_boost_waitqueue(rnp);
+	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+	if (NUM_RCU_NODES > 1)
+		rcu_for_each_leaf_node(rcu_state, rnp) {
+			init_waitqueue_head(&rnp->node_wq);
+			rcu_init_boost_waitqueue(rnp);
+			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+		}
 	return 0;
 }
 early_initcall(rcu_spawn_kthreads);
@@ -2087,14 +2126,14 @@ static void __cpuinit rcu_online_cpu(int cpu)
 
 static void __cpuinit rcu_online_kthreads(int cpu)
 {
-	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 
 	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
 	if (rcu_kthreads_spawnable) {
 		(void)rcu_spawn_one_cpu_kthread(cpu);
 		if (rnp->node_kthread_task == NULL)
 			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
 	}
 }
 
@@ -2105,7 +2144,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
-	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 
 	switch (action) {