author		Paul E. McKenney <paul.mckenney@linaro.org>	2011-01-12 17:10:23 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2011-05-06 02:16:54 -0400
commit		a26ac2455ffcf3be5c6ef92bc6df7182700f2114 (patch)
tree		601697c7c3fd152a1b3f29f43d3a028fc8fefd5f /kernel/rcutree.c
parent		12f5f524cafef3ab689929b118f2dfb8bf2be321 (diff)
rcu: move TREE_RCU from softirq to kthread
If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in the presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM.

But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily.

Also add comments and properly synchronize all accesses to rcu_cpu_kthread_task, as suggested by Lai Jiangshan.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
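[Editor's illustration, not part of the commit] The sketch below is a minimal userspace analogue of the pattern described above, written with POSIX threads rather than kernel kthreads; every identifier in it is invented for illustration. A worker thread sleeps until a work flag is raised, a caller sets the flag and wakes it (the role played by invoke_rcu_kthread() in the patch), and the worker's scheduling priority can be raised with pthread_setschedparam(), which is exactly what cannot be done for softirq context. The diff that follows implements the real thing with per-CPU kthreads, wait queues, and sched_setscheduler_nocheck().

/*
 * Illustrative userspace analogue only; all names are invented and
 * none of this is kernel code.
 */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;	/* plays the role of rcu_cpu_wq */
static int has_work;					/* plays the role of rcu_cpu_has_work */
static int stop;

/* Stand-in for callback invocation (rcu_process_callbacks() in the patch). */
static void process_callbacks(void)
{
	printf("invoking callbacks\n");
}

/* Worker thread: the analogue of rcu_cpu_kthread(). */
static void *worker(void *unused)
{
	(void)unused;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!has_work && !stop)
			pthread_cond_wait(&wq, &lock);	/* analogue of wait_event_interruptible() */
		if (stop) {
			pthread_mutex_unlock(&lock);
			break;
		}
		has_work = 0;
		pthread_mutex_unlock(&lock);
		process_callbacks();
	}
	return NULL;
}

/* Analogue of invoke_rcu_kthread(): flag work and wake the worker. */
static void invoke_worker(void)
{
	pthread_mutex_lock(&lock);
	has_work = 1;
	pthread_cond_signal(&wq);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;
	struct sched_param sp = { .sched_priority = 1 };

	pthread_create(&t, NULL, worker, NULL);

	/* "Boosting": an ordinary thread can be given RT priority; a softirq cannot. */
	if (pthread_setschedparam(t, SCHED_FIFO, &sp) != 0)
		fprintf(stderr, "SCHED_FIFO not granted; worker runs unboosted\n");

	invoke_worker();
	sleep(1);

	pthread_mutex_lock(&lock);
	stop = 1;
	pthread_cond_signal(&wq);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}

Build with something like cc worker.c -lpthread; without sufficient privileges the SCHED_FIFO request simply fails and the worker keeps its normal priority.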
Diffstat (limited to 'kernel/rcutree.c')
 kernel/rcutree.c | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 336 insertions(+), 4 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0ac1cc03f935..18e33313873e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -47,6 +47,8 @@
 #include <linux/mutex.h>
 #include <linux/time.h>
 #include <linux/kernel_stat.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
 
 #include "rcutree.h"
 
@@ -83,6 +85,20 @@ int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
 /*
+ * Control variables for per-CPU and per-rcu_node kthreads.  These
+ * handle all flavors of RCU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
+static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
+static DEFINE_PER_CPU(char, rcu_cpu_has_work);
+static char rcu_kthreads_spawnable;
+
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp);
+static void invoke_rcu_kthread(void);
+
+#define RCU_KTHREAD_PRIO 1	/* RT priority for per-CPU kthreads. */
+
+/*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
  * structure's ->lock, but of course results can be subject to change.
@@ -1009,6 +1025,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 /*
  * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
  * and move all callbacks from the outgoing CPU to the current one.
+ * There can only be one CPU hotplug operation at a time, so no other
+ * CPU can be attempting to update rcu_cpu_kthread_task.
  */
 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 {
@@ -1017,6 +1035,14 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp;
+	struct task_struct *t;
+
+	/* Stop the CPU's kthread. */
+	t = per_cpu(rcu_cpu_kthread_task, cpu);
+	if (t != NULL) {
+		per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
+		kthread_stop(t);
+	}
 
 	/* Exclude any attempts to start a new grace period. */
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -1054,6 +1080,19 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	if (need_report & RCU_OFL_TASKS_EXP_GP)
 		rcu_report_exp_rnp(rsp, rnp);
+
+	/*
+	 * If there are no more online CPUs for this rcu_node structure,
+	 * kill the rcu_node structure's kthread.  Otherwise, adjust its
+	 * affinity.
+	 */
+	t = rnp->node_kthread_task;
+	if (t != NULL &&
+	    rnp->qsmaskinit == 0) {
+		kthread_stop(t);
+		rnp->node_kthread_task = NULL;
+	} else
+		rcu_node_kthread_setaffinity(rnp);
 }
 
 /*
@@ -1151,7 +1190,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/* Re-raise the RCU softirq if there are callbacks remaining. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_kthread();
 }
 
 /*
@@ -1197,7 +1236,7 @@ void rcu_check_callbacks(int cpu, int user)
 	}
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_kthread();
 }
 
 #ifdef CONFIG_SMP
@@ -1361,7 +1400,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 /*
  * Do softirq processing for the current CPU.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static void rcu_process_callbacks(void)
 {
 	__rcu_process_callbacks(&rcu_sched_state,
 				&__get_cpu_var(rcu_sched_data));
@@ -1372,6 +1411,281 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 	rcu_needs_cpu_flush();
 }
 
+/*
+ * Wake up the current CPU's kthread.  This replaces raise_softirq()
+ * in earlier versions of RCU.  Note that because we are running on
+ * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
+ * cannot disappear out from under us.
+ */
+static void invoke_rcu_kthread(void)
+{
+	unsigned long flags;
+	wait_queue_head_t *q;
+	int cpu;
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	per_cpu(rcu_cpu_has_work, cpu) = 1;
+	if (per_cpu(rcu_cpu_kthread_task, cpu) == NULL) {
+		local_irq_restore(flags);
+		return;
+	}
+	q = &per_cpu(rcu_cpu_wq, cpu);
+	wake_up(q);
+	local_irq_restore(flags);
+}
+
+/*
+ * Timer handler to initiate the waking up of per-CPU kthreads that
+ * have yielded the CPU due to excess numbers of RCU callbacks.
+ */
+static void rcu_cpu_kthread_timer(unsigned long arg)
+{
+	unsigned long flags;
+	struct rcu_data *rdp = (struct rcu_data *)arg;
+	struct rcu_node *rnp = rdp->mynode;
+	struct task_struct *t;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	rnp->wakemask |= rdp->grpmask;
+	t = rnp->node_kthread_task;
+	if (t == NULL) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	wake_up_process(t);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Drop to non-real-time priority and yield, but only after posting a
+ * timer that will cause us to regain our real-time priority if we
+ * remain preempted.  Either way, we restore our real-time priority
+ * before returning.
+ */
+static void rcu_yield(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct sched_param sp;
+	struct timer_list yield_timer;
+
+	setup_timer_on_stack(&yield_timer, rcu_cpu_kthread_timer, (unsigned long)rdp);
+	mod_timer(&yield_timer, jiffies + 2);
+	sp.sched_priority = 0;
+	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
+	schedule();
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+	del_timer(&yield_timer);
+}
+
+/*
+ * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
+ * This can happen while the corresponding CPU is either coming online
+ * or going offline.  We cannot wait until the CPU is fully online
+ * before starting the kthread, because the various notifier functions
+ * can wait for RCU grace periods.  So we park rcu_cpu_kthread() until
+ * the corresponding CPU is online.
+ *
+ * Return 1 if the kthread needs to stop, 0 otherwise.
+ *
+ * Caller must disable bh.  This function can momentarily enable it.
+ */
+static int rcu_cpu_kthread_should_stop(int cpu)
+{
+	while (cpu_is_offline(cpu) ||
+	       !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
+	       smp_processor_id() != cpu) {
+		if (kthread_should_stop())
+			return 1;
+		local_bh_enable();
+		schedule_timeout_uninterruptible(1);
+		if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
+			set_cpus_allowed_ptr(current, cpumask_of(cpu));
+		local_bh_disable();
+	}
+	return 0;
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
+ * earlier RCU softirq.
+ */
+static int rcu_cpu_kthread(void *arg)
+{
+	int cpu = (int)(long)arg;
+	unsigned long flags;
+	int spincnt = 0;
+	wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
+	char work;
+	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
+
+	for (;;) {
+		wait_event_interruptible(*wqp,
+					 *workp != 0 || kthread_should_stop());
+		local_bh_disable();
+		if (rcu_cpu_kthread_should_stop(cpu)) {
+			local_bh_enable();
+			break;
+		}
+		local_irq_save(flags);
+		work = *workp;
+		*workp = 0;
+		local_irq_restore(flags);
+		if (work)
+			rcu_process_callbacks();
+		local_bh_enable();
+		if (*workp != 0)
+			spincnt++;
+		else
+			spincnt = 0;
+		if (spincnt > 10) {
+			rcu_yield(cpu);
+			spincnt = 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Spawn a per-CPU kthread, setting up affinity and priority.
+ * Because the CPU hotplug lock is held, no other CPU will be attempting
+ * to manipulate rcu_cpu_kthread_task.  There might be another CPU
+ * attempting to access it during boot, but the locking in kthread_bind()
+ * will enforce sufficient ordering.
+ */
+static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
+{
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (!rcu_kthreads_spawnable ||
+	    per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
+		return 0;
+	t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	kthread_bind(t, cpu);
+	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
+	per_cpu(rcu_cpu_kthread_task, cpu) = t;
+	wake_up_process(t);
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	return 0;
+}
+
+/*
+ * Per-rcu_node kthread, which is in charge of waking up the per-CPU
+ * kthreads when needed.  We ignore requests to wake up kthreads
+ * for offline CPUs, which is OK because force_quiescent_state()
+ * takes care of this case.
+ */
+static int rcu_node_kthread(void *arg)
+{
+	int cpu;
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp = (struct rcu_node *)arg;
+	struct sched_param sp;
+	struct task_struct *t;
+
+	for (;;) {
+		wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0 ||
+						       kthread_should_stop());
+		if (kthread_should_stop())
+			break;
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		mask = rnp->wakemask;
+		rnp->wakemask = 0;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
+			if ((mask & 0x1) == 0)
+				continue;
+			preempt_disable();
+			t = per_cpu(rcu_cpu_kthread_task, cpu);
+			if (!cpu_online(cpu) || t == NULL) {
+				preempt_enable();
+				continue;
+			}
+			per_cpu(rcu_cpu_has_work, cpu) = 1;
+			sp.sched_priority = RCU_KTHREAD_PRIO;
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+			preempt_enable();
+		}
+	}
+	return 0;
+}
+
+/*
+ * Set the per-rcu_node kthread's affinity to cover all CPUs that are
+ * served by the rcu_node in question.
+ */
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp)
+{
+	cpumask_var_t cm;
+	int cpu;
+	unsigned long mask = rnp->qsmaskinit;
+
+	if (rnp->node_kthread_task == NULL ||
+	    rnp->qsmaskinit == 0)
+		return;
+	if (!alloc_cpumask_var(&cm, GFP_KERNEL))
+		return;
+	cpumask_clear(cm);
+	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
+		if (mask & 0x1)
+			cpumask_set_cpu(cpu, cm);
+	set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
+	free_cpumask_var(cm);
+}
+
+/*
+ * Spawn a per-rcu_node kthread, setting priority and affinity.
+ */
+static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
+						struct rcu_node *rnp)
+{
+	int rnp_index = rnp - &rsp->node[0];
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (!rcu_kthreads_spawnable ||
+	    rnp->qsmaskinit == 0 ||
+	    rnp->node_kthread_task != NULL)
+		return 0;
+	t = kthread_create(rcu_node_kthread, (void *)rnp, "rcun%d", rnp_index);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	rnp->node_kthread_task = t;
+	wake_up_process(t);
+	sp.sched_priority = 99;
+	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	return 0;
+}
+
+/*
+ * Spawn all kthreads -- called as soon as the scheduler is running.
+ */
+static int __init rcu_spawn_kthreads(void)
+{
+	int cpu;
+	struct rcu_node *rnp;
+
+	rcu_kthreads_spawnable = 1;
+	for_each_possible_cpu(cpu) {
+		init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
+		per_cpu(rcu_cpu_has_work, cpu) = 0;
+		if (cpu_online(cpu))
+			(void)rcu_spawn_one_cpu_kthread(cpu);
+	}
+	rcu_for_each_leaf_node(&rcu_sched_state, rnp) {
+		init_waitqueue_head(&rnp->node_wq);
+		(void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp);
+	}
+	return 0;
+}
+early_initcall(rcu_spawn_kthreads);
+
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	   struct rcu_state *rsp)
@@ -1771,6 +2085,19 @@ static void __cpuinit rcu_online_cpu(int cpu)
 	rcu_preempt_init_percpu_data(cpu);
 }
 
+static void __cpuinit rcu_online_kthreads(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
+
+	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
+	if (rcu_kthreads_spawnable) {
+		(void)rcu_spawn_one_cpu_kthread(cpu);
+		if (rnp->node_kthread_task == NULL)
+			(void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp);
+	}
+}
+
 /*
  * Handle CPU online/offline notification events.
  */
@@ -1778,11 +2105,17 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
+	struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
 		rcu_online_cpu(cpu);
+		rcu_online_kthreads(cpu);
+		break;
+	case CPU_ONLINE:
+		rcu_node_kthread_setaffinity(rnp);
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
@@ -1923,7 +2256,6 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because