path: root/kernel/rcutree.c
author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2009-08-22 16:56:52 -0400
committer  Ingo Molnar <mingo@elte.hu>                    2009-08-23 04:32:40 -0400
commit     f41d911f8c49a5d65c86504c19e8204bb605c4fd (patch)
tree       59bcd3048652ef290b3e19d2904409afd5c90eb3 /kernel/rcutree.c
parent     a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 (diff)
rcu: Merge preemptable-RCU functionality into hierarchical RCU
Create a kernel/rcutree_plugin.h file that contains definitions for preemptable RCU (or, under the #else branch of the #ifdef, empty definitions for the classic non-preemptable semantics). These definitions fit into plugins defined in kernel/rcutree.c for this purpose.

This variant of preemptable RCU uses a new algorithm whose read-side expense is roughly that of classic hierarchical RCU under CONFIG_PREEMPT. This new algorithm's update-side expense is similar to that of classic hierarchical RCU, and, in the absence of read-side preemption or blocking, is exactly that of classic hierarchical RCU.

Perhaps more important, this new algorithm has a much simpler implementation, saving well over 1,000 lines of code compared to mainline's implementation of preemptable RCU, which will hopefully be retired in favor of this new algorithm.

The simplifications are obtained by maintaining per-task nesting state for running tasks, and using a simple lock-protected algorithm to handle accounting when tasks block within RCU read-side critical sections, making use of lessons learned while creating numerous user-level RCU implementations over the past 18 months.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josht@linux.vnet.ibm.com
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
LKML-Reference: <12509746134003-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
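The last paragraph above is the heart of the change: read-side primitives only manipulate a per-task nesting counter, and only tasks that actually block inside a read-side critical section are moved onto a lock-protected per-node list that the grace-period machinery must drain. The stand-alone C sketch below models that bookkeeping; it is an illustration only, not the patch's rcutree_plugin.h code, and every name in it (toy_task, rcu_nesting, toy_rcu_note_block(), and so on) is invented for this example.

#include <stddef.h>

struct toy_task {
        int rcu_nesting;               /* per-task read-side nesting count */
        int rcu_blocked;               /* nonzero if queued as a blocked reader */
        struct toy_task *next;         /* link on the node's blocked-reader list */
};

struct toy_rcu_node {
        struct toy_task *blocked_readers;  /* protected by rnp->lock in the kernel */
};

/* Read-side entry: just bump the nesting count -- no locks, no atomics. */
static void toy_rcu_read_lock(struct toy_task *t)
{
        t->rcu_nesting++;
}

/* Scheduler hook: a task blocking (or being preempted) inside a critical
 * section is queued on its node's blocked-reader list. */
static void toy_rcu_note_block(struct toy_rcu_node *rnp, struct toy_task *t)
{
        if (t->rcu_nesting > 0 && !t->rcu_blocked) {
                t->rcu_blocked = 1;
                t->next = rnp->blocked_readers;
                rnp->blocked_readers = t;
        }
}

/* Read-side exit: cheap unless this task was queued while blocked, in
 * which case it must remove itself from the list on the way out. */
static void toy_rcu_read_unlock(struct toy_rcu_node *rnp, struct toy_task *t)
{
        if (--t->rcu_nesting == 0 && t->rcu_blocked) {
                struct toy_task **p = &rnp->blocked_readers;

                while (*p != NULL && *p != t)
                        p = &(*p)->next;
                if (*p != NULL)
                        *p = t->next;
                t->rcu_blocked = 0;
        }
}

/* A grace period for this node cannot complete while blocked readers remain. */
static int toy_rcu_preempted_readers(struct toy_rcu_node *rnp)
{
        return rnp->blocked_readers != NULL;
}

The real patch keeps two such lists per rcu_node (blocked_tasks[0] and blocked_tasks[1], initialized in the diff below), presumably so that readers the current grace period must wait for are kept apart from readers that only matter to the next one.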
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c | 135
1 file changed, 92 insertions(+), 43 deletions(-)
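The diff below adds calls to a family of rcu_preempt_*() hooks (rcu_preempt_qs(), rcu_preempted_readers(), rcu_print_task_stall(), rcu_preempt_check_callbacks(), rcu_preempt_process_callbacks(), rcu_preempt_pending(), rcu_preempt_needs_cpu(), rcu_preempt_init_percpu_data(), rcu_bootup_announce()) whose definitions live in the new kernel/rcutree_plugin.h, which is outside this diffstat. As a rough guide to how the plugin mechanism degenerates in the classic case, the CONFIG_TREE_PREEMPT_RCU=n stubs plausibly look like the sketch below; the signatures are inferred from the call sites visible in the diff, not copied from the patch, so the real header may differ in detail.

/* Plausible shape of the !CONFIG_TREE_PREEMPT_RCU stubs in rcutree_plugin.h. */

static void __init rcu_bootup_announce(void)
{
        /* The diff replaces this printk in __rcu_init() with this hook. */
        printk(KERN_INFO "Hierarchical RCU implementation.\n");
}

static void rcu_preempt_qs(int cpu)
{
}

static int rcu_preempted_readers(struct rcu_node *rnp)
{
        return 0;               /* no preemptable readers to wait for */
}

static void rcu_print_task_stall(struct rcu_node *rnp)
{
}

static void rcu_preempt_check_callbacks(int cpu)
{
}

static void rcu_preempt_process_callbacks(void)
{
}

static int rcu_preempt_pending(int cpu)
{
        return 0;
}

static int rcu_preempt_needs_cpu(int cpu)
{
        return 0;
}

static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
}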
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4ce3adcfa94d..cc0255714075 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -80,6 +80,21 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+extern long rcu_batches_completed_sched(void);
+static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp,
+                          struct rcu_node *rnp, unsigned long flags);
+static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags);
+static void __rcu_process_callbacks(struct rcu_state *rsp,
+                                    struct rcu_data *rdp);
+static void __call_rcu(struct rcu_head *head,
+                       void (*func)(struct rcu_head *rcu),
+                       struct rcu_state *rsp);
+static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp);
+static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
+                                           int preemptable);
+
+#include "rcutree_plugin.h"
+
 /*
  * Note a quiescent state. Because we do not need to know
  * how many quiescent states passed, just if there was at least
@@ -87,16 +102,27 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
  */
 void rcu_sched_qs(int cpu)
 {
-        struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
+        unsigned long flags;
+        struct rcu_data *rdp;
+
+        local_irq_save(flags);
+        rdp = &per_cpu(rcu_sched_data, cpu);
         rdp->passed_quiesc = 1;
         rdp->passed_quiesc_completed = rdp->completed;
+        rcu_preempt_qs(cpu);
+        local_irq_restore(flags);
 }
 
 void rcu_bh_qs(int cpu)
 {
-        struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+        unsigned long flags;
+        struct rcu_data *rdp;
+
+        local_irq_save(flags);
+        rdp = &per_cpu(rcu_bh_data, cpu);
         rdp->passed_quiesc = 1;
         rdp->passed_quiesc_completed = rdp->completed;
+        local_irq_restore(flags);
 }
 
 #ifdef CONFIG_NO_HZ
@@ -123,16 +149,6 @@ long rcu_batches_completed_sched(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
 
 /*
- * Return the number of RCU batches processed thus far for debug & stats.
- * @@@ placeholder, maps to rcu_batches_completed_sched().
- */
-long rcu_batches_completed(void)
-{
-        return rcu_batches_completed_sched();
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed);
-
-/*
  * Return the number of RCU BH batches processed thus far for debug & stats.
  */
 long rcu_batches_completed_bh(void)
@@ -193,6 +209,10 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
                 return 1;
         }
 
+        /* If preemptable RCU, no point in sending reschedule IPI. */
+        if (rdp->preemptable)
+                return 0;
+
         /* The CPU is online, so send it a reschedule IPI. */
         if (rdp->cpu != smp_processor_id())
                 smp_send_reschedule(rdp->cpu);
@@ -473,6 +493,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 
         printk(KERN_ERR "INFO: RCU detected CPU stalls:");
         for (; rnp_cur < rnp_end; rnp_cur++) {
+                rcu_print_task_stall(rnp);
                 if (rnp_cur->qsmask == 0)
                         continue;
                 for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
@@ -686,6 +707,19 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
+ * Clean up after the prior grace period and let rcu_start_gp() start up
+ * the next grace period if one is needed. Note that the caller must
+ * hold rnp->lock, as required by rcu_start_gp(), which will release it.
+ */
+static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
+        __releases(rnp->lock)
+{
+        rsp->completed = rsp->gpnum;
+        rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
+        rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
+}
+
+/*
  * Similar to cpu_quiet(), for which it is a helper function. Allows
  * a group of CPUs to be quieted at one go, though all the CPUs in the
  * group must be represented by the same leaf rcu_node structure.
@@ -706,7 +740,7 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
                 return;
         }
         rnp->qsmask &= ~mask;
-        if (rnp->qsmask != 0) {
+        if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
 
                 /* Other bits still set at this level, so done. */
                 spin_unlock_irqrestore(&rnp->lock, flags);
@@ -726,14 +760,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
 
         /*
          * Get here if we are the last CPU to pass through a quiescent
-         * state for this grace period. Clean up and let rcu_start_gp()
-         * start up the next grace period if one is needed. Note that
-         * we still hold rnp->lock, as required by rcu_start_gp(), which
-         * will release it.
+         * state for this grace period. Invoke cpu_quiet_msk_finish()
+         * to clean up and start the next grace period if one is needed.
          */
-        rsp->completed = rsp->gpnum;
-        rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
-        rcu_start_gp(rsp, flags); /* releases rnp->lock. */
+        cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */
 }
 
 /*
@@ -840,11 +870,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
                 spin_lock(&rnp->lock);          /* irqs already disabled. */
                 rnp->qsmaskinit &= ~mask;
                 if (rnp->qsmaskinit != 0) {
-                        spin_unlock(&rnp->lock); /* irqs already disabled. */
+                        spin_unlock(&rnp->lock); /* irqs remain disabled. */
                         break;
                 }
                 mask = rnp->grpmask;
-                spin_unlock(&rnp->lock);        /* irqs already disabled. */
+                spin_unlock(&rnp->lock);        /* irqs remain disabled. */
                 rnp = rnp->parent;
         } while (rnp != NULL);
         lastcomp = rsp->completed;
@@ -1007,6 +1037,7 @@ void rcu_check_callbacks(int cpu, int user)
 
                 rcu_bh_qs(cpu);
         }
+        rcu_preempt_check_callbacks(cpu);
         raise_softirq(RCU_SOFTIRQ);
 }
 
@@ -1188,6 +1219,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
         __rcu_process_callbacks(&rcu_sched_state,
                                 &__get_cpu_var(rcu_sched_data));
         __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+        rcu_preempt_process_callbacks();
 
         /*
          * Memory references from any later RCU read-side critical sections
@@ -1252,17 +1284,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
 /*
- * @@@ Queue an RCU callback for invocation after a grace period.
- * @@@ Placeholder pending rcutree_plugin.h.
- */
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-        call_rcu_sched(head, func);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
-
-/*
  * Queue an RCU for invocation after a quicker grace period.
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
@@ -1335,7 +1356,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 static int rcu_pending(int cpu)
 {
         return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
-               __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
+               __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
+               rcu_preempt_pending(cpu);
 }
 
 /*
@@ -1348,7 +1370,8 @@ int rcu_needs_cpu(int cpu)
 {
         /* RCU callbacks either ready or pending? */
         return per_cpu(rcu_sched_data, cpu).nxtlist ||
-               per_cpu(rcu_bh_data, cpu).nxtlist;
+               per_cpu(rcu_bh_data, cpu).nxtlist ||
+               rcu_preempt_needs_cpu(cpu);
 }
 
 /*
@@ -1383,7 +1406,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
  * that this CPU cannot possibly have any RCU callbacks in flight yet.
  */
 static void __cpuinit
-rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
 {
         unsigned long flags;
         long lastcomp;
@@ -1399,6 +1422,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
         rdp->passed_quiesc = 0;  /* We could be racing with new GP, */
         rdp->qs_pending = 1;     /* so set up to respond to current GP. */
         rdp->beenonline = 1;     /* We have now been online. */
+        rdp->preemptable = preemptable;
         rdp->passed_quiesc_completed = lastcomp - 1;
         rdp->blimit = blimit;
         spin_unlock(&rnp->lock);                /* irqs remain disabled. */
@@ -1441,12 +1465,13 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 
 static void __cpuinit rcu_online_cpu(int cpu)
 {
-        rcu_init_percpu_data(cpu, &rcu_sched_state);
-        rcu_init_percpu_data(cpu, &rcu_bh_state);
+        rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
+        rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
+        rcu_preempt_init_percpu_data(cpu);
 }
 
 /*
- * Handle CPU online/offline notifcation events.
+ * Handle CPU online/offline notification events.
  */
 int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                              unsigned long action, void *hcpu)
@@ -1521,6 +1546,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
                 rnp = rsp->level[i];
                 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
                         spin_lock_init(&rnp->lock);
+                        rnp->gpnum = 0;
                         rnp->qsmask = 0;
                         rnp->qsmaskinit = 0;
                         rnp->grplo = j * cpustride;
@@ -1538,13 +1564,16 @@ static void __init rcu_init_one(struct rcu_state *rsp)
                                               j / rsp->levelspread[i - 1];
                         }
                         rnp->level = i;
+                        INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
+                        INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
                 }
         }
 }
 
 /*
- * Helper macro for __rcu_init(). To be used nowhere else!
- * Assigns leaf node pointers into each CPU's rcu_data structure.
+ * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
+ * nowhere else! Assigns leaf node pointers into each CPU's rcu_data
+ * structure.
  */
 #define RCU_INIT_FLAVOR(rsp, rcu_data) \
 do { \
@@ -1560,18 +1589,38 @@ do { \
         } \
 } while (0)
 
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
+void __init __rcu_init_preempt(void)
+{
+        int i;                  /* All used by RCU_INIT_FLAVOR(). */
+        int j;
+        struct rcu_node *rnp;
+
+        RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
+}
+
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+void __init __rcu_init_preempt(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
 void __init __rcu_init(void)
 {
-        int i;                  /* All used by RCU_DATA_PTR_INIT(). */
+        int i;                  /* All used by RCU_INIT_FLAVOR(). */
         int j;
         struct rcu_node *rnp;
 
-        printk(KERN_INFO "Hierarchical RCU implementation.\n");
+        rcu_bootup_announce();
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
         printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
         RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
         RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
+        __rcu_init_preempt();
         open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
 