Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         |   1
-rw-r--r--  kernel/exit.c           |   1
-rw-r--r--  kernel/fork.c           |   5
-rw-r--r--  kernel/rcutree.c        | 135
-rw-r--r--  kernel/rcutree.h        |   9
-rw-r--r--  kernel/rcutree_plugin.h | 447
-rw-r--r--  kernel/rcutree_trace.c  |  20
7 files changed, 571 insertions, 47 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 2419c9d43918..1a38b4789dda 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += rcutree.o
+obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
 obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
 obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
 obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 869dc221733e..263f95ed7201 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1010,6 +1010,7 @@ NORET_TYPE void do_exit(long code)
 	__free_pipe_info(tsk->splice_pipe);
 
 	preempt_disable();
+	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	schedule();
diff --git a/kernel/fork.c b/kernel/fork.c
index 021e1138556e..642e8b5edf00 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1022,10 +1022,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	copy_flags(clone_flags, p);
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
-#ifdef CONFIG_PREEMPT_RCU
-	p->rcu_read_lock_nesting = 0;
-	p->rcu_flipctr_idx = 0;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
+	rcu_copy_process(p);
 	p->vfork_done = NULL;
 	spin_lock_init(&p->alloc_lock);
 
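The rcu_copy_process() call above replaces the open-coded CONFIG_PREEMPT_RCU initialization. The helper itself lives in a header outside kernel/ and is not shown in this diff; a minimal sketch of what it needs to do under CONFIG_TREE_PREEMPT_RCU, judging from the per-task fields used by rcutree_plugin.h later in this patch, would be (illustrative only, not the actual definition):

	/* Illustrative sketch -- the real helper is defined outside this diff. */
	#ifdef CONFIG_TREE_PREEMPT_RCU
	static inline void rcu_copy_process(struct task_struct *p)
	{
		p->rcu_read_lock_nesting = 0;		/* child starts outside any read-side section */
		p->rcu_read_unlock_special = 0;		/* no deferred rcu_read_unlock() work */
		p->rcu_blocked_cpu = -1;		/* not queued on any rcu_node blocked list */
		INIT_LIST_HEAD(&p->rcu_node_entry);	/* list head used if the child later blocks */
	}
	#else
	static inline void rcu_copy_process(struct task_struct *p)
	{
	}
	#endif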
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4ce3adcfa94d..cc0255714075 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -80,6 +80,21 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+extern long rcu_batches_completed_sched(void);
+static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp,
+			  struct rcu_node *rnp, unsigned long flags);
+static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags);
+static void __rcu_process_callbacks(struct rcu_state *rsp,
+				    struct rcu_data *rdp);
+static void __call_rcu(struct rcu_head *head,
+		       void (*func)(struct rcu_head *rcu),
+		       struct rcu_state *rsp);
+static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp);
+static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
+					   int preemptable);
+
+#include "rcutree_plugin.h"
+
 /*
  * Note a quiescent state.  Because we do not need to know
  * how many quiescent states passed, just if there was at least
@@ -87,16 +102,27 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
  */
 void rcu_sched_qs(int cpu)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	local_irq_save(flags);
+	rdp = &per_cpu(rcu_sched_data, cpu);
 	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
+	rcu_preempt_qs(cpu);
+	local_irq_restore(flags);
 }
 
 void rcu_bh_qs(int cpu)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	local_irq_save(flags);
+	rdp = &per_cpu(rcu_bh_data, cpu);
 	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_NO_HZ
@@ -123,16 +149,6 @@ long rcu_batches_completed_sched(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
 
 /*
- * Return the number of RCU batches processed thus far for debug & stats.
- * @@@ placeholder, maps to rcu_batches_completed_sched().
- */
-long rcu_batches_completed(void)
-{
-	return rcu_batches_completed_sched();
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed);
-
-/*
  * Return the number of RCU BH batches processed thus far for debug & stats.
  */
 long rcu_batches_completed_bh(void)
@@ -193,6 +209,10 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 		return 1;
 	}
 
+	/* If preemptable RCU, no point in sending reschedule IPI. */
+	if (rdp->preemptable)
+		return 0;
+
 	/* The CPU is online, so send it a reschedule IPI. */
 	if (rdp->cpu != smp_processor_id())
 		smp_send_reschedule(rdp->cpu);
@@ -473,6 +493,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 
 	printk(KERN_ERR "INFO: RCU detected CPU stalls:");
 	for (; rnp_cur < rnp_end; rnp_cur++) {
+		rcu_print_task_stall(rnp);
 		if (rnp_cur->qsmask == 0)
 			continue;
 		for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
@@ -686,6 +707,19 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
+ * Clean up after the prior grace period and let rcu_start_gp() start up
+ * the next grace period if one is needed.  Note that the caller must
+ * hold rnp->lock, as required by rcu_start_gp(), which will release it.
+ */
+static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
+	__releases(rnp->lock)
+{
+	rsp->completed = rsp->gpnum;
+	rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
+	rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */
+}
+
+/*
  * Similar to cpu_quiet(), for which it is a helper function.  Allows
  * a group of CPUs to be quieted at one go, though all the CPUs in the
  * group must be represented by the same leaf rcu_node structure.
@@ -706,7 +740,7 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
 		return;
 	}
 	rnp->qsmask &= ~mask;
-	if (rnp->qsmask != 0) {
+	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
 
 		/* Other bits still set at this level, so done. */
 		spin_unlock_irqrestore(&rnp->lock, flags);
@@ -726,14 +760,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
 
 	/*
 	 * Get here if we are the last CPU to pass through a quiescent
-	 * state for this grace period.  Clean up and let rcu_start_gp()
-	 * start up the next grace period if one is needed.  Note that
-	 * we still hold rnp->lock, as required by rcu_start_gp(), which
-	 * will release it.
+	 * state for this grace period.  Invoke cpu_quiet_msk_finish()
+	 * to clean up and start the next grace period if one is needed.
 	 */
-	rsp->completed = rsp->gpnum;
-	rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
-	rcu_start_gp(rsp, flags); /* releases rnp->lock. */
+	cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */
 }
 
 /*
@@ -840,11 +870,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 		spin_lock(&rnp->lock);	/* irqs already disabled. */
 		rnp->qsmaskinit &= ~mask;
 		if (rnp->qsmaskinit != 0) {
-			spin_unlock(&rnp->lock); /* irqs already disabled. */
+			spin_unlock(&rnp->lock); /* irqs remain disabled. */
 			break;
 		}
 		mask = rnp->grpmask;
-		spin_unlock(&rnp->lock); /* irqs already disabled. */
+		spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		rnp = rnp->parent;
 	} while (rnp != NULL);
 	lastcomp = rsp->completed;
@@ -1007,6 +1037,7 @@ void rcu_check_callbacks(int cpu, int user)
 
 		rcu_bh_qs(cpu);
 	}
+	rcu_preempt_check_callbacks(cpu);
 	raise_softirq(RCU_SOFTIRQ);
 }
 
@@ -1188,6 +1219,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 	__rcu_process_callbacks(&rcu_sched_state,
 				&__get_cpu_var(rcu_sched_data));
 	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+	rcu_preempt_process_callbacks();
 
 	/*
 	 * Memory references from any later RCU read-side critical sections
@@ -1252,17 +1284,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
 /*
- * @@@ Queue an RCU callback for invocation after a grace period.
- * @@@ Placeholder pending rcutree_plugin.h.
- */
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
-	call_rcu_sched(head, func);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
-
-/*
  * Queue an RCU for invocation after a quicker grace period.
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
@@ -1335,7 +1356,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 static int rcu_pending(int cpu)
 {
 	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
+	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
+	       rcu_preempt_pending(cpu);
 }
 
 /*
@@ -1348,7 +1370,8 @@ int rcu_needs_cpu(int cpu)
 {
 	/* RCU callbacks either ready or pending? */
 	return per_cpu(rcu_sched_data, cpu).nxtlist ||
-	       per_cpu(rcu_bh_data, cpu).nxtlist;
+	       per_cpu(rcu_bh_data, cpu).nxtlist ||
+	       rcu_preempt_needs_cpu(cpu);
 }
 
 /*
@@ -1383,7 +1406,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
  * that this CPU cannot possibly have any RCU callbacks in flight yet.
  */
 static void __cpuinit
-rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
 {
 	unsigned long flags;
 	long lastcomp;
@@ -1399,6 +1422,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->passed_quiesc = 0;  /* We could be racing with new GP, */
 	rdp->qs_pending = 1;	 /* so set up to respond to current GP. */
 	rdp->beenonline = 1;	 /* We have now been online. */
+	rdp->preemptable = preemptable;
 	rdp->passed_quiesc_completed = lastcomp - 1;
 	rdp->blimit = blimit;
 	spin_unlock(&rnp->lock);		/* irqs remain disabled. */
@@ -1441,12 +1465,13 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 
 static void __cpuinit rcu_online_cpu(int cpu)
 {
-	rcu_init_percpu_data(cpu, &rcu_sched_state);
-	rcu_init_percpu_data(cpu, &rcu_bh_state);
+	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
+	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
+	rcu_preempt_init_percpu_data(cpu);
 }
 
 /*
- * Handle CPU online/offline notifcation events.
+ * Handle CPU online/offline notification events.
  */
 int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 			     unsigned long action, void *hcpu)
@@ -1521,6 +1546,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 		rnp = rsp->level[i];
 		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
 			spin_lock_init(&rnp->lock);
+			rnp->gpnum = 0;
 			rnp->qsmask = 0;
 			rnp->qsmaskinit = 0;
 			rnp->grplo = j * cpustride;
@@ -1538,13 +1564,16 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 					      j / rsp->levelspread[i - 1];
 			}
 			rnp->level = i;
+			INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
+			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
 		}
 	}
 }
 
 /*
- * Helper macro for __rcu_init().  To be used nowhere else!
- * Assigns leaf node pointers into each CPU's rcu_data structure.
+ * Helper macro for __rcu_init() and __rcu_init_preempt().  To be used
+ * nowhere else!  Assigns leaf node pointers into each CPU's rcu_data
+ * structure.
  */
 #define RCU_INIT_FLAVOR(rsp, rcu_data) \
 do { \
@@ -1560,18 +1589,38 @@ do { \
 	} \
 } while (0)
 
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
+void __init __rcu_init_preempt(void)
+{
+	int i;			/* All used by RCU_INIT_FLAVOR(). */
+	int j;
+	struct rcu_node *rnp;
+
+	RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
+}
+
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+void __init __rcu_init_preempt(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
 void __init __rcu_init(void)
 {
-	int i;			/* All used by RCU_DATA_PTR_INIT(). */
+	int i;			/* All used by RCU_INIT_FLAVOR(). */
 	int j;
 	struct rcu_node *rnp;
 
-	printk(KERN_INFO "Hierarchical RCU implementation.\n");
+	rcu_bootup_announce();
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 	RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
 	RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
+	__rcu_init_preempt();
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
 
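The #include "rcutree_plugin.h" added near the top of rcutree.c implements a small compile-time plugin scheme: the core code calls rcu_preempt_qs(), rcu_preempt_pending(), rcu_preempt_process_callbacks() and friends unconditionally, and the included header supplies either the preemptable implementations or empty stubs, while the forward declarations placed above the #include let the plugin call back into core helpers such as cpu_quiet_msk() and __call_rcu(). Reduced to its essentials (names below are illustrative, not from this patch), the pattern looks like:

	/* core.c -- a sketch of the plugin-include pattern, not kernel code. */
	static void feature_hook(void);		/* definition supplied by the plugin header */
	#include "plugin.h"

	void core_path(void)
	{
		feature_hook();			/* real work or a no-op, chosen at build time */
	}

	/* plugin.h */
	#ifdef CONFIG_FEATURE
	static void feature_hook(void)
	{
		/* feature-specific work */
	}
	#else
	static void feature_hook(void)
	{
		/* feature compiled out: stub */
	}
	#endif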
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 0024e5ddcc68..ca560364d8cd 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -80,6 +80,7 @@ struct rcu_dynticks {
  */
 struct rcu_node {
 	spinlock_t lock;
+	long	gpnum;		/* Current grace period for this node. */
 	unsigned long qsmask;	/* CPUs or groups that need to switch in */
 				/*  order for current grace period to proceed.*/
 	unsigned long qsmaskinit;
@@ -90,6 +91,8 @@ struct rcu_node {
 	u8	grpnum;		/* CPU/group number for next level up. */
 	u8	level;		/* root is at level 0. */
 	struct rcu_node *parent;
+	struct list_head blocked_tasks[2];
+				/* Tasks blocked in RCU read-side critsect. */
 } ____cacheline_internodealigned_in_smp;
 
 /* Index values for nxttail array in struct rcu_data. */
@@ -111,6 +114,7 @@ struct rcu_data {
 	bool		passed_quiesc;	/* User-mode/idle loop etc. */
 	bool		qs_pending;	/* Core waits for quiesc state. */
 	bool		beenonline;	/* CPU online at least once. */
+	bool		preemptable;	/* Preemptable RCU? */
 	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
 	unsigned long grpmask;	/* Mask to apply to leaf qsmask. */
 
@@ -244,5 +248,10 @@ DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
 extern struct rcu_state rcu_bh_state;
 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+#ifdef CONFIG_TREE_PREEMPT_RCU
+extern struct rcu_state rcu_preempt_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
 #endif /* #ifdef RCU_TREE_NONCORE */
 
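The two blocked_tasks lists added to struct rcu_node are indexed by grace-period parity: rcu_preempted_readers() in rcutree_plugin.h treats blocked_tasks[rnp->gpnum & 0x1] as the list that holds up the current grace period. When a reader is preempted, rcu_preempt_qs() chooses the list with

	phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);

As a worked example: if this CPU's bit is still set in ->qsmask (it has not yet passed through a quiescent state), phase equals gpnum & 0x1 and the task goes on the current-grace-period list; if the CPU has already checked in, phase is the opposite parity and the task is queued against the next grace period instead.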
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
new file mode 100644
index 000000000000..cd2ab67400c6
--- /dev/null
+++ b/kernel/rcutree_plugin.h
@@ -0,0 +1,447 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Internal non-public definitions that provide either classic
+ * or preemptable semantics.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright Red Hat, 2009
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Ingo Molnar <mingo@elte.hu>
+ *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
+struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
+DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
+
+/*
+ * Tell them what RCU they are running.
+ */
+static inline void rcu_bootup_announce(void)
+{
+	printk(KERN_INFO
+	       "Experimental preemptable hierarchical RCU implementation.\n");
+}
+
+/*
+ * Return the number of RCU-preempt batches processed thus far
+ * for debug and statistics.
+ */
+long rcu_batches_completed_preempt(void)
+{
+	return rcu_preempt_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
+
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed(void)
+{
+	return rcu_batches_completed_preempt();
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Record a preemptable-RCU quiescent state for the specified CPU.  Note
+ * that this just means that the task currently running on the CPU is
+ * not in a quiescent state.  There might be any number of tasks blocked
+ * while in an RCU read-side critical section.
+ */
+static void rcu_preempt_qs_record(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+	rdp->passed_quiesc = 1;
+	rdp->passed_quiesc_completed = rdp->completed;
+}
+
+/*
+ * We have entered the scheduler or are between softirqs in ksoftirqd.
+ * If we are in an RCU read-side critical section, we need to reflect
+ * that in the state of the rcu_node structure corresponding to this CPU.
+ * Caller must disable hardirqs.
+ */
+static void rcu_preempt_qs(int cpu)
+{
+	struct task_struct *t = current;
+	int phase;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+
+	if (t->rcu_read_lock_nesting &&
+	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+
+		/* Possibly blocking in an RCU read-side critical section. */
+		rdp = rcu_preempt_state.rda[cpu];
+		rnp = rdp->mynode;
+		spin_lock(&rnp->lock);
+		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+		t->rcu_blocked_cpu = cpu;
+
+		/*
+		 * If this CPU has already checked in, then this task
+		 * will hold up the next grace period rather than the
+		 * current grace period.  Queue the task accordingly.
+		 * If the task is queued for the current grace period
+		 * (i.e., this CPU has not yet passed through a quiescent
+		 * state for the current grace period), then as long
+		 * as that task remains queued, the current grace period
+		 * cannot end.
+		 */
+		phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
+		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
+		smp_mb();  /* Ensure later ctxt swtch seen after above. */
+		spin_unlock(&rnp->lock);
+	}
+
+	/*
+	 * Either we were not in an RCU read-side critical section to
+	 * begin with, or we have now recorded that critical section
+	 * globally.  Either way, we can now note a quiescent state
+	 * for this CPU.  Again, if we were in an RCU read-side critical
+	 * section, and if that critical section was blocking the current
+	 * grace period, then the fact that the task has been enqueued
+	 * means that we continue to block the current grace period.
+	 */
+	rcu_preempt_qs_record(cpu);
+	t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
+					RCU_READ_UNLOCK_GOT_QS);
+}
+
+/*
+ * Tree-preemptable RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+	ACCESS_ONCE(current->rcu_read_lock_nesting)++;
+	barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+static void rcu_read_unlock_special(struct task_struct *t)
+{
+	int empty;
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp;
+	int special;
+
+	/* NMI handlers cannot block and cannot safely manipulate state. */
+	if (in_nmi())
+		return;
+
+	local_irq_save(flags);
+
+	/*
+	 * If RCU core is waiting for this CPU to exit critical section,
+	 * let it know that we have done so.
+	 */
+	special = t->rcu_read_unlock_special;
+	if (special & RCU_READ_UNLOCK_NEED_QS) {
+		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
+	}
+
+	/* Hardware IRQ handlers cannot block. */
+	if (in_irq()) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	/* Clean up if blocked during RCU read-side critical section. */
+	if (special & RCU_READ_UNLOCK_BLOCKED) {
+		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+
+		/* Remove this task from the list it blocked on. */
+		rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode;
+		spin_lock(&rnp->lock);
+		empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+		list_del_init(&t->rcu_node_entry);
+		t->rcu_blocked_cpu = -1;
+
+		/*
+		 * If this was the last task on the current list, and if
+		 * we aren't waiting on any CPUs, report the quiescent state.
+		 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
+		 * drop rnp->lock and restore irq.
+		 */
+		if (!empty && rnp->qsmask == 0 &&
+		    list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
+			t->rcu_read_unlock_special &=
+				~(RCU_READ_UNLOCK_NEED_QS |
+				  RCU_READ_UNLOCK_GOT_QS);
+			if (rnp->parent == NULL) {
+				/* Only one rcu_node in the tree. */
+				cpu_quiet_msk_finish(&rcu_preempt_state, flags);
+				return;
+			}
+			/* Report up the rest of the hierarchy. */
+			mask = rnp->grpmask;
+			spin_unlock_irqrestore(&rnp->lock, flags);
+			rnp = rnp->parent;
+			spin_lock_irqsave(&rnp->lock, flags);
+			cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
+			return;
+		}
+		spin_unlock(&rnp->lock);
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Tree-preemptable RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+	struct task_struct *t = current;
+
+	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
+	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
+	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+		rcu_read_unlock_special(t);
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+/*
+ * Scan the current list of tasks blocked within RCU read-side critical
+ * sections, printing out the tid of each.
+ */
+static void rcu_print_task_stall(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct list_head *lp;
+	int phase = rnp->gpnum & 0x1;
+	struct task_struct *t;
+
+	if (!list_empty(&rnp->blocked_tasks[phase])) {
+		spin_lock_irqsave(&rnp->lock, flags);
+		phase = rnp->gpnum & 0x1;  /* re-read under lock. */
+		lp = &rnp->blocked_tasks[phase];
+		list_for_each_entry(t, lp, rcu_node_entry)
+			printk(" P%d", t->pid);
+		spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/*
+ * Check for preempted RCU readers for the specified rcu_node structure.
+ * If the caller needs a reliable answer, it must hold the rcu_node's
+ * ->lock.
+ */
+static int rcu_preempted_readers(struct rcu_node *rnp)
+{
+	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+}
+
+/*
+ * Check for a quiescent state from the current CPU.  When a task blocks,
+ * the task is recorded in the corresponding CPU's rcu_node structure,
+ * which is checked elsewhere.
+ *
+ * Caller must disable hard irqs.
+ */
+static void rcu_preempt_check_callbacks(int cpu)
+{
+	struct task_struct *t = current;
+
+	if (t->rcu_read_lock_nesting == 0) {
+		t->rcu_read_unlock_special &=
+			~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
+		rcu_preempt_qs_record(cpu);
+		return;
+	}
+	if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
+		if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
+			rcu_preempt_qs_record(cpu);
+			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
+		} else if (!(t->rcu_read_unlock_special &
+			     RCU_READ_UNLOCK_NEED_QS)) {
+			t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+		}
+	}
+}
+
+/*
+ * Process callbacks for preemptable RCU.
+ */
+static void rcu_preempt_process_callbacks(void)
+{
+	__rcu_process_callbacks(&rcu_preempt_state,
+				&__get_cpu_var(rcu_preempt_data));
+}
+
+/*
+ * Queue a preemptable-RCU callback for invocation after a grace period.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_preempt_state);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Check to see if there is any immediate preemptable-RCU-related work
+ * to be done.
+ */
+static int rcu_preempt_pending(int cpu)
+{
+	return __rcu_pending(&rcu_preempt_state,
+			     &per_cpu(rcu_preempt_data, cpu));
+}
+
+/*
+ * Does preemptable RCU need the CPU to stay out of dynticks mode?
+ */
+static int rcu_preempt_needs_cpu(int cpu)
+{
+	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
+}
+
+/*
+ * Initialize preemptable RCU's per-CPU data.
+ */
+static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
+{
+	rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
+}
+
+/*
+ * Check for a task exiting while in a preemptable-RCU read-side
+ * critical section, clean up if so.  No need to issue warnings,
+ * as debug_check_no_locks_held() already does this if lockdep
+ * is enabled.
+ */
+void exit_rcu(void)
+{
+	struct task_struct *t = current;
+
+	if (t->rcu_read_lock_nesting == 0)
+		return;
+	t->rcu_read_lock_nesting = 1;
+	rcu_read_unlock();
+}
+
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+/*
+ * Tell them what RCU they are running.
+ */
+static inline void rcu_bootup_announce(void)
+{
+	printk(KERN_INFO "Hierarchical RCU implementation.\n");
+}
+
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed(void)
+{
+	return rcu_batches_completed_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * CPUs being in quiescent states.
+ */
+static void rcu_preempt_qs(int cpu)
+{
+}
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
+static void rcu_print_task_stall(struct rcu_node *rnp)
+{
+}
+
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/*
+ * Because preemptable RCU does not exist, there are never any preempted
+ * RCU readers.
+ */
+static int rcu_preempted_readers(struct rcu_node *rnp)
+{
+	return 0;
+}
+
+/*
+ * Because preemptable RCU does not exist, it never has any callbacks
+ * to check.
+ */
+void rcu_preempt_check_callbacks(int cpu)
+{
+}
+
+/*
+ * Because preemptable RCU does not exist, it never has any callbacks
+ * to process.
+ */
+void rcu_preempt_process_callbacks(void)
+{
+}
+
+/*
+ * In classic RCU, call_rcu() is just call_rcu_sched().
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	call_rcu_sched(head, func);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Because preemptable RCU does not exist, it never has any work to do.
+ */
+static int rcu_preempt_pending(int cpu)
+{
+	return 0;
+}
+
+/*
+ * Because preemptable RCU does not exist, it never needs any CPU.
+ */
+static int rcu_preempt_needs_cpu(int cpu)
+{
+	return 0;
+}
+
+/*
+ * Because preemptable RCU does not exist, there is no per-CPU
+ * data to initialize.
+ */
+static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
+{
+}
+
+#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
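With CONFIG_TREE_PREEMPT_RCU, the call_rcu() defined above queues callbacks on the rcu_preempt flavor, and readers may now be preempted while inside rcu_read_lock()/rcu_read_unlock(). The update-side API is unchanged; a minimal, self-contained usage sketch (the struct and function names here are made up for illustration and are not part of this patch):

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {
		int value;
		struct rcu_head rcu;		/* reclaimed via call_rcu() */
	};

	static struct foo *global_foo;		/* RCU-protected pointer */
	static DEFINE_SPINLOCK(foo_lock);	/* serializes updaters only */

	static void foo_reclaim(struct rcu_head *head)
	{
		kfree(container_of(head, struct foo, rcu));
	}

	/* Reader: may be preempted inside the critical section under TREE_PREEMPT_RCU. */
	static int foo_read_value(void)
	{
		struct foo *p;
		int val = -1;

		rcu_read_lock();
		p = rcu_dereference(global_foo);
		if (p)
			val = p->value;
		rcu_read_unlock();
		return val;
	}

	/* Updater: publish a new version, free the old one after a grace period. */
	static void foo_update_value(struct foo *newp)
	{
		struct foo *oldp;

		spin_lock(&foo_lock);
		oldp = global_foo;
		rcu_assign_pointer(global_foo, newp);
		spin_unlock(&foo_lock);
		if (oldp)
			call_rcu(&oldp->rcu, foo_reclaim);
	}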
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 31af3a0fb6d5..0ea1bff69727 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -77,6 +77,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 
 static int show_rcudata(struct seq_file *m, void *unused)
 {
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	seq_puts(m, "rcu_preempt:\n");
+	PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	seq_puts(m, "rcu_sched:\n");
 	PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m);
 	seq_puts(m, "rcu_bh:\n");
@@ -125,6 +129,10 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n");
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	seq_puts(m, "\"rcu_preempt:\"\n");
+	PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	seq_puts(m, "\"rcu_sched:\"\n");
 	PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
 	seq_puts(m, "\"rcu_bh:\"\n");
@@ -172,6 +180,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 
 static int show_rcuhier(struct seq_file *m, void *unused)
 {
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	seq_puts(m, "rcu_preempt:\n");
+	print_one_rcu_state(m, &rcu_preempt_state);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	seq_puts(m, "rcu_sched:\n");
 	print_one_rcu_state(m, &rcu_sched_state);
 	seq_puts(m, "rcu_bh:\n");
@@ -194,6 +206,10 @@ static struct file_operations rcuhier_fops = {
 
 static int show_rcugp(struct seq_file *m, void *unused)
 {
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n",
+		   rcu_preempt_state.completed, rcu_preempt_state.gpnum);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n",
 		   rcu_sched_state.completed, rcu_sched_state.gpnum);
 	seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n",
@@ -244,6 +260,10 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
 
 static int show_rcu_pending(struct seq_file *m, void *unused)
 {
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	seq_puts(m, "rcu_preempt:\n");
+	print_rcu_pendings(m, &rcu_preempt_state);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	seq_puts(m, "rcu_sched:\n");
 	print_rcu_pendings(m, &rcu_sched_state);
 	seq_puts(m, "rcu_bh:\n");