aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2010-06-29 19:49:16 -0400
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2010-08-20 11:55:00 -0400
commita57eb940d130477a799dfb24a570ee04979c0f7f (patch)
tree5add1c135a302cf1c1a454b0620ed17eb802923b
parent4d87ffadbba88105f33271bef5f2c79366c6a4e1 (diff)
rcu: Add a TINY_PREEMPT_RCU
Implement a small-memory-footprint uniprocessor-only implementation of preemptible RCU. This implementation uses but a single blocked-tasks list rather than the combinatorial number used per leaf rcu_node by TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies processing. This version also takes advantage of uniprocessor execution to accelerate grace periods in the case where there are no readers. The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU. This implementation is a step towards having RCU implementation driven off of the SMP and PREEMPT kernel configuration variables, which can happen once this implementation has accumulated sufficient experience. Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as suggested by Steve Rostedt in order to avoid the compiler-reordering issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183). As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU. Of course, for non-real-time workloads, CONFIG_TINY_RCU is even better. CONFIG_TREE_PREEMPT_RCU text data bss dec filename 13 0 0 13 kernel/rcupdate.o 6170 825 28 7023 kernel/rcutree.o ---- 7026 Total CONFIG_TINY_PREEMPT_RCU text data bss dec filename 13 0 0 13 kernel/rcupdate.o 2081 81 8 2170 kernel/rcutiny.o ---- 2183 Total CONFIG_TINY_RCU (non-preemptible) text data bss dec filename 13 0 0 13 kernel/rcupdate.o 719 25 0 744 kernel/rcutiny.o --- 757 Total Requested-by: Loïc Minier <loic.minier@canonical.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r--include/linux/hardirq.h2
-rw-r--r--include/linux/init_task.h10
-rw-r--r--include/linux/rcupdate.h3
-rw-r--r--include/linux/rcutiny.h126
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/sched.h10
-rw-r--r--init/Kconfig16
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/rcutiny.c33
-rw-r--r--kernel/rcutiny_plugin.h582
10 files changed, 717 insertions, 68 deletions
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b387669dab..1f4517d55b19 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
139#endif 139#endif
140 140
141#if defined(CONFIG_NO_HZ) 141#if defined(CONFIG_NO_HZ)
142#if defined(CONFIG_TINY_RCU) 142#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
143extern void rcu_enter_nohz(void); 143extern void rcu_enter_nohz(void);
144extern void rcu_exit_nohz(void); 144extern void rcu_exit_nohz(void);
145 145
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6460fc65ed6b..2fea6c8ef6ba 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -82,11 +82,17 @@ extern struct group_info init_groups;
82# define CAP_INIT_BSET CAP_FULL_SET 82# define CAP_INIT_BSET CAP_FULL_SET
83 83
84#ifdef CONFIG_TREE_PREEMPT_RCU 84#ifdef CONFIG_TREE_PREEMPT_RCU
85#define INIT_TASK_RCU_TREE_PREEMPT() \
86 .rcu_blocked_node = NULL,
87#else
88#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
89#endif
90#ifdef CONFIG_PREEMPT_RCU
85#define INIT_TASK_RCU_PREEMPT(tsk) \ 91#define INIT_TASK_RCU_PREEMPT(tsk) \
86 .rcu_read_lock_nesting = 0, \ 92 .rcu_read_lock_nesting = 0, \
87 .rcu_read_unlock_special = 0, \ 93 .rcu_read_unlock_special = 0, \
88 .rcu_blocked_node = NULL, \ 94 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
89 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), 95 INIT_TASK_RCU_TREE_PREEMPT()
90#else 96#else
91#define INIT_TASK_RCU_PREEMPT(tsk) 97#define INIT_TASK_RCU_PREEMPT(tsk)
92#endif 98#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 27b44b3e3024..24b896649384 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -58,7 +58,6 @@ struct rcu_head {
58}; 58};
59 59
60/* Exported common interfaces */ 60/* Exported common interfaces */
61extern void rcu_barrier(void);
62extern void rcu_barrier_bh(void); 61extern void rcu_barrier_bh(void);
63extern void rcu_barrier_sched(void); 62extern void rcu_barrier_sched(void);
64extern void synchronize_sched_expedited(void); 63extern void synchronize_sched_expedited(void);
@@ -69,7 +68,7 @@ extern void rcu_init(void);
69 68
70#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 69#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
71#include <linux/rcutree.h> 70#include <linux/rcutree.h>
72#elif defined(CONFIG_TINY_RCU) 71#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
73#include <linux/rcutiny.h> 72#include <linux/rcutiny.h>
74#else 73#else
75#error "Unknown RCU implementation specified to kernel configuration" 74#error "Unknown RCU implementation specified to kernel configuration"
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e2e893144a84..4cc5eba41616 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -29,66 +29,51 @@
29 29
30void rcu_sched_qs(int cpu); 30void rcu_sched_qs(int cpu);
31void rcu_bh_qs(int cpu); 31void rcu_bh_qs(int cpu);
32static inline void rcu_note_context_switch(int cpu)
33{
34 rcu_sched_qs(cpu);
35}
36 32
33#ifdef CONFIG_TINY_RCU
37#define __rcu_read_lock() preempt_disable() 34#define __rcu_read_lock() preempt_disable()
38#define __rcu_read_unlock() preempt_enable() 35#define __rcu_read_unlock() preempt_enable()
36#else /* #ifdef CONFIG_TINY_RCU */
37void __rcu_read_lock(void);
38void __rcu_read_unlock(void);
39#endif /* #else #ifdef CONFIG_TINY_RCU */
39#define __rcu_read_lock_bh() local_bh_disable() 40#define __rcu_read_lock_bh() local_bh_disable()
40#define __rcu_read_unlock_bh() local_bh_enable() 41#define __rcu_read_unlock_bh() local_bh_enable()
41#define call_rcu_sched call_rcu 42extern void call_rcu_sched(struct rcu_head *head,
43 void (*func)(struct rcu_head *rcu));
42 44
43#define rcu_init_sched() do { } while (0) 45#define rcu_init_sched() do { } while (0)
44extern void rcu_check_callbacks(int cpu, int user);
45 46
46static inline int rcu_needs_cpu(int cpu) 47extern void synchronize_sched(void);
47{
48 return 0;
49}
50 48
51/* 49#ifdef CONFIG_TINY_RCU
52 * Return the number of grace periods.
53 */
54static inline long rcu_batches_completed(void)
55{
56 return 0;
57}
58 50
59/* 51#define call_rcu call_rcu_sched
60 * Return the number of bottom-half grace periods.
61 */
62static inline long rcu_batches_completed_bh(void)
63{
64 return 0;
65}
66 52
67static inline void rcu_force_quiescent_state(void) 53static inline void synchronize_rcu(void)
68{ 54{
55 synchronize_sched();
69} 56}
70 57
71static inline void rcu_bh_force_quiescent_state(void) 58static inline void synchronize_rcu_expedited(void)
72{ 59{
60 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
73} 61}
74 62
75static inline void rcu_sched_force_quiescent_state(void) 63static inline void rcu_barrier(void)
76{ 64{
65 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
77} 66}
78 67
79extern void synchronize_sched(void); 68#else /* #ifdef CONFIG_TINY_RCU */
80 69
81static inline void synchronize_rcu(void) 70void synchronize_rcu(void);
82{ 71void rcu_barrier(void);
83 synchronize_sched(); 72void synchronize_rcu_expedited(void);
84}
85 73
86static inline void synchronize_rcu_bh(void) 74#endif /* #else #ifdef CONFIG_TINY_RCU */
87{
88 synchronize_sched();
89}
90 75
91static inline void synchronize_rcu_expedited(void) 76static inline void synchronize_rcu_bh(void)
92{ 77{
93 synchronize_sched(); 78 synchronize_sched();
94} 79}
@@ -117,15 +102,82 @@ static inline void rcu_exit_nohz(void)
117 102
118#endif /* #else #ifdef CONFIG_NO_HZ */ 103#endif /* #else #ifdef CONFIG_NO_HZ */
119 104
105#ifdef CONFIG_TINY_RCU
106
107static inline void rcu_preempt_note_context_switch(void)
108{
109}
110
120static inline void exit_rcu(void) 111static inline void exit_rcu(void)
121{ 112{
122} 113}
123 114
115static inline int rcu_needs_cpu(int cpu)
116{
117 return 0;
118}
119
124static inline int rcu_preempt_depth(void) 120static inline int rcu_preempt_depth(void)
125{ 121{
126 return 0; 122 return 0;
127} 123}
128 124
125#else /* #ifdef CONFIG_TINY_RCU */
126
127void rcu_preempt_note_context_switch(void);
128extern void exit_rcu(void);
129int rcu_preempt_needs_cpu(void);
130
131static inline int rcu_needs_cpu(int cpu)
132{
133 return rcu_preempt_needs_cpu();
134}
135
136/*
137 * Defined as macro as it is a very low level header
138 * included from areas that don't even know about current
139 * FIXME: combine with include/linux/rcutree.h into rcupdate.h.
140 */
141#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
142
143#endif /* #else #ifdef CONFIG_TINY_RCU */
144
145static inline void rcu_note_context_switch(int cpu)
146{
147 rcu_sched_qs(cpu);
148 rcu_preempt_note_context_switch();
149}
150
151extern void rcu_check_callbacks(int cpu, int user);
152
153/*
154 * Return the number of grace periods.
155 */
156static inline long rcu_batches_completed(void)
157{
158 return 0;
159}
160
161/*
162 * Return the number of bottom-half grace periods.
163 */
164static inline long rcu_batches_completed_bh(void)
165{
166 return 0;
167}
168
169static inline void rcu_force_quiescent_state(void)
170{
171}
172
173static inline void rcu_bh_force_quiescent_state(void)
174{
175}
176
177static inline void rcu_sched_force_quiescent_state(void)
178{
179}
180
129#ifdef CONFIG_DEBUG_LOCK_ALLOC 181#ifdef CONFIG_DEBUG_LOCK_ALLOC
130 182
131extern int rcu_scheduler_active __read_mostly; 183extern int rcu_scheduler_active __read_mostly;
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c0ed1c056f29..c13b85dd22bc 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -95,6 +95,8 @@ static inline void synchronize_rcu_bh_expedited(void)
95 synchronize_sched_expedited(); 95 synchronize_sched_expedited();
96} 96}
97 97
98extern void rcu_barrier(void);
99
98extern void rcu_check_callbacks(int cpu, int user); 100extern void rcu_check_callbacks(int cpu, int user);
99 101
100extern long rcu_batches_completed(void); 102extern long rcu_batches_completed(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c756666c111..e18473f0eb78 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,11 +1202,13 @@ struct task_struct {
1202 unsigned int policy; 1202 unsigned int policy;
1203 cpumask_t cpus_allowed; 1203 cpumask_t cpus_allowed;
1204 1204
1205#ifdef CONFIG_TREE_PREEMPT_RCU 1205#ifdef CONFIG_PREEMPT_RCU
1206 int rcu_read_lock_nesting; 1206 int rcu_read_lock_nesting;
1207 char rcu_read_unlock_special; 1207 char rcu_read_unlock_special;
1208 struct rcu_node *rcu_blocked_node;
1209 struct list_head rcu_node_entry; 1208 struct list_head rcu_node_entry;
1209#endif /* #ifdef CONFIG_PREEMPT_RCU */
1210#ifdef CONFIG_TREE_PREEMPT_RCU
1211 struct rcu_node *rcu_blocked_node;
1210#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1212#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1211 1213
1212#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1214#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1740#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) 1742#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1741#define used_math() tsk_used_math(current) 1743#define used_math() tsk_used_math(current)
1742 1744
1743#ifdef CONFIG_TREE_PREEMPT_RCU 1745#ifdef CONFIG_PREEMPT_RCU
1744 1746
1745#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1747#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1746#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ 1748#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
@@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p)
1749{ 1751{
1750 p->rcu_read_lock_nesting = 0; 1752 p->rcu_read_lock_nesting = 0;
1751 p->rcu_read_unlock_special = 0; 1753 p->rcu_read_unlock_special = 0;
1754#ifdef CONFIG_TREE_PREEMPT_RCU
1752 p->rcu_blocked_node = NULL; 1755 p->rcu_blocked_node = NULL;
1756#endif
1753 INIT_LIST_HEAD(&p->rcu_node_entry); 1757 INIT_LIST_HEAD(&p->rcu_node_entry);
1754} 1758}
1755 1759
diff --git a/init/Kconfig b/init/Kconfig
index dbc08baad77e..a619a1ac7f4c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -348,7 +348,7 @@ config TREE_RCU
348 smaller systems. 348 smaller systems.
349 349
350config TREE_PREEMPT_RCU 350config TREE_PREEMPT_RCU
351 bool "Preemptable tree-based hierarchical RCU" 351 bool "Preemptible tree-based hierarchical RCU"
352 depends on PREEMPT 352 depends on PREEMPT
353 help 353 help
354 This option selects the RCU implementation that is 354 This option selects the RCU implementation that is
@@ -366,8 +366,22 @@ config TINY_RCU
366 is not required. This option greatly reduces the 366 is not required. This option greatly reduces the
367 memory footprint of RCU. 367 memory footprint of RCU.
368 368
369config TINY_PREEMPT_RCU
370 bool "Preemptible UP-only small-memory-footprint RCU"
371 depends on !SMP && PREEMPT
372 help
373 This option selects the RCU implementation that is designed
374 for real-time UP systems. This option greatly reduces the
375 memory footprint of RCU.
376
369endchoice 377endchoice
370 378
379config PREEMPT_RCU
380 def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
381 help
382 This option enables preemptible-RCU code that is common between
383 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
384
371config RCU_TRACE 385config RCU_TRACE
372 bool "Enable tracing for RCU" 386 bool "Enable tracing for RCU"
373 depends on TREE_RCU || TREE_PREEMPT_RCU 387 depends on TREE_RCU || TREE_PREEMPT_RCU
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b72d1a74be0..17046b6e7c90 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o
86obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o 86obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
87obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o 87obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
88obj-$(CONFIG_TINY_RCU) += rcutiny.o 88obj-$(CONFIG_TINY_RCU) += rcutiny.o
89obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
89obj-$(CONFIG_RELAY) += relay.o 90obj-$(CONFIG_RELAY) += relay.o
90obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 91obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
91obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 92obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 196ec02f8be0..d806735342ac 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly;
59EXPORT_SYMBOL_GPL(rcu_scheduler_active); 59EXPORT_SYMBOL_GPL(rcu_scheduler_active);
60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
61 61
62/* Forward declarations for rcutiny_plugin.h. */
63static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
64static void __call_rcu(struct rcu_head *head,
65 void (*func)(struct rcu_head *rcu),
66 struct rcu_ctrlblk *rcp);
67
68#include "rcutiny_plugin.h"
69
62#ifdef CONFIG_NO_HZ 70#ifdef CONFIG_NO_HZ
63 71
64static long rcu_dynticks_nesting = 1; 72static long rcu_dynticks_nesting = 1;
@@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user)
140 rcu_sched_qs(cpu); 148 rcu_sched_qs(cpu);
141 else if (!in_softirq()) 149 else if (!in_softirq())
142 rcu_bh_qs(cpu); 150 rcu_bh_qs(cpu);
151 rcu_preempt_check_callbacks();
143} 152}
144 153
145/* 154/*
@@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
162 *rcp->donetail = NULL; 171 *rcp->donetail = NULL;
163 if (rcp->curtail == rcp->donetail) 172 if (rcp->curtail == rcp->donetail)
164 rcp->curtail = &rcp->rcucblist; 173 rcp->curtail = &rcp->rcucblist;
174 rcu_preempt_remove_callbacks(rcp);
165 rcp->donetail = &rcp->rcucblist; 175 rcp->donetail = &rcp->rcucblist;
166 local_irq_restore(flags); 176 local_irq_restore(flags);
167 177
@@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
182{ 192{
183 __rcu_process_callbacks(&rcu_sched_ctrlblk); 193 __rcu_process_callbacks(&rcu_sched_ctrlblk);
184 __rcu_process_callbacks(&rcu_bh_ctrlblk); 194 __rcu_process_callbacks(&rcu_bh_ctrlblk);
195 rcu_preempt_process_callbacks();
185} 196}
186 197
187/* 198/*
@@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head,
223} 234}
224 235
225/* 236/*
226 * Post an RCU callback to be invoked after the end of an RCU grace 237 * Post an RCU callback to be invoked after the end of an RCU-sched grace
227 * period. But since we have but one CPU, that would be after any 238 * period. But since we have but one CPU, that would be after any
228 * quiescent state. 239 * quiescent state.
229 */ 240 */
230void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 241void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
231{ 242{
232 __call_rcu(head, func, &rcu_sched_ctrlblk); 243 __call_rcu(head, func, &rcu_sched_ctrlblk);
233} 244}
234EXPORT_SYMBOL_GPL(call_rcu); 245EXPORT_SYMBOL_GPL(call_rcu_sched);
235 246
236/* 247/*
237 * Post an RCU bottom-half callback to be invoked after any subsequent 248 * Post an RCU bottom-half callback to be invoked after any subsequent
@@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
243} 254}
244EXPORT_SYMBOL_GPL(call_rcu_bh); 255EXPORT_SYMBOL_GPL(call_rcu_bh);
245 256
246void rcu_barrier(void)
247{
248 struct rcu_synchronize rcu;
249
250 init_rcu_head_on_stack(&rcu.head);
251 init_completion(&rcu.completion);
252 /* Will wake me after RCU finished. */
253 call_rcu(&rcu.head, wakeme_after_rcu);
254 /* Wait for it. */
255 wait_for_completion(&rcu.completion);
256 destroy_rcu_head_on_stack(&rcu.head);
257}
258EXPORT_SYMBOL_GPL(rcu_barrier);
259
260void rcu_barrier_bh(void) 257void rcu_barrier_bh(void)
261{ 258{
262 struct rcu_synchronize rcu; 259 struct rcu_synchronize rcu;
@@ -289,5 +286,3 @@ void __init rcu_init(void)
289{ 286{
290 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 287 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
291} 288}
292
293#include "rcutiny_plugin.h"
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index d223a92bc742..e6bc1b447c6c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -17,11 +17,587 @@
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright IBM Corporation, 2009 20 * Copyright (c) 2010 Linaro
21 * 21 *
22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25#ifdef CONFIG_TINY_PREEMPT_RCU
26
27#include <linux/delay.h>
28
29/* FIXME: merge with definitions in kernel/rcutree.h. */
30#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
31#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
32
33/* Global control variables for preemptible RCU. */
34struct rcu_preempt_ctrlblk {
35 struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
36 struct rcu_head **nexttail;
37 /* Tasks blocked in a preemptible RCU */
38 /* read-side critical section while a */
39 /* preemptible-RCU grace period is in */
40 /* progress must wait for a later grace */
41 /* period. This pointer points to the */
42 /* ->next pointer of the last task that */
43 /* must wait for a later grace period, or */
44 /* to &->rcb.rcucblist if there is no */
45 /* such task. */
46 struct list_head blkd_tasks;
47 /* Tasks blocked in RCU read-side critical */
48 /* section. Tasks are placed at the head */
49 /* of this list and age towards the tail. */
50 struct list_head *gp_tasks;
51 /* Pointer to the first task blocking the */
52 /* current grace period, or NULL if there */
53 /* is no such task. */
54 struct list_head *exp_tasks;
55 /* Pointer to first task blocking the */
56 /* current expedited grace period, or NULL */
57 /* if there is no such task. If there */
58 /* is no current expedited grace period, */
59 /* then there cannot be any such task. */
60 u8 gpnum; /* Current grace period. */
61 u8 gpcpu; /* Last grace period blocked by the CPU. */
62 u8 completed; /* Last grace period completed. */
63 /* If all three are equal, RCU is idle. */
64};
65
66static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
67 .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
68 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
69 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
70 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
71};
72
73static int rcu_preempted_readers_exp(void);
74static void rcu_report_exp_done(void);
75
76/*
77 * Return true if the CPU has not yet responded to the current grace period.
78 */
79static int rcu_cpu_cur_gp(void)
80{
81 return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
82}
83
84/*
85 * Check for a running RCU reader. Because there is only one CPU,
86 * there can be but one running RCU reader at a time. ;-)
87 */
88static int rcu_preempt_running_reader(void)
89{
90 return current->rcu_read_lock_nesting;
91}
92
93/*
94 * Check for preempted RCU readers blocking any grace period.
95 * If the caller needs a reliable answer, it must disable hard irqs.
96 */
97static int rcu_preempt_blocked_readers_any(void)
98{
99 return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
100}
101
102/*
103 * Check for preempted RCU readers blocking the current grace period.
104 * If the caller needs a reliable answer, it must disable hard irqs.
105 */
106static int rcu_preempt_blocked_readers_cgp(void)
107{
108 return rcu_preempt_ctrlblk.gp_tasks != NULL;
109}
110
111/*
112 * Return true if another preemptible-RCU grace period is needed.
113 */
114static int rcu_preempt_needs_another_gp(void)
115{
116 return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
117}
118
119/*
120 * Return true if a preemptible-RCU grace period is in progress.
121 * The caller must disable hardirqs.
122 */
123static int rcu_preempt_gp_in_progress(void)
124{
125 return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
126}
127
128/*
129 * Record a preemptible-RCU quiescent state for the specified CPU. Note
130 * that this just means that the task currently running on the CPU is
131 * in a quiescent state. There might be any number of tasks blocked
132 * while in an RCU read-side critical section.
133 *
134 * Unlike the other rcu_*_qs() functions, callers to this function
135 * must disable irqs in order to protect the assignment to
136 * ->rcu_read_unlock_special.
137 *
138 * Because this is a single-CPU implementation, the only way a grace
139 * period can end is if the CPU is in a quiescent state. The reason is
140 * that a blocked preemptible-RCU reader can exit its critical section
141 * only if the CPU is running it at the time. Therefore, when the
142 * last task blocking the current grace period exits its RCU read-side
143 * critical section, neither the CPU nor blocked tasks will be stopping
144 * the current grace period. (In contrast, SMP implementations
145 * might have CPUs running in RCU read-side critical sections that
146 * block later grace periods -- but this is not possible given only
147 * one CPU.)
148 */
149static void rcu_preempt_cpu_qs(void)
150{
151 /* Record both CPU and task as having responded to current GP. */
152 rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
153 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
154
155 /*
156 * If there is no GP, or if blocked readers are still blocking GP,
157 * then there is nothing more to do.
158 */
159 if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
160 return;
161
162 /* Advance callbacks. */
163 rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
164 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
165 rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
166
167 /* If there are no blocked readers, next GP is done instantly. */
168 if (!rcu_preempt_blocked_readers_any())
169 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
170
171 /* If there are done callbacks, make RCU_SOFTIRQ process them. */
172 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
173 raise_softirq(RCU_SOFTIRQ);
174}
175
176/*
177 * Start a new RCU grace period if warranted. Hard irqs must be disabled.
178 */
179static void rcu_preempt_start_gp(void)
180{
181 if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
182
183 /* Official start of GP. */
184 rcu_preempt_ctrlblk.gpnum++;
185
186 /* Any blocked RCU readers block new GP. */
187 if (rcu_preempt_blocked_readers_any())
188 rcu_preempt_ctrlblk.gp_tasks =
189 rcu_preempt_ctrlblk.blkd_tasks.next;
190
191 /* If there is no running reader, CPU is done with GP. */
192 if (!rcu_preempt_running_reader())
193 rcu_preempt_cpu_qs();
194 }
195}
196
197/*
198 * We have entered the scheduler, and the current task might soon be
199 * context-switched away from. If this task is in an RCU read-side
200 * critical section, we will no longer be able to rely on the CPU to
201 * record that fact, so we enqueue the task on the blkd_tasks list.
202 * If the task started after the current grace period began, as recorded
203 * by ->gpcpu, we enqueue at the beginning of the list. Otherwise
204 * before the element referenced by ->gp_tasks (or at the tail if
205 * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
206 * The task will dequeue itself when it exits the outermost enclosing
207 * RCU read-side critical section. Therefore, the current grace period
208 * cannot be permitted to complete until the ->gp_tasks pointer becomes
209 * NULL.
210 *
211 * Caller must disable preemption.
212 */
213void rcu_preempt_note_context_switch(void)
214{
215 struct task_struct *t = current;
216 unsigned long flags;
217
218 local_irq_save(flags); /* must exclude scheduler_tick(). */
219 if (rcu_preempt_running_reader() &&
220 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
221
222 /* Possibly blocking in an RCU read-side critical section. */
223 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
224
225 /*
226 * If this CPU has already checked in, then this task
227 * will hold up the next grace period rather than the
228 * current grace period. Queue the task accordingly.
229 * If the task is queued for the current grace period
230 * (i.e., this CPU has not yet passed through a quiescent
231 * state for the current grace period), then as long
232 * as that task remains queued, the current grace period
233 * cannot end.
234 */
235 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
236 if (rcu_cpu_cur_gp())
237 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
238 }
239
240 /*
241 * Either we were not in an RCU read-side critical section to
242 * begin with, or we have now recorded that critical section
243 * globally. Either way, we can now note a quiescent state
244 * for this CPU. Again, if we were in an RCU read-side critical
245 * section, and if that critical section was blocking the current
246 * grace period, then the fact that the task has been enqueued
247 * means that current grace period continues to be blocked.
248 */
249 rcu_preempt_cpu_qs();
250 local_irq_restore(flags);
251}
252
253/*
254 * Tiny-preemptible RCU implementation for rcu_read_lock().
255 * Just increment ->rcu_read_lock_nesting, shared state will be updated
256 * if we block.
257 */
258void __rcu_read_lock(void)
259{
260 current->rcu_read_lock_nesting++;
261 barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
262}
263EXPORT_SYMBOL_GPL(__rcu_read_lock);
264
265/*
266 * Handle special cases during rcu_read_unlock(), such as needing to
267 * notify RCU core processing or task having blocked during the RCU
268 * read-side critical section.
269 */
270static void rcu_read_unlock_special(struct task_struct *t)
271{
272 int empty;
273 int empty_exp;
274 unsigned long flags;
275 struct list_head *np;
276 int special;
277
278 /*
279 * NMI handlers cannot block and cannot safely manipulate state.
280 * They therefore cannot possibly be special, so just leave.
281 */
282 if (in_nmi())
283 return;
284
285 local_irq_save(flags);
286
287 /*
288 * If RCU core is waiting for this CPU to exit critical section,
289 * let it know that we have done so.
290 */
291 special = t->rcu_read_unlock_special;
292 if (special & RCU_READ_UNLOCK_NEED_QS)
293 rcu_preempt_cpu_qs();
294
295 /* Hardware IRQ handlers cannot block. */
296 if (in_irq()) {
297 local_irq_restore(flags);
298 return;
299 }
300
301 /* Clean up if blocked during RCU read-side critical section. */
302 if (special & RCU_READ_UNLOCK_BLOCKED) {
303 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
304
305 /*
306 * Remove this task from the ->blkd_tasks list and adjust
307 * any pointers that might have been referencing it.
308 */
309 empty = !rcu_preempt_blocked_readers_cgp();
310 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
311 np = t->rcu_node_entry.next;
312 if (np == &rcu_preempt_ctrlblk.blkd_tasks)
313 np = NULL;
314 list_del(&t->rcu_node_entry);
315 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
316 rcu_preempt_ctrlblk.gp_tasks = np;
317 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
318 rcu_preempt_ctrlblk.exp_tasks = np;
319 INIT_LIST_HEAD(&t->rcu_node_entry);
320
321 /*
322 * If this was the last task on the current list, and if
323 * we aren't waiting on the CPU, report the quiescent state
324 * and start a new grace period if needed.
325 */
326 if (!empty && !rcu_preempt_blocked_readers_cgp()) {
327 rcu_preempt_cpu_qs();
328 rcu_preempt_start_gp();
329 }
330
331 /*
332 * If this was the last task on the expedited lists,
333 * then we need wake up the waiting task.
334 */
335 if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
336 rcu_report_exp_done();
337 }
338 local_irq_restore(flags);
339}
340
341/*
342 * Tiny-preemptible RCU implementation for rcu_read_unlock().
343 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
344 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
345 * invoke rcu_read_unlock_special() to clean up after a context switch
346 * in an RCU read-side critical section and other special cases.
347 */
348void __rcu_read_unlock(void)
349{
350 struct task_struct *t = current;
351
352 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
353 --t->rcu_read_lock_nesting;
354 barrier(); /* decrement before load of ->rcu_read_unlock_special */
355 if (t->rcu_read_lock_nesting == 0 &&
356 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
357 rcu_read_unlock_special(t);
358#ifdef CONFIG_PROVE_LOCKING
359 WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
360#endif /* #ifdef CONFIG_PROVE_LOCKING */
361}
362EXPORT_SYMBOL_GPL(__rcu_read_unlock);
363
364/*
365 * Check for a quiescent state from the current CPU. When a task blocks,
366 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
367 * checked elsewhere. This is called from the scheduling-clock interrupt.
368 *
369 * Caller must disable hard irqs.
370 */
371static void rcu_preempt_check_callbacks(void)
372{
373 struct task_struct *t = current;
374
375 if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress())
376 rcu_preempt_cpu_qs();
377 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
378 rcu_preempt_ctrlblk.rcb.donetail)
379 raise_softirq(RCU_SOFTIRQ);
380 if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader())
381 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
382}
383
384/*
385 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
386 * update, so this is invoked from __rcu_process_callbacks() to
387 * handle that case. Of course, it is invoked for all flavors of
388 * RCU, but RCU callbacks can appear only on one of the lists, and
389 * neither ->nexttail nor ->donetail can possibly be NULL, so there
390 * is no need for an explicit check.
391 */
392static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
393{
394 if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
395 rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
396}
397
398/*
399 * Process callbacks for preemptible RCU.
400 */
401static void rcu_preempt_process_callbacks(void)
402{
403 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
404}
405
406/*
407 * Queue a preemptible -RCU callback for invocation after a grace period.
408 */
409void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
410{
411 unsigned long flags;
412
413 debug_rcu_head_queue(head);
414 head->func = func;
415 head->next = NULL;
416
417 local_irq_save(flags);
418 *rcu_preempt_ctrlblk.nexttail = head;
419 rcu_preempt_ctrlblk.nexttail = &head->next;
420 rcu_preempt_start_gp(); /* checks to see if GP needed. */
421 local_irq_restore(flags);
422}
423EXPORT_SYMBOL_GPL(call_rcu);
424
425void rcu_barrier(void)
426{
427 struct rcu_synchronize rcu;
428
429 init_rcu_head_on_stack(&rcu.head);
430 init_completion(&rcu.completion);
431 /* Will wake me after RCU finished. */
432 call_rcu(&rcu.head, wakeme_after_rcu);
433 /* Wait for it. */
434 wait_for_completion(&rcu.completion);
435 destroy_rcu_head_on_stack(&rcu.head);
436}
437EXPORT_SYMBOL_GPL(rcu_barrier);
438
/*
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control returns to the caller some time after a full grace period has
 * elapsed, that is, after all RCU read-side critical sections that were
 * executing at the time of the call have completed.  Read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock() and
 * may be nested.
 *
 * Fast paths: with CONFIG_DEBUG_LOCK_ALLOC, return immediately while
 * rcu_scheduler_active is still zero; otherwise return immediately when
 * no blocked readers exist.  Calling this from within a read-side
 * critical section triggers a one-time warning.
 */
void synchronize_rcu(void)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (!rcu_scheduler_active)
		return;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

	WARN_ON_ONCE(rcu_preempt_running_reader());

	/* Past the fastpath checks, this is the same work as rcu_barrier(). */
	if (rcu_preempt_blocked_readers_any())
		rcu_barrier();
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
463
/* Serializes callers of synchronize_rcu_expedited(). */
static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);

/* Incremented each time an expedited grace period completes. */
static unsigned long sync_rcu_preempt_exp_count;

/* Where synchronize_rcu_expedited() sleeps; woken by rcu_report_exp_done(). */
static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
467
468/*
469 * Return non-zero if there are any tasks in RCU read-side critical
470 * sections blocking the current preemptible-RCU expedited grace period.
471 * If there is no preemptible-RCU expedited grace period currently in
472 * progress, returns zero unconditionally.
473 */
474static int rcu_preempted_readers_exp(void)
475{
476 return rcu_preempt_ctrlblk.exp_tasks != NULL;
477}
478
479/*
480 * Report the exit from RCU read-side critical section for the last task
481 * that queued itself during or before the current expedited preemptible-RCU
482 * grace period.
483 */
484static void rcu_report_exp_done(void)
485{
486 wake_up(&sync_rcu_preempt_exp_wq);
487}
488
489/*
490 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
491 * is to rely in the fact that there is but one CPU, and that it is
492 * illegal for a task to invoke synchronize_rcu_expedited() while in a
493 * preemptible-RCU read-side critical section. Therefore, any such
494 * critical sections must correspond to blocked tasks, which must therefore
495 * be on the ->blkd_tasks list. So just record the current head of the
496 * list in the ->exp_tasks pointer, and wait for all tasks including and
497 * after the task pointed to by ->exp_tasks to drain.
498 */
499void synchronize_rcu_expedited(void)
500{
501 unsigned long flags;
502 struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
503 unsigned long snap;
504
505 barrier(); /* ensure prior action seen before grace period. */
506
507 WARN_ON_ONCE(rcu_preempt_running_reader());
508
509 /*
510 * Acquire lock so that there is only one preemptible RCU grace
511 * period in flight. Of course, if someone does the expedited
512 * grace period for us while we are acquiring the lock, just leave.
513 */
514 snap = sync_rcu_preempt_exp_count + 1;
515 mutex_lock(&sync_rcu_preempt_exp_mutex);
516 if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
517 goto unlock_mb_ret; /* Others did our work for us. */
518
519 local_irq_save(flags);
520
521 /*
522 * All RCU readers have to already be on blkd_tasks because
523 * we cannot legally be executing in an RCU read-side critical
524 * section.
525 */
526
527 /* Snapshot current head of ->blkd_tasks list. */
528 rpcp->exp_tasks = rpcp->blkd_tasks.next;
529 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
530 rpcp->exp_tasks = NULL;
531 local_irq_restore(flags);
532
533 /* Wait for tail of ->blkd_tasks list to drain. */
534 if (rcu_preempted_readers_exp())
535 wait_event(sync_rcu_preempt_exp_wq,
536 !rcu_preempted_readers_exp());
537
538 /* Clean up and exit. */
539 barrier(); /* ensure expedited GP seen before counter increment. */
540 sync_rcu_preempt_exp_count++;
541unlock_mb_ret:
542 mutex_unlock(&sync_rcu_preempt_exp_mutex);
543 barrier(); /* ensure subsequent action seen after grace period. */
544}
545EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
546
547/*
548 * Does preemptible RCU need the CPU to stay out of dynticks mode?
549 */
550int rcu_preempt_needs_cpu(void)
551{
552 if (!rcu_preempt_running_reader())
553 rcu_preempt_cpu_qs();
554 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
555}
556
557/*
558 * Check for a task exiting while in a preemptible -RCU read-side
559 * critical section, clean up if so. No need to issue warnings,
560 * as debug_check_no_locks_held() already does this if lockdep
561 * is enabled.
562 */
563void exit_rcu(void)
564{
565 struct task_struct *t = current;
566
567 if (t->rcu_read_lock_nesting == 0)
568 return;
569 t->rcu_read_lock_nesting = 1;
570 rcu_read_unlock();
571}
572
573#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
574
/*
 * Stub for kernels built without CONFIG_TINY_PREEMPT_RCU: preemptible
 * RCU does not exist, so there are never any callbacks to check.
 */
static void rcu_preempt_check_callbacks(void)
{
}
582
/*
 * Stub for kernels built without CONFIG_TINY_PREEMPT_RCU: preemptible
 * RCU does not exist, so there are never any callbacks to remove and
 * @rcp is ignored.
 */
static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
{
}
590
/*
 * Stub for kernels built without CONFIG_TINY_PREEMPT_RCU: preemptible
 * RCU does not exist, so there are never any callbacks to process.
 */
static void rcu_preempt_process_callbacks(void)
{
}
598
599#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
600
25#ifdef CONFIG_DEBUG_LOCK_ALLOC 601#ifdef CONFIG_DEBUG_LOCK_ALLOC
26 602
27#include <linux/kernel_stat.h> 603#include <linux/kernel_stat.h>