aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2010-06-29 19:49:16 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2010-08-20 11:55:00 -0400
commit a57eb940d130477a799dfb24a570ee04979c0f7f (patch)
tree 5add1c135a302cf1c1a454b0620ed17eb802923b /kernel
parent 4d87ffadbba88105f33271bef5f2c79366c6a4e1 (diff)
rcu: Add a TINY_PREEMPT_RCU
Implement a small-memory-footprint uniprocessor-only implementation of preemptible RCU. This implementation uses but a single blocked-tasks list rather than the combinatorial number used per leaf rcu_node by TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies processing. This version also takes advantage of uniprocessor execution to accelerate grace periods in the case where there are no readers.

The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU.

This implementation is a step towards having RCU implementation driven off of the SMP and PREEMPT kernel configuration variables, which can happen once this implementation has accumulated sufficient experience.

Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as suggested by Steve Rostedt in order to avoid the compiler-reordering issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183).

As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU. Of course, for non-real-time workloads, CONFIG_TINY_RCU is even better.

    CONFIG_TREE_PREEMPT_RCU

       text    data     bss     dec filename
         13       0       0      13 kernel/rcupdate.o
       6170     825      28    7023 kernel/rcutree.o
                               ----
                               7036 Total

    CONFIG_TINY_PREEMPT_RCU

       text    data     bss     dec filename
         13       0       0      13 kernel/rcupdate.o
       2081      81       8    2170 kernel/rcutiny.o
                               ----
                               2183 Total

    CONFIG_TINY_RCU (non-preemptible)

       text    data     bss     dec filename
         13       0       0      13 kernel/rcupdate.o
        719      25       0     744 kernel/rcutiny.o
                                ---
                                757 Total

Requested-by: Loïc Minier <loic.minier@canonical.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/rcutiny.c 33
-rw-r--r-- kernel/rcutiny_plugin.h 582
3 files changed, 594 insertions, 22 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b72d1a74be0..17046b6e7c90 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o
86obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o 86obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
87obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o 87obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
88obj-$(CONFIG_TINY_RCU) += rcutiny.o 88obj-$(CONFIG_TINY_RCU) += rcutiny.o
89obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
89obj-$(CONFIG_RELAY) += relay.o 90obj-$(CONFIG_RELAY) += relay.o
90obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 91obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
91obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 92obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 196ec02f8be0..d806735342ac 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly;
59EXPORT_SYMBOL_GPL(rcu_scheduler_active); 59EXPORT_SYMBOL_GPL(rcu_scheduler_active);
60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
61 61
62/* Forward declarations for rcutiny_plugin.h. */
63static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
64static void __call_rcu(struct rcu_head *head,
65 void (*func)(struct rcu_head *rcu),
66 struct rcu_ctrlblk *rcp);
67
68#include "rcutiny_plugin.h"
69
62#ifdef CONFIG_NO_HZ 70#ifdef CONFIG_NO_HZ
63 71
64static long rcu_dynticks_nesting = 1; 72static long rcu_dynticks_nesting = 1;
@@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user)
140 rcu_sched_qs(cpu); 148 rcu_sched_qs(cpu);
141 else if (!in_softirq()) 149 else if (!in_softirq())
142 rcu_bh_qs(cpu); 150 rcu_bh_qs(cpu);
151 rcu_preempt_check_callbacks();
143} 152}
144 153
145/* 154/*
@@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
162 *rcp->donetail = NULL; 171 *rcp->donetail = NULL;
163 if (rcp->curtail == rcp->donetail) 172 if (rcp->curtail == rcp->donetail)
164 rcp->curtail = &rcp->rcucblist; 173 rcp->curtail = &rcp->rcucblist;
174 rcu_preempt_remove_callbacks(rcp);
165 rcp->donetail = &rcp->rcucblist; 175 rcp->donetail = &rcp->rcucblist;
166 local_irq_restore(flags); 176 local_irq_restore(flags);
167 177
@@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
182{ 192{
183 __rcu_process_callbacks(&rcu_sched_ctrlblk); 193 __rcu_process_callbacks(&rcu_sched_ctrlblk);
184 __rcu_process_callbacks(&rcu_bh_ctrlblk); 194 __rcu_process_callbacks(&rcu_bh_ctrlblk);
195 rcu_preempt_process_callbacks();
185} 196}
186 197
187/* 198/*
@@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head,
223} 234}
224 235
225/* 236/*
226 * Post an RCU callback to be invoked after the end of an RCU grace 237 * Post an RCU callback to be invoked after the end of an RCU-sched grace
227 * period. But since we have but one CPU, that would be after any 238 * period. But since we have but one CPU, that would be after any
228 * quiescent state. 239 * quiescent state.
229 */ 240 */
230void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 241void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
231{ 242{
232 __call_rcu(head, func, &rcu_sched_ctrlblk); 243 __call_rcu(head, func, &rcu_sched_ctrlblk);
233} 244}
234EXPORT_SYMBOL_GPL(call_rcu); 245EXPORT_SYMBOL_GPL(call_rcu_sched);
235 246
236/* 247/*
237 * Post an RCU bottom-half callback to be invoked after any subsequent 248 * Post an RCU bottom-half callback to be invoked after any subsequent
@@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
243} 254}
244EXPORT_SYMBOL_GPL(call_rcu_bh); 255EXPORT_SYMBOL_GPL(call_rcu_bh);
245 256
246void rcu_barrier(void)
247{
248 struct rcu_synchronize rcu;
249
250 init_rcu_head_on_stack(&rcu.head);
251 init_completion(&rcu.completion);
252 /* Will wake me after RCU finished. */
253 call_rcu(&rcu.head, wakeme_after_rcu);
254 /* Wait for it. */
255 wait_for_completion(&rcu.completion);
256 destroy_rcu_head_on_stack(&rcu.head);
257}
258EXPORT_SYMBOL_GPL(rcu_barrier);
259
260void rcu_barrier_bh(void) 257void rcu_barrier_bh(void)
261{ 258{
262 struct rcu_synchronize rcu; 259 struct rcu_synchronize rcu;
@@ -289,5 +286,3 @@ void __init rcu_init(void)
289{ 286{
290 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 287 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
291} 288}
292
293#include "rcutiny_plugin.h"
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index d223a92bc742..e6bc1b447c6c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -17,11 +17,587 @@
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright IBM Corporation, 2009 20 * Copyright (c) 2010 Linaro
21 * 21 *
22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25#ifdef CONFIG_TINY_PREEMPT_RCU
26
27#include <linux/delay.h>
28
29/* FIXME: merge with definitions in kernel/rcutree.h. */
30#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
31#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
32
33/* Global control variables for preemptible RCU. */
34struct rcu_preempt_ctrlblk {
35 struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
36 struct rcu_head **nexttail;
37 /* Callbacks queued by call_rcu() after */
38 /* ->rcb.curtail was last advanced (for */
39 /* example, while a preemptible-RCU grace */
40 /* period is in progress) must wait for a */
41 /* later grace period. This pointer */
42 /* points to the ->next pointer of the */
43 /* last callback on the list, or to */
44 /* &->rcb.rcucblist if the list is */
45 /* empty. */
46 struct list_head blkd_tasks;
47 /* Tasks blocked in RCU read-side critical */
48 /* section. Tasks are placed at the head */
49 /* of this list and age towards the tail. */
50 struct list_head *gp_tasks;
51 /* Pointer to the first task blocking the */
52 /* current grace period, or NULL if there */
53 /* is no such task. */
54 struct list_head *exp_tasks;
55 /* Pointer to first task blocking the */
56 /* current expedited grace period, or NULL */
57 /* if there is no such task. If there */
58 /* is no current expedited grace period, */
59 /* then there cannot be any such task. */
60 u8 gpnum; /* Current grace period. */
61 u8 gpcpu; /* Last grace period blocked by the CPU. */
62 u8 completed; /* Last grace period completed. */
63 /* If all three are equal, RCU is idle. */
64};
65
66static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
67 .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
68 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
69 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
70 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
71};
72
73static int rcu_preempted_readers_exp(void);
74static void rcu_report_exp_done(void);
75
76/*
77 * Return true if the CPU has not yet responded to the current grace period.
78 */
79static int rcu_cpu_cur_gp(void)
80{
81 return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
82}
83
84/*
85 * Check for a running RCU reader. Because there is only one CPU,
86 * there can be but one running RCU reader at a time. ;-)
87 */
88static int rcu_preempt_running_reader(void)
89{
90 return current->rcu_read_lock_nesting;
91}
92
93/*
94 * Check for preempted RCU readers blocking any grace period.
95 * If the caller needs a reliable answer, it must disable hard irqs.
96 */
97static int rcu_preempt_blocked_readers_any(void)
98{
99 return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
100}
101
102/*
103 * Check for preempted RCU readers blocking the current grace period.
104 * If the caller needs a reliable answer, it must disable hard irqs.
105 */
106static int rcu_preempt_blocked_readers_cgp(void)
107{
108 return rcu_preempt_ctrlblk.gp_tasks != NULL;
109}
110
111/*
112 * Return true if another preemptible-RCU grace period is needed.
113 */
114static int rcu_preempt_needs_another_gp(void)
115{
116 return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
117}
118
119/*
120 * Return true if a preemptible-RCU grace period is in progress.
121 * The caller must disable hardirqs.
122 */
123static int rcu_preempt_gp_in_progress(void)
124{
125 return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
126}
127
128/*
129 * Record a preemptible-RCU quiescent state for the specified CPU. Note
130 * that this just means that the task currently running on the CPU is
131 * in a quiescent state. There might be any number of tasks blocked
132 * while in an RCU read-side critical section.
133 *
134 * Unlike the other rcu_*_qs() functions, callers to this function
135 * must disable irqs in order to protect the assignment to
136 * ->rcu_read_unlock_special.
137 *
138 * Because this is a single-CPU implementation, the only way a grace
139 * period can end is if the CPU is in a quiescent state. The reason is
140 * that a blocked preemptible-RCU reader can exit its critical section
141 * only if the CPU is running it at the time. Therefore, when the
142 * last task blocking the current grace period exits its RCU read-side
143 * critical section, neither the CPU nor blocked tasks will be stopping
144 * the current grace period. (In contrast, SMP implementations
145 * might have CPUs running in RCU read-side critical sections that
146 * block later grace periods -- but this is not possible given only
147 * one CPU.)
148 */
149static void rcu_preempt_cpu_qs(void)
150{
151 /* Record both CPU and task as having responded to current GP. */
152 rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
153 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
154
155 /*
156 * If there is no GP, or if blocked readers are still blocking GP,
157 * then there is nothing more to do.
158 */
159 if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
160 return;
161
162 /* Advance callbacks. */
163 rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
164 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
165 rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
166
167 /* If there are no blocked readers, next GP is done instantly. */
168 if (!rcu_preempt_blocked_readers_any())
169 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
170
171 /* If there are done callbacks, make RCU_SOFTIRQ process them. */
172 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
173 raise_softirq(RCU_SOFTIRQ);
174}
175
176/*
177 * Start a new RCU grace period if warranted. Hard irqs must be disabled.
178 */
179static void rcu_preempt_start_gp(void)
180{
181 if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
182
183 /* Official start of GP. */
184 rcu_preempt_ctrlblk.gpnum++;
185
186 /* Any blocked RCU readers block new GP. */
187 if (rcu_preempt_blocked_readers_any())
188 rcu_preempt_ctrlblk.gp_tasks =
189 rcu_preempt_ctrlblk.blkd_tasks.next;
190
191 /* If there is no running reader, CPU is done with GP. */
192 if (!rcu_preempt_running_reader())
193 rcu_preempt_cpu_qs();
194 }
195}
196
197/*
198 * We have entered the scheduler, and the current task might soon be
199 * context-switched away from. If this task is in an RCU read-side
200 * critical section, we will no longer be able to rely on the CPU to
201 * record that fact, so we enqueue the task on the blkd_tasks list.
202 * The task is always enqueued at the head of the list. If the CPU has
203 * not yet passed through a quiescent state for the current grace period,
204 * as recorded by ->gpcpu, ->gp_tasks is also pointed at the newly added
205 * element, so that the task blocks the current grace period.
206 * The task will dequeue itself when it exits the outermost enclosing
207 * RCU read-side critical section. Therefore, the current grace period
208 * cannot be permitted to complete until the ->gp_tasks pointer becomes
209 * NULL.
210 *
211 * Caller must disable preemption.
212 */
213void rcu_preempt_note_context_switch(void)
214{
215 struct task_struct *t = current;
216 unsigned long flags;
217
218 local_irq_save(flags); /* must exclude scheduler_tick(). */
219 if (rcu_preempt_running_reader() &&
220 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
221
222 /* Possibly blocking in an RCU read-side critical section. */
223 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
224
225 /*
226 * If this CPU has already checked in, then this task
227 * will hold up the next grace period rather than the
228 * current grace period. Queue the task accordingly.
229 * If the task is queued for the current grace period
230 * (i.e., this CPU has not yet passed through a quiescent
231 * state for the current grace period), then as long
232 * as that task remains queued, the current grace period
233 * cannot end.
234 */
235 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
236 if (rcu_cpu_cur_gp())
237 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
238 }
239
240 /*
241 * Either we were not in an RCU read-side critical section to
242 * begin with, or we have now recorded that critical section
243 * globally. Either way, we can now note a quiescent state
244 * for this CPU. Again, if we were in an RCU read-side critical
245 * section, and if that critical section was blocking the current
246 * grace period, then the fact that the task has been enqueued
247 * means that current grace period continues to be blocked.
248 */
249 rcu_preempt_cpu_qs();
250 local_irq_restore(flags);
251}
252
253/*
254 * Tiny-preemptible RCU implementation for rcu_read_lock().
255 * Just increment ->rcu_read_lock_nesting, shared state will be updated
256 * if we block.
257 */
258void __rcu_read_lock(void)
259{
260 current->rcu_read_lock_nesting++;
261 barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
262}
263EXPORT_SYMBOL_GPL(__rcu_read_lock);
264
265/*
266 * Handle special cases during rcu_read_unlock(), such as needing to
267 * notify RCU core processing or task having blocked during the RCU
268 * read-side critical section.
269 */
270static void rcu_read_unlock_special(struct task_struct *t)
271{
272 int empty;
273 int empty_exp;
274 unsigned long flags;
275 struct list_head *np;
276 int special;
277
278 /*
279 * NMI handlers cannot block and cannot safely manipulate state.
280 * They therefore cannot possibly be special, so just leave.
281 */
282 if (in_nmi())
283 return;
284
285 local_irq_save(flags);
286
287 /*
288 * If RCU core is waiting for this CPU to exit critical section,
289 * let it know that we have done so.
290 */
291 special = t->rcu_read_unlock_special;
292 if (special & RCU_READ_UNLOCK_NEED_QS)
293 rcu_preempt_cpu_qs();
294
295 /* Hardware IRQ handlers cannot block. */
296 if (in_irq()) {
297 local_irq_restore(flags);
298 return;
299 }
300
301 /* Clean up if blocked during RCU read-side critical section. */
302 if (special & RCU_READ_UNLOCK_BLOCKED) {
303 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
304
305 /*
306 * Remove this task from the ->blkd_tasks list and adjust
307 * any pointers that might have been referencing it.
308 */
309 empty = !rcu_preempt_blocked_readers_cgp();
310 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
311 np = t->rcu_node_entry.next;
312 if (np == &rcu_preempt_ctrlblk.blkd_tasks)
313 np = NULL;
314 list_del(&t->rcu_node_entry);
315 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
316 rcu_preempt_ctrlblk.gp_tasks = np;
317 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
318 rcu_preempt_ctrlblk.exp_tasks = np;
319 INIT_LIST_HEAD(&t->rcu_node_entry);
320
321 /*
322 * If this was the last task on the current list, and if
323 * we aren't waiting on the CPU, report the quiescent state
324 * and start a new grace period if needed.
325 */
326 if (!empty && !rcu_preempt_blocked_readers_cgp()) {
327 rcu_preempt_cpu_qs();
328 rcu_preempt_start_gp();
329 }
330
331 /*
332 * If this was the last task on the expedited lists,
333 * then we need to wake up the waiting task.
334 */
335 if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
336 rcu_report_exp_done();
337 }
338 local_irq_restore(flags);
339}
340
341/*
342 * Tiny-preemptible RCU implementation for rcu_read_unlock().
343 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
344 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
345 * invoke rcu_read_unlock_special() to clean up after a context switch
346 * in an RCU read-side critical section and other special cases.
347 */
348void __rcu_read_unlock(void)
349{
350 struct task_struct *t = current;
351
352 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
353 --t->rcu_read_lock_nesting;
354 barrier(); /* decrement before load of ->rcu_read_unlock_special */
355 if (t->rcu_read_lock_nesting == 0 &&
356 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
357 rcu_read_unlock_special(t);
358#ifdef CONFIG_PROVE_LOCKING
359 WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
360#endif /* #ifdef CONFIG_PROVE_LOCKING */
361}
362EXPORT_SYMBOL_GPL(__rcu_read_unlock);
363
364/*
365 * Check for a quiescent state from the current CPU. When a task blocks,
366 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
367 * checked elsewhere. This is called from the scheduling-clock interrupt.
368 *
369 * Caller must disable hard irqs.
370 */
371static void rcu_preempt_check_callbacks(void)
372{
373 struct task_struct *t = current;
374
375 if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress())
376 rcu_preempt_cpu_qs();
377 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
378 rcu_preempt_ctrlblk.rcb.donetail)
379 raise_softirq(RCU_SOFTIRQ);
380 if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader())
381 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
382}
383
384/*
385 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
386 * update, so this is invoked from __rcu_process_callbacks() to
387 * handle that case. Of course, it is invoked for all flavors of
388 * RCU, but RCU callbacks can appear only on one of the lists, and
389 * neither ->nexttail nor ->donetail can possibly be NULL, so there
390 * is no need for an explicit check.
391 */
392static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
393{
394 if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
395 rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
396}
397
398/*
399 * Process callbacks for preemptible RCU.
400 */
401static void rcu_preempt_process_callbacks(void)
402{
403 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
404}
405
406/*
407 * Queue a preemptible-RCU callback for invocation after a grace period.
408 */
409void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
410{
411 unsigned long flags;
412
413 debug_rcu_head_queue(head);
414 head->func = func;
415 head->next = NULL;
416
417 local_irq_save(flags);
418 *rcu_preempt_ctrlblk.nexttail = head;
419 rcu_preempt_ctrlblk.nexttail = &head->next;
420 rcu_preempt_start_gp(); /* checks to see if GP needed. */
421 local_irq_restore(flags);
422}
423EXPORT_SYMBOL_GPL(call_rcu);
424
425void rcu_barrier(void)
426{
427 struct rcu_synchronize rcu;
428
429 init_rcu_head_on_stack(&rcu.head);
430 init_completion(&rcu.completion);
431 /* Will wake me after RCU finished. */
432 call_rcu(&rcu.head, wakeme_after_rcu);
433 /* Wait for it. */
434 wait_for_completion(&rcu.completion);
435 destroy_rcu_head_on_stack(&rcu.head);
436}
437EXPORT_SYMBOL_GPL(rcu_barrier);
438
439/*
440 * synchronize_rcu - wait until a grace period has elapsed.
441 *
442 * Control will return to the caller some time after a full grace
443 * period has elapsed, in other words after all currently executing RCU
444 * read-side critical sections have completed. RCU read-side critical
445 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
446 * and may be nested.
447 */
448void synchronize_rcu(void)
449{
450#ifdef CONFIG_DEBUG_LOCK_ALLOC
451 if (!rcu_scheduler_active)
452 return;
453#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
454
455 WARN_ON_ONCE(rcu_preempt_running_reader());
456 if (!rcu_preempt_blocked_readers_any())
457 return;
458
459 /* Once we get past the fastpath checks, same code as rcu_barrier(). */
460 rcu_barrier();
461}
462EXPORT_SYMBOL_GPL(synchronize_rcu);
463
464static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
465static unsigned long sync_rcu_preempt_exp_count;
466static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
467
468/*
469 * Return non-zero if there are any tasks in RCU read-side critical
470 * sections blocking the current preemptible-RCU expedited grace period.
471 * If there is no preemptible-RCU expedited grace period currently in
472 * progress, returns zero unconditionally.
473 */
474static int rcu_preempted_readers_exp(void)
475{
476 return rcu_preempt_ctrlblk.exp_tasks != NULL;
477}
478
479/*
480 * Report the exit from RCU read-side critical section for the last task
481 * that queued itself during or before the current expedited preemptible-RCU
482 * grace period.
483 */
484static void rcu_report_exp_done(void)
485{
486 wake_up(&sync_rcu_preempt_exp_wq);
487}
488
489/*
490 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
491 * is to rely on the fact that there is but one CPU, and that it is
492 * illegal for a task to invoke synchronize_rcu_expedited() while in a
493 * preemptible-RCU read-side critical section. Therefore, any such
494 * critical sections must correspond to blocked tasks, which must therefore
495 * be on the ->blkd_tasks list. So just record the current head of the
496 * list in the ->exp_tasks pointer, and wait for all tasks including and
497 * after the task pointed to by ->exp_tasks to drain.
498 */
499void synchronize_rcu_expedited(void)
500{
501 unsigned long flags;
502 struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
503 unsigned long snap;
504
505 barrier(); /* ensure prior action seen before grace period. */
506
507 WARN_ON_ONCE(rcu_preempt_running_reader());
508
509 /*
510 * Acquire lock so that there is only one expedited preemptible-RCU
511 * grace period in flight. Of course, if someone does the expedited
512 * grace period for us while we are acquiring the lock, just leave.
513 */
514 snap = sync_rcu_preempt_exp_count + 1;
515 mutex_lock(&sync_rcu_preempt_exp_mutex);
516 if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
517 goto unlock_mb_ret; /* Others did our work for us. */
518
519 local_irq_save(flags);
520
521 /*
522 * All RCU readers have to already be on blkd_tasks because
523 * we cannot legally be executing in an RCU read-side critical
524 * section.
525 */
526
527 /* Snapshot current head of ->blkd_tasks list. */
528 rpcp->exp_tasks = rpcp->blkd_tasks.next;
529 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
530 rpcp->exp_tasks = NULL;
531 local_irq_restore(flags);
532
533 /* Wait for tail of ->blkd_tasks list to drain. */
534 if (rcu_preempted_readers_exp())
535 wait_event(sync_rcu_preempt_exp_wq,
536 !rcu_preempted_readers_exp());
537
538 /* Clean up and exit. */
539 barrier(); /* ensure expedited GP seen before counter increment. */
540 sync_rcu_preempt_exp_count++;
541unlock_mb_ret:
542 mutex_unlock(&sync_rcu_preempt_exp_mutex);
543 barrier(); /* ensure subsequent action seen after grace period. */
544}
545EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
546
547/*
548 * Does preemptible RCU need the CPU to stay out of dynticks mode?
549 */
550int rcu_preempt_needs_cpu(void)
551{
552 if (!rcu_preempt_running_reader())
553 rcu_preempt_cpu_qs();
554 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
555}
556
557/*
558 * Check for a task exiting while in a preemptible-RCU read-side
559 * critical section, clean up if so. No need to issue warnings,
560 * as debug_check_no_locks_held() already does this if lockdep
561 * is enabled.
562 */
563void exit_rcu(void)
564{
565 struct task_struct *t = current;
566
567 if (t->rcu_read_lock_nesting == 0)
568 return;
569 t->rcu_read_lock_nesting = 1;
570 rcu_read_unlock();
571}
572
573#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
574
575/*
576 * Because preemptible RCU does not exist, it never has any callbacks
577 * to check.
578 */
579static void rcu_preempt_check_callbacks(void)
580{
581}
582
583/*
584 * Because preemptible RCU does not exist, it never has any callbacks
585 * to remove.
586 */
587static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
588{
589}
590
591/*
592 * Because preemptible RCU does not exist, it never has any callbacks
593 * to process.
594 */
595static void rcu_preempt_process_callbacks(void)
596{
597}
598
599#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
600
25#ifdef CONFIG_DEBUG_LOCK_ALLOC 601#ifdef CONFIG_DEBUG_LOCK_ALLOC
26 602
27#include <linux/kernel_stat.h> 603#include <linux/kernel_stat.h>