Diffstat (limited to 'kernel/rcupdate.c')
 -rw-r--r--  kernel/rcupdate.c | 190
 1 file changed, 124 insertions(+), 66 deletions(-)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c4d159a21e04..0cf8146bd585 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
@@ -45,26 +46,21 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
-#include <linux/rcuref.h>
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
-	spinlock_t	lock;	/* Guard this struct and writes to rcu_ctrlblk */
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-				 /* for current batch to proceed.        */
+struct rcu_ctrlblk rcu_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
 };
-
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -73,19 +69,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10000;
 
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
-	[0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -116,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
 	local_irq_restore(flags);
 }
 
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -162,6 +149,42 @@ long rcu_batches_completed(void)
 	return rcu_ctrlblk.completed;
 }
 
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+	int cpu = smp_processor_id();
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	struct rcu_head *head;
+
+	head = &rdp->barrier;
+	atomic_inc(&rcu_barrier_cpu_count);
+	call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+	BUG_ON(in_interrupt());
+	/* Take cpucontrol semaphore to protect against CPU hotplug */
+	down(&rcu_barrier_sema);
+	init_completion(&rcu_barrier_completion);
+	atomic_set(&rcu_barrier_cpu_count, 0);
+	on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+	wait_for_completion(&rcu_barrier_completion);
+	up(&rcu_barrier_sema);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Invoke the completed RCU callbacks. They are expected to be in
  * a per-cpu list.
@@ -193,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  * This is done by rcu_start_batch. The start is not broadcasted to
  * all cpus, they must pick this up by comparing rcp->cur with
  * rdp->quiescbatch. All cpus are recorded in the
- *   rcu_state.cpumask bitmap.
+ *   rcu_ctrlblk.cpumask bitmap.
  * - All cpus must go through a quiescent state.
  *   Since the start of the grace period is not broadcasted, at least two
  *   calls to rcu_check_quiescent_state are required:
  *   The first call just notices that a new grace period is running. The
  *   following calls check if there was a quiescent state since the beginning
- *   of the grace period. If so, it updates rcu_state.cpumask. If
+ *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
  *   the bitmap is empty, then the grace period is completed.
  * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
@@ -207,25 +230,29 @@ static void rcu_do_batch(struct rcu_data *rdp)
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
-				int next_pending)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
-	if (next_pending)
-		rcp->next_pending = 1;
-
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
-		/* Can't change, since spin lock held. */
-		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
-
 		rcp->next_pending = 0;
-		/* next_pending == 0 must be visible in __rcu_process_callbacks()
-		 * before it can see new value of cur.
+		/*
+		 * next_pending == 0 must be visible in
+		 * __rcu_process_callbacks() before it can see new value of cur.
 		 */
 		smp_wmb();
 		rcp->cur++;
+
+		/*
+		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
+		 * Barrier Otherwise it can cause tickless idle CPUs to be
+		 * included in rcp->cpumask, which will extend graceperiods
+		 * unnecessarily.
+		 */
+		smp_mb();
+		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+
 	}
 }
 
@@ -234,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-	cpu_clear(cpu, rsp->cpumask);
-	if (cpus_empty(rsp->cpumask)) {
+	cpu_clear(cpu, rcp->cpumask);
+	if (cpus_empty(rcp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
-		rcu_start_batch(rcp, rsp, 0);
+		rcu_start_batch(rcp);
 	}
 }
 
@@ -250,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
  * quiescent cycle, then indicate that it has done so.
  */
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-					struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->quiescbatch != rcp->cur) {
 		/* start new grace period: */
@@ -275,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
 		return;
 	rdp->qs_pending = 0;
 
-	spin_lock(&rsp->lock);
+	spin_lock(&rcp->lock);
 	/*
 	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
 	 * during cpu startup. Ignore the quiescent state.
 	 */
 	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp, rsp);
+		cpu_quiet(rdp->cpu, rcp);
 
-	spin_unlock(&rsp->lock);
+	spin_unlock(&rcp->lock);
 }
 
 
@@ -304,28 +331,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-				struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+				struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
 	/* if the cpu going offline owns the grace period
 	 * we can block indefinitely waiting for it, so flush
 	 * it here
 	 */
-	spin_lock_bh(&rsp->lock);
+	spin_lock_bh(&rcp->lock);
 	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp, rsp);
-	spin_unlock_bh(&rsp->lock);
+		cpu_quiet(rdp->cpu, rcp);
+	spin_unlock_bh(&rcp->lock);
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-
+	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
 }
+
 static void rcu_offline_cpu(int cpu)
 {
 	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
 	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
 
-	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
 					&per_cpu(rcu_data, cpu));
-	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
 					&per_cpu(rcu_bh_data, cpu));
 	put_cpu_var(rcu_data);
 	put_cpu_var(rcu_bh_data);
@@ -344,7 +372,7 @@ static void rcu_offline_cpu(int cpu)
  * This does the RCU processing work from tasklet context.
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-					struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
 		*rdp->donetail = rdp->curlist;
@@ -374,24 +402,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
 	if (!rcp->next_pending) {
 		/* and start it/schedule start if it's a new batch */
-		spin_lock(&rsp->lock);
-		rcu_start_batch(rcp, rsp, 1);
-		spin_unlock(&rsp->lock);
+		spin_lock(&rcp->lock);
+		rcp->next_pending = 1;
+		rcu_start_batch(rcp);
+		spin_unlock(&rcp->lock);
 		}
 	} else {
 		local_irq_enable();
 	}
-	rcu_check_quiescent_state(rcp, rsp, rdp);
+	rcu_check_quiescent_state(rcp, rdp);
 	if (rdp->donelist)
 		rcu_do_batch(rdp);
 }
 
 static void rcu_process_callbacks(unsigned long unused)
 {
-	__rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
-			&__get_cpu_var(rcu_data));
-	__rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
-			&__get_cpu_var(rcu_bh_data));
+	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+}
+
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+	/* This cpu has pending rcu entries and the grace period
+	 * for them has completed.
+	 */
+	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+		return 1;
+
+	/* This cpu has no pending entries, but there are new entries */
+	if (!rdp->curlist && rdp->nxtlist)
+		return 1;
+
+	/* This cpu has finished callbacks to invoke */
+	if (rdp->donelist)
+		return 1;
+
+	/* The rcu core waits for a quiescent state from the cpu */
+	if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+		return 1;
+
+	/* nothing to do */
+	return 0;
+}
+
+int rcu_pending(int cpu)
+{
+	return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+		__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
 void rcu_check_callbacks(int cpu, int user)
@@ -457,6 +514,7 @@ static struct notifier_block __devinitdata rcu_nb = {
  */
 void __init rcu_init(void)
 {
+	sema_init(&rcu_barrier_sema, 1);
 	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	/* Register notifier for non-boot CPUs */
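
Note: the hunks above introduce a new rcu_barrier() primitive. The following is an
illustrative sketch only, not part of the patch; the names struct foo, foo_list,
foo_lock and foo_free_rcu are hypothetical. It shows the typical caller this
interface is aimed at: code that frees objects through call_rcu() and must wait for
every queued callback to run (for example before a module unload), which is more
than a plain grace-period wait provides.

/* Illustrative sketch, hypothetical "foo" names; assumes the rcu_barrier() above. */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head list;
	struct rcu_head rcu;
	int data;
};

static LIST_HEAD(foo_list);
static DEFINE_SPINLOCK(foo_lock);

/* Deferred free: runs after a grace period has elapsed. */
static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

/* Teardown path, e.g. on module unload, before the callback code may vanish. */
static void foo_release_all(void)
{
	struct foo *fp, *tmp;

	spin_lock(&foo_lock);
	list_for_each_entry_safe(fp, tmp, &foo_list, list) {
		list_del_rcu(&fp->list);
		call_rcu(&fp->rcu, foo_free_rcu);	/* queue deferred kfree() */
	}
	spin_unlock(&foo_lock);

	/*
	 * A grace-period wait alone only guarantees that readers are done;
	 * rcu_barrier() additionally waits until the callbacks queued above
	 * have actually been invoked on every cpu.
	 */
	rcu_barrier();
}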