Diffstat (limited to 'kernel/rcupdate.c')
-rw-r--r--	kernel/rcupdate.c	| 135
1 file changed, 72 insertions(+), 63 deletions(-)
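This patch folds the grace-period bookkeeping that used to live in struct rcu_state (the lock and the cpumask of CPUs that still owe a quiescent state) into struct rcu_ctrlblk, drops the __rcuref_hash fallback and the <linux/rcuref.h> include from this file, and adds rcu_pending()/__rcu_pending() so callers can ask whether a CPU has any RCU work at all. A rough sketch of the merged control block, inferred from the initializers and uses in the hunks below; the real definition lives in the rcupdate header and may name, order, or align the members differently:

struct rcu_ctrlblk {
	long		cur;		/* current batch number */
	long		completed;	/* number of last completed batch */
	int		next_pending;	/* is another batch queued already? */
	spinlock_t	lock;		/* guards this struct, incl. cpumask */
	cpumask_t	cpumask;	/* CPUs still to report a quiescent */
					/* state for the current batch */
};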
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 48d3bce465b8..0cf8146bd585 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
@@ -45,26 +46,21 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
-#include <linux/rcuref.h>
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
-	spinlock_t	lock;	/* Guard this struct and writes to rcu_ctrlblk */
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-				 /* for current batch to proceed.        */
-};
-
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
+struct rcu_ctrlblk rcu_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -73,19 +69,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10000;
 
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
-	[0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -233,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  * This is done by rcu_start_batch. The start is not broadcasted to
  * all cpus, they must pick this up by comparing rcp->cur with
  * rdp->quiescbatch. All cpus are recorded in the
- * rcu_state.cpumask bitmap.
+ * rcu_ctrlblk.cpumask bitmap.
  * - All cpus must go through a quiescent state.
  * Since the start of the grace period is not broadcasted, at least two
  * calls to rcu_check_quiescent_state are required:
  * The first call just notices that a new grace period is running. The
  * following calls check if there was a quiescent state since the beginning
- * of the grace period. If so, it updates rcu_state.cpumask. If
+ * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
  * the bitmap is empty, then the grace period is completed.
  * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  * period (if necessary).
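The comment above is the heart of the scheme: rcu_start_batch() opens a grace period, every CPU online at that moment is recorded in rcu_ctrlblk.cpumask, each CPU clears its own bit once it has passed a quiescent state, and the batch completes when the mask is empty. As a stand-alone illustration of just that accounting (userspace C, not kernel code: the mask is a plain unsigned long, and locking, next_pending, and the nohz handling are left out):

#include <stdio.h>

struct toy_ctrlblk {
	long cur;		/* current batch number */
	long completed;		/* last completed batch */
	unsigned long cpumask;	/* CPUs that still owe a quiescent state */
};

static void toy_start_batch(struct toy_ctrlblk *rcp, unsigned long online)
{
	rcp->cur++;
	rcp->cpumask = online;		/* every online CPU must check in */
}

static void toy_cpu_quiet(int cpu, struct toy_ctrlblk *rcp)
{
	rcp->cpumask &= ~(1UL << cpu);
	if (!rcp->cpumask)		/* last CPU reported: batch done */
		rcp->completed = rcp->cur;
}

int main(void)
{
	struct toy_ctrlblk rcp = { .cur = -300, .completed = -300 };

	toy_start_batch(&rcp, 0x3);	/* CPUs 0 and 1 online */
	toy_cpu_quiet(0, &rcp);
	toy_cpu_quiet(1, &rcp);
	printf("cur=%ld completed=%ld\n", rcp.cur, rcp.completed);
	return 0;
}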
@@ -247,14 +230,10 @@ static void rcu_do_batch(struct rcu_data *rdp)
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
-				int next_pending)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
-	if (next_pending)
-		rcp->next_pending = 1;
-
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
 		rcp->next_pending = 0;
@@ -268,11 +247,11 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
 	/*
 	 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
 	 * Barrier  Otherwise it can cause tickless idle CPUs to be
-	 * included in rsp->cpumask, which will extend graceperiods
+	 * included in rcp->cpumask, which will extend graceperiods
 	 * unnecessarily.
 	 */
 	smp_mb();
-	cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
+	cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
 
 	}
 }
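With the rsp and next_pending arguments gone, asking for another batch is now the caller's job: set next_pending and call rcu_start_batch() while holding rcu_ctrlblk.lock, as the __rcu_process_callbacks() hunk further down does:

	spin_lock(&rcp->lock);
	rcp->next_pending = 1;
	rcu_start_batch(rcp);
	spin_unlock(&rcp->lock);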
@@ -282,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-	cpu_clear(cpu, rsp->cpumask);
-	if (cpus_empty(rsp->cpumask)) {
+	cpu_clear(cpu, rcp->cpumask);
+	if (cpus_empty(rcp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
-		rcu_start_batch(rcp, rsp, 0);
+		rcu_start_batch(rcp);
 	}
 }
 
@@ -298,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
  * quiescent cycle, then indicate that it has done so.
  */
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-					struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->quiescbatch != rcp->cur) {
 		/* start new grace period: */
@@ -323,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
 		return;
 	rdp->qs_pending = 0;
 
-	spin_lock(&rsp->lock);
+	spin_lock(&rcp->lock);
 	/*
 	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
 	 * during cpu startup. Ignore the quiescent state.
 	 */
 	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp, rsp);
+		cpu_quiet(rdp->cpu, rcp);
 
-	spin_unlock(&rsp->lock);
+	spin_unlock(&rcp->lock);
 }
 
 
@@ -352,28 +331,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-	struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+	struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
 	/* if the cpu going offline owns the grace period
 	 * we can block indefinitely waiting for it, so flush
 	 * it here
 	 */
-	spin_lock_bh(&rsp->lock);
+	spin_lock_bh(&rcp->lock);
 	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp, rsp);
-	spin_unlock_bh(&rsp->lock);
+		cpu_quiet(rdp->cpu, rcp);
+	spin_unlock_bh(&rcp->lock);
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-
+	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
 }
+
 static void rcu_offline_cpu(int cpu)
 {
 	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
 	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
 
-	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
 			&per_cpu(rcu_data, cpu));
-	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
 			&per_cpu(rcu_bh_data, cpu));
 	put_cpu_var(rcu_data);
 	put_cpu_var(rcu_bh_data);
@@ -392,7 +372,7 @@ static void rcu_offline_cpu(int cpu)
  * This does the RCU processing work from tasklet context.
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-					struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
 		*rdp->donetail = rdp->curlist;
@@ -422,24 +402,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
 		if (!rcp->next_pending) {
 			/* and start it/schedule start if it's a new batch */
-			spin_lock(&rsp->lock);
-			rcu_start_batch(rcp, rsp, 1);
-			spin_unlock(&rsp->lock);
+			spin_lock(&rcp->lock);
+			rcp->next_pending = 1;
+			rcu_start_batch(rcp);
+			spin_unlock(&rcp->lock);
 		}
 	} else {
 		local_irq_enable();
 	}
-	rcu_check_quiescent_state(rcp, rsp, rdp);
+	rcu_check_quiescent_state(rcp, rdp);
 	if (rdp->donelist)
 		rcu_do_batch(rdp);
 }
 
 static void rcu_process_callbacks(unsigned long unused)
 {
-	__rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
-		&__get_cpu_var(rcu_data));
-	__rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
-		&__get_cpu_var(rcu_bh_data));
+	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+}
+
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+	/* This cpu has pending rcu entries and the grace period
+	 * for them has completed.
+	 */
+	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+		return 1;
+
+	/* This cpu has no pending entries, but there are new entries */
+	if (!rdp->curlist && rdp->nxtlist)
+		return 1;
+
+	/* This cpu has finished callbacks to invoke */
+	if (rdp->donelist)
+		return 1;
+
+	/* The rcu core waits for a quiescent state from the cpu */
+	if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+		return 1;
+
+	/* nothing to do */
+	return 0;
+}
+
+int rcu_pending(int cpu)
+{
+	return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+		__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
 void rcu_check_callbacks(int cpu, int user)
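The new rcu_pending()/__rcu_pending() pair exists so the per-tick code can skip raising the RCU tasklet when a CPU has nothing to do. A sketch of the intended call site; the actual hook sits in the timer-tick path outside this file and is not part of this diff, and user_tick here just stands for whatever user/kernel flag that path already carries:

	/* only poke the RCU core when this CPU actually has work */
	if (rcu_pending(cpu))
		rcu_check_callbacks(cpu, user_tick);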