Diffstat (limited to 'kernel/rcupdate.c')

 -rw-r--r--  kernel/rcupdate.c | 190
 1 file changed, 124 insertions, 66 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c4d159a21e04..0cf8146bd585 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
@@ -45,26 +46,21 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
-#include <linux/rcuref.h>
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk =
-        { .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
-        { .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
-        spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */
-        cpumask_t cpumask; /* CPUs that need to switch in order    */
-                           /* for current batch to proceed.        */
+struct rcu_ctrlblk rcu_ctrlblk = {
+        .cur = -300,
+        .completed = -300,
+        .lock = SPIN_LOCK_UNLOCKED,
+        .cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+        .cur = -300,
+        .completed = -300,
+        .lock = SPIN_LOCK_UNLOCKED,
+        .cpumask = CPU_MASK_NONE,
 };
-
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-        {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-        {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
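With rcu_state folded away, all per-flavor bookkeeping now lives in struct rcu_ctrlblk. A rough sketch of the resulting declaration, assuming the companion change to include/linux/rcupdate.h (field comments and the alignment attribute here are illustrative, not quoted from the header):

/* Sketch only -- the authoritative declaration is in include/linux/rcupdate.h. */
struct rcu_ctrlblk {
        long cur;               /* current batch number */
        long completed;         /* number of the last completed batch */
        int next_pending;       /* is the next batch already waiting? */
        spinlock_t lock;        /* guards writes to this struct */
        cpumask_t cpumask;      /* CPUs that still need a quiescent state */
                                /* before the current batch can complete */
} ____cacheline_maxaligned_in_smp;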
@@ -73,19 +69,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10000;
 
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
-        [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -116,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
         local_irq_restore(flags);
 }
 
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
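For reference, these per-CPU queues are fed by the usual call_rcu() pattern: embed a struct rcu_head in the protected object and free the object from the callback once a grace period has elapsed. A minimal sketch with made-up names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical RCU-protected object. */
struct foo {
        int data;
        struct rcu_head rcu;            /* queued via call_rcu() below */
};

static void foo_free_rcu(struct rcu_head *head)
{
        /* Runs only after every CPU has passed through a quiescent state. */
        kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *fp)
{
        /* Readers still inside rcu_read_lock() keep seeing valid memory. */
        call_rcu(&fp->rcu, foo_free_rcu);
}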
@@ -162,6 +149,42 @@ long rcu_batches_completed(void)
         return rcu_ctrlblk.completed;
 }
 
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+        if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+                complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+        int cpu = smp_processor_id();
+        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+        struct rcu_head *head;
+
+        head = &rdp->barrier;
+        atomic_inc(&rcu_barrier_cpu_count);
+        call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+        BUG_ON(in_interrupt());
+        /* Take cpucontrol semaphore to protect against CPU hotplug */
+        down(&rcu_barrier_sema);
+        init_completion(&rcu_barrier_completion);
+        atomic_set(&rcu_barrier_cpu_count, 0);
+        on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+        wait_for_completion(&rcu_barrier_completion);
+        up(&rcu_barrier_sema);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Invoke the completed RCU callbacks. They are expected to be in
  * a per-cpu list.
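rcu_barrier() exists so that code which has posted callbacks -- typically a module about to unload -- can wait until every callback already queued on any CPU has actually run. A hedged sketch of the intended use, with a hypothetical module and cache (foo_* names are made up):

/* Illustrative module exit path only. */
static void __exit foo_exit(void)
{
        foo_unregister_everything();    /* stop queueing new callbacks */

        /*
         * Wait for all call_rcu() callbacks this module has already
         * posted; otherwise they could run after the module text and
         * foo_cache are gone.
         */
        rcu_barrier();

        kmem_cache_destroy(foo_cache);
}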
@@ -193,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  * This is done by rcu_start_batch. The start is not broadcasted to
  * all cpus, they must pick this up by comparing rcp->cur with
  * rdp->quiescbatch. All cpus are recorded in the
- * rcu_state.cpumask bitmap.
+ * rcu_ctrlblk.cpumask bitmap.
  * - All cpus must go through a quiescent state.
  *   Since the start of the grace period is not broadcasted, at least two
  *   calls to rcu_check_quiescent_state are required:
  *   The first call just notices that a new grace period is running. The
  *   following calls check if there was a quiescent state since the beginning
- *   of the grace period. If so, it updates rcu_state.cpumask. If
+ *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
  *   the bitmap is empty, then the grace period is completed.
  *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
@@ -207,25 +230,29 @@ static void rcu_do_batch(struct rcu_data *rdp)
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
-                                int next_pending)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
-        if (next_pending)
-                rcp->next_pending = 1;
-
         if (rcp->next_pending &&
                         rcp->completed == rcp->cur) {
-                /* Can't change, since spin lock held. */
-                cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
-
                 rcp->next_pending = 0;
-                /* next_pending == 0 must be visible in __rcu_process_callbacks()
-                 * before it can see new value of cur.
+                /*
+                 * next_pending == 0 must be visible in
+                 * __rcu_process_callbacks() before it can see new value of cur.
                  */
                 smp_wmb();
                 rcp->cur++;
+
+                /*
+                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
+                 * Barrier  Otherwise it can cause tickless idle CPUs to be
+                 * included in rcp->cpumask, which will extend graceperiods
+                 * unnecessarily.
+                 */
+                smp_mb();
+                cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+
         }
 }
 
@@ -234,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-        cpu_clear(cpu, rsp->cpumask);
-        if (cpus_empty(rsp->cpumask)) {
+        cpu_clear(cpu, rcp->cpumask);
+        if (cpus_empty(rcp->cpumask)) {
                 /* batch completed ! */
                 rcp->completed = rcp->cur;
-                rcu_start_batch(rcp, rsp, 0);
+                rcu_start_batch(rcp);
         }
 }
 
@@ -250,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
  * quiescent cycle, then indicate that it has done so.
  */
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-                                        struct rcu_state *rsp, struct rcu_data *rdp)
+                                        struct rcu_data *rdp)
 {
         if (rdp->quiescbatch != rcp->cur) {
                 /* start new grace period: */
@@ -275,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                 return;
         rdp->qs_pending = 0;
 
-        spin_lock(&rsp->lock);
+        spin_lock(&rcp->lock);
         /*
          * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
          * during cpu startup. Ignore the quiescent state.
          */
         if (likely(rdp->quiescbatch == rcp->cur))
-                cpu_quiet(rdp->cpu, rcp, rsp);
+                cpu_quiet(rdp->cpu, rcp);
 
-        spin_unlock(&rsp->lock);
+        spin_unlock(&rcp->lock);
 }
 
 
@@ -304,28 +331,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-        struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
         /* if the cpu going offline owns the grace period
          * we can block indefinitely waiting for it, so flush
          * it here
          */
-        spin_lock_bh(&rsp->lock);
+        spin_lock_bh(&rcp->lock);
         if (rcp->cur != rcp->completed)
-                cpu_quiet(rdp->cpu, rcp, rsp);
-        spin_unlock_bh(&rsp->lock);
+                cpu_quiet(rdp->cpu, rcp);
+        spin_unlock_bh(&rcp->lock);
         rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
         rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-
+        rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
 }
+
 static void rcu_offline_cpu(int cpu)
 {
         struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
         struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
 
-        __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+        __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
                                         &per_cpu(rcu_data, cpu));
-        __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+        __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
                                         &per_cpu(rcu_bh_data, cpu));
         put_cpu_var(rcu_data);
         put_cpu_var(rcu_bh_data);
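For orientation, rcu_offline_cpu() is invoked from the CPU-hotplug notifier elsewhere in this file so that a dying CPU's pending callbacks are adopted by a live CPU rather than lost. Roughly (a sketch, not the exact notifier body):

static int __devinit rcu_cpu_notify(struct notifier_block *self,
                                    unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
                rcu_online_cpu(cpu);    /* set up this CPU's rcu_data */
                break;
        case CPU_DEAD:
                rcu_offline_cpu(cpu);   /* move its callbacks to a live CPU */
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}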
@@ -344,7 +372,7 @@ static void rcu_offline_cpu(int cpu)
  * This does the RCU processing work from tasklet context.
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-                                        struct rcu_state *rsp, struct rcu_data *rdp)
+                                        struct rcu_data *rdp)
 {
         if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
                 *rdp->donetail = rdp->curlist;
@@ -374,24 +402,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
                 if (!rcp->next_pending) {
                         /* and start it/schedule start if it's a new batch */
-                        spin_lock(&rsp->lock);
-                        rcu_start_batch(rcp, rsp, 1);
-                        spin_unlock(&rsp->lock);
+                        spin_lock(&rcp->lock);
+                        rcp->next_pending = 1;
+                        rcu_start_batch(rcp);
+                        spin_unlock(&rcp->lock);
                 }
         } else {
                 local_irq_enable();
         }
-        rcu_check_quiescent_state(rcp, rsp, rdp);
+        rcu_check_quiescent_state(rcp, rdp);
         if (rdp->donelist)
                 rcu_do_batch(rdp);
 }
 
 static void rcu_process_callbacks(unsigned long unused)
 {
-        __rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
-                                &__get_cpu_var(rcu_data));
-        __rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
-                                &__get_cpu_var(rcu_bh_data));
+        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+}
+
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+        /* This cpu has pending rcu entries and the grace period
+         * for them has completed.
+         */
+        if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+                return 1;
+
+        /* This cpu has no pending entries, but there are new entries */
+        if (!rdp->curlist && rdp->nxtlist)
+                return 1;
+
+        /* This cpu has finished callbacks to invoke */
+        if (rdp->donelist)
+                return 1;
+
+        /* The rcu core waits for a quiescent state from the cpu */
+        if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+                return 1;
+
+        /* nothing to do */
+        return 0;
+}
+
+int rcu_pending(int cpu)
+{
+        return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+                __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
 void rcu_check_callbacks(int cpu, int user)
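rcu_pending() gives the tick a cheap test for whether this CPU has any RCU work at all, so idle CPUs can avoid scheduling the RCU tasklet. A sketch of how a timer-tick caller might use it, assuming the companion change outside this file (the real caller is update_process_times(); the wrapper name here is made up):

/* Illustrative only -- not the actual hunk in kernel/timer.c. */
static void timer_tick_rcu_hook(int user_tick)
{
        int cpu = smp_processor_id();

        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user_tick);
}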
@@ -457,6 +514,7 @@ static struct notifier_block __devinitdata rcu_nb = {
  */
 void __init rcu_init(void)
 {
+        sema_init(&rcu_barrier_sema, 1);
         rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                         (void *)(long)smp_processor_id());
         /* Register notifier for non-boot CPUs */