Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c | 916
1 file changed, 539 insertions(+), 377 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f280e542e3e9..4fb2376ddf06 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | #include <linux/delay.h> | 53 | #include <linux/delay.h> |
54 | #include <linux/stop_machine.h> | 54 | #include <linux/stop_machine.h> |
55 | #include <linux/random.h> | ||
55 | 56 | ||
56 | #include "rcutree.h" | 57 | #include "rcutree.h" |
57 | #include <trace/events/rcu.h> | 58 | #include <trace/events/rcu.h> |
@@ -61,6 +62,7 @@ | |||
61 | /* Data structures. */ | 62 | /* Data structures. */ |
62 | 63 | ||
63 | static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; | 64 | static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; |
65 | static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; | ||
64 | 66 | ||
65 | #define RCU_STATE_INITIALIZER(sname, cr) { \ | 67 | #define RCU_STATE_INITIALIZER(sname, cr) { \ |
66 | .level = { &sname##_state.node[0] }, \ | 68 | .level = { &sname##_state.node[0] }, \ |
@@ -72,7 +74,6 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; | |||
72 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ | 74 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ |
73 | .orphan_donetail = &sname##_state.orphan_donelist, \ | 75 | .orphan_donetail = &sname##_state.orphan_donelist, \ |
74 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ | 76 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
75 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ | ||
76 | .name = #sname, \ | 77 | .name = #sname, \ |
77 | } | 78 | } |
78 | 79 | ||
@@ -88,7 +89,7 @@ LIST_HEAD(rcu_struct_flavors); | |||
88 | 89 | ||
89 | /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ | 90 | /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ |
90 | static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; | 91 | static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; |
91 | module_param(rcu_fanout_leaf, int, 0); | 92 | module_param(rcu_fanout_leaf, int, 0444); |
92 | int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; | 93 | int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; |
93 | static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ | 94 | static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ |
94 | NUM_RCU_LVL_0, | 95 | NUM_RCU_LVL_0, |
@@ -133,13 +134,12 @@ static int rcu_scheduler_fully_active __read_mostly; | |||
133 | */ | 134 | */ |
134 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | 135 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
135 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 136 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
136 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); | ||
137 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 137 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
138 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | 138 | DEFINE_PER_CPU(char, rcu_cpu_has_work); |
139 | 139 | ||
140 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 140 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
141 | 141 | ||
142 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 142 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
143 | static void invoke_rcu_core(void); | 143 | static void invoke_rcu_core(void); |
144 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 144 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
145 | 145 | ||
@@ -175,8 +175,6 @@ void rcu_sched_qs(int cpu) | |||
175 | { | 175 | { |
176 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); | 176 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
177 | 177 | ||
178 | rdp->passed_quiesce_gpnum = rdp->gpnum; | ||
179 | barrier(); | ||
180 | if (rdp->passed_quiesce == 0) | 178 | if (rdp->passed_quiesce == 0) |
181 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); | 179 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); |
182 | rdp->passed_quiesce = 1; | 180 | rdp->passed_quiesce = 1; |
@@ -186,8 +184,6 @@ void rcu_bh_qs(int cpu) | |||
186 | { | 184 | { |
187 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | 185 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
188 | 186 | ||
189 | rdp->passed_quiesce_gpnum = rdp->gpnum; | ||
190 | barrier(); | ||
191 | if (rdp->passed_quiesce == 0) | 187 | if (rdp->passed_quiesce == 0) |
192 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); | 188 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); |
193 | rdp->passed_quiesce = 1; | 189 | rdp->passed_quiesce = 1; |
@@ -210,15 +206,18 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); | |||
210 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 206 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
211 | .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, | 207 | .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, |
212 | .dynticks = ATOMIC_INIT(1), | 208 | .dynticks = ATOMIC_INIT(1), |
209 | #if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE) | ||
210 | .ignore_user_qs = true, | ||
211 | #endif | ||
213 | }; | 212 | }; |
214 | 213 | ||
215 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ | 214 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
216 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 215 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
217 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ | 216 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ |
218 | 217 | ||
219 | module_param(blimit, int, 0); | 218 | module_param(blimit, int, 0444); |
220 | module_param(qhimark, int, 0); | 219 | module_param(qhimark, int, 0444); |
221 | module_param(qlowmark, int, 0); | 220 | module_param(qlowmark, int, 0444); |
222 | 221 | ||
223 | int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ | 222 | int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ |
224 | int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; | 223 | int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; |
@@ -226,7 +225,14 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; | |||
226 | module_param(rcu_cpu_stall_suppress, int, 0644); | 225 | module_param(rcu_cpu_stall_suppress, int, 0644); |
227 | module_param(rcu_cpu_stall_timeout, int, 0644); | 226 | module_param(rcu_cpu_stall_timeout, int, 0644); |
228 | 227 | ||
229 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 228 | static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; |
229 | static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; | ||
230 | |||
231 | module_param(jiffies_till_first_fqs, ulong, 0644); | ||
232 | module_param(jiffies_till_next_fqs, ulong, 0644); | ||
233 | |||
234 | static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); | ||
235 | static void force_quiescent_state(struct rcu_state *rsp); | ||
230 | static int rcu_pending(int cpu); | 236 | static int rcu_pending(int cpu); |
231 | 237 | ||
232 | /* | 238 | /* |
@@ -252,7 +258,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |||
252 | */ | 258 | */ |
253 | void rcu_bh_force_quiescent_state(void) | 259 | void rcu_bh_force_quiescent_state(void) |
254 | { | 260 | { |
255 | force_quiescent_state(&rcu_bh_state, 0); | 261 | force_quiescent_state(&rcu_bh_state); |
256 | } | 262 | } |
257 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | 263 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
258 | 264 | ||
@@ -286,7 +292,7 @@ EXPORT_SYMBOL_GPL(rcutorture_record_progress); | |||
286 | */ | 292 | */ |
287 | void rcu_sched_force_quiescent_state(void) | 293 | void rcu_sched_force_quiescent_state(void) |
288 | { | 294 | { |
289 | force_quiescent_state(&rcu_sched_state, 0); | 295 | force_quiescent_state(&rcu_sched_state); |
290 | } | 296 | } |
291 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); | 297 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); |
292 | 298 | ||
@@ -305,7 +311,9 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) | |||
305 | static int | 311 | static int |
306 | cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) | 312 | cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) |
307 | { | 313 | { |
308 | return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); | 314 | return *rdp->nxttail[RCU_DONE_TAIL + |
315 | ACCESS_ONCE(rsp->completed) != rdp->completed] && | ||
316 | !rcu_gp_in_progress(rsp); | ||
309 | } | 317 | } |
310 | 318 | ||
311 | /* | 319 | /* |
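The reworked cpu_needs_another_gp() folds a comparison into its array subscript: a C comparison evaluates to 0 or 1, so RCU_DONE_TAIL + (ACCESS_ONCE(rsp->completed) != rdp->completed) indexes either the done-callbacks segment or the one just past it, depending on whether this CPU has caught up with the most recently completed grace period. A minimal standalone sketch of that indexing idiom (the segment names below are invented for illustration):

```c
#include <stdio.h>

/* Two adjacent "tail" slots, loosely mimicking segmented callback lists. */
enum { DONE_TAIL, WAIT_TAIL, NUM_TAILS };

int main(void)
{
	const char *tail[NUM_TAILS] = { "done segment", "wait segment" };
	unsigned long rsp_completed = 42;	/* global count of completed GPs */
	unsigned long rdp_completed = 41;	/* this CPU's snapshot of it     */

	/*
	 * (rsp_completed != rdp_completed) evaluates to 0 or 1, so the
	 * subscript picks the first slot when this CPU has caught up with
	 * the last completed grace period and the next slot when it has not.
	 */
	const char *chosen = tail[DONE_TAIL + (rsp_completed != rdp_completed)];
	printf("checking the %s\n", chosen);
	return 0;
}
```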
@@ -317,45 +325,17 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) | |||
317 | } | 325 | } |
318 | 326 | ||
319 | /* | 327 | /* |
320 | * If the specified CPU is offline, tell the caller that it is in | 328 | * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state |
321 | * a quiescent state. Otherwise, whack it with a reschedule IPI. | ||
322 | * Grace periods can end up waiting on an offline CPU when that | ||
323 | * CPU is in the process of coming online -- it will be added to the | ||
324 | * rcu_node bitmasks before it actually makes it online. The same thing | ||
325 | * can happen while a CPU is in the process of coming online. Because this | ||
326 | * race is quite rare, we check for it after detecting that the grace | ||
327 | * period has been delayed rather than checking each and every CPU | ||
328 | * each and every time we start a new grace period. | ||
329 | */ | ||
330 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) | ||
331 | { | ||
332 | /* | ||
333 | * If the CPU is offline for more than a jiffy, it is in a quiescent | ||
334 | * state. We can trust its state not to change because interrupts | ||
335 | * are disabled. The reason for the jiffy's worth of slack is to | ||
336 | * handle CPUs initializing on the way up and finding their way | ||
337 | * to the idle loop on the way down. | ||
338 | */ | ||
339 | if (cpu_is_offline(rdp->cpu) && | ||
340 | ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { | ||
341 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | ||
342 | rdp->offline_fqs++; | ||
343 | return 1; | ||
344 | } | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle | ||
350 | * | 329 | * |
351 | * If the new value of the ->dynticks_nesting counter now is zero, | 330 | * If the new value of the ->dynticks_nesting counter now is zero, |
352 | * we really have entered idle, and must do the appropriate accounting. | 331 | * we really have entered idle, and must do the appropriate accounting. |
353 | * The caller must have disabled interrupts. | 332 | * The caller must have disabled interrupts. |
354 | */ | 333 | */ |
355 | static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | 334 | static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, |
335 | bool user) | ||
356 | { | 336 | { |
357 | trace_rcu_dyntick("Start", oldval, 0); | 337 | trace_rcu_dyntick("Start", oldval, 0); |
358 | if (!is_idle_task(current)) { | 338 | if (!user && !is_idle_task(current)) { |
359 | struct task_struct *idle = idle_task(smp_processor_id()); | 339 | struct task_struct *idle = idle_task(smp_processor_id()); |
360 | 340 | ||
361 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); | 341 | trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); |
@@ -372,7 +352,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
372 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 352 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
373 | 353 | ||
374 | /* | 354 | /* |
375 | * The idle task is not permitted to enter the idle loop while | 355 | * It is illegal to enter an extended quiescent state while |
376 | * in an RCU read-side critical section. | 356 | * in an RCU read-side critical section. |
377 | */ | 357 | */ |
378 | rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), | 358 | rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), |
@@ -383,6 +363,25 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
383 | "Illegal idle entry in RCU-sched read-side critical section."); | 363 | "Illegal idle entry in RCU-sched read-side critical section."); |
384 | } | 364 | } |
385 | 365 | ||
366 | /* | ||
367 | * Enter an RCU extended quiescent state, which can be either the | ||
368 | * idle loop or adaptive-tickless usermode execution. | ||
369 | */ | ||
370 | static void rcu_eqs_enter(bool user) | ||
371 | { | ||
372 | long long oldval; | ||
373 | struct rcu_dynticks *rdtp; | ||
374 | |||
375 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
376 | oldval = rdtp->dynticks_nesting; | ||
377 | WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); | ||
378 | if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) | ||
379 | rdtp->dynticks_nesting = 0; | ||
380 | else | ||
381 | rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; | ||
382 | rcu_eqs_enter_common(rdtp, oldval, user); | ||
383 | } | ||
384 | |||
386 | /** | 385 | /** |
387 | * rcu_idle_enter - inform RCU that current CPU is entering idle | 386 | * rcu_idle_enter - inform RCU that current CPU is entering idle |
388 | * | 387 | * |
@@ -398,21 +397,70 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
398 | void rcu_idle_enter(void) | 397 | void rcu_idle_enter(void) |
399 | { | 398 | { |
400 | unsigned long flags; | 399 | unsigned long flags; |
401 | long long oldval; | 400 | |
401 | local_irq_save(flags); | ||
402 | rcu_eqs_enter(false); | ||
403 | local_irq_restore(flags); | ||
404 | } | ||
405 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | ||
406 | |||
407 | #ifdef CONFIG_RCU_USER_QS | ||
408 | /** | ||
409 | * rcu_user_enter - inform RCU that we are resuming userspace. | ||
410 | * | ||
411 | * Enter RCU idle mode right before resuming userspace. No use of RCU | ||
412 | * is permitted between this call and rcu_user_exit(). This way the | ||
413 | * CPU doesn't need to maintain the tick for RCU maintenance purposes | ||
414 | * when the CPU runs in userspace. | ||
415 | */ | ||
416 | void rcu_user_enter(void) | ||
417 | { | ||
418 | unsigned long flags; | ||
402 | struct rcu_dynticks *rdtp; | 419 | struct rcu_dynticks *rdtp; |
403 | 420 | ||
421 | /* | ||
422 | * Some contexts may involve an exception occuring in an irq, | ||
423 | * leading to that nesting: | ||
424 | * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() | ||
425 | * This would mess up the dyntick_nesting count though. And rcu_irq_*() | ||
426 | * helpers are enough to protect RCU uses inside the exception. So | ||
427 | * just return immediately if we detect we are in an IRQ. | ||
428 | */ | ||
429 | if (in_interrupt()) | ||
430 | return; | ||
431 | |||
432 | WARN_ON_ONCE(!current->mm); | ||
433 | |||
404 | local_irq_save(flags); | 434 | local_irq_save(flags); |
405 | rdtp = &__get_cpu_var(rcu_dynticks); | 435 | rdtp = &__get_cpu_var(rcu_dynticks); |
406 | oldval = rdtp->dynticks_nesting; | 436 | if (!rdtp->ignore_user_qs && !rdtp->in_user) { |
407 | WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); | 437 | rdtp->in_user = true; |
408 | if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) | 438 | rcu_eqs_enter(true); |
409 | rdtp->dynticks_nesting = 0; | 439 | } |
410 | else | ||
411 | rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; | ||
412 | rcu_idle_enter_common(rdtp, oldval); | ||
413 | local_irq_restore(flags); | 440 | local_irq_restore(flags); |
414 | } | 441 | } |
415 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | 442 | |
443 | /** | ||
444 | * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace | ||
445 | * after the current irq returns. | ||
446 | * | ||
447 | * This is similar to rcu_user_enter() but in the context of a non-nesting | ||
448 | * irq. After this call, RCU enters into idle mode when the interrupt | ||
449 | * returns. | ||
450 | */ | ||
451 | void rcu_user_enter_after_irq(void) | ||
452 | { | ||
453 | unsigned long flags; | ||
454 | struct rcu_dynticks *rdtp; | ||
455 | |||
456 | local_irq_save(flags); | ||
457 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
458 | /* Ensure this irq is interrupting a non-idle RCU state. */ | ||
459 | WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK)); | ||
460 | rdtp->dynticks_nesting = 1; | ||
461 | local_irq_restore(flags); | ||
462 | } | ||
463 | #endif /* CONFIG_RCU_USER_QS */ | ||
416 | 464 | ||
417 | /** | 465 | /** |
418 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle | 466 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle |
@@ -444,18 +492,19 @@ void rcu_irq_exit(void) | |||
444 | if (rdtp->dynticks_nesting) | 492 | if (rdtp->dynticks_nesting) |
445 | trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); | 493 | trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); |
446 | else | 494 | else |
447 | rcu_idle_enter_common(rdtp, oldval); | 495 | rcu_eqs_enter_common(rdtp, oldval, true); |
448 | local_irq_restore(flags); | 496 | local_irq_restore(flags); |
449 | } | 497 | } |
450 | 498 | ||
451 | /* | 499 | /* |
452 | * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle | 500 | * rcu_eqs_exit_common - current CPU moving away from extended quiescent state |
453 | * | 501 | * |
454 | * If the new value of the ->dynticks_nesting counter was previously zero, | 502 | * If the new value of the ->dynticks_nesting counter was previously zero, |
455 | * we really have exited idle, and must do the appropriate accounting. | 503 | * we really have exited idle, and must do the appropriate accounting. |
456 | * The caller must have disabled interrupts. | 504 | * The caller must have disabled interrupts. |
457 | */ | 505 | */ |
458 | static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | 506 | static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, |
507 | int user) | ||
459 | { | 508 | { |
460 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ | 509 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ |
461 | atomic_inc(&rdtp->dynticks); | 510 | atomic_inc(&rdtp->dynticks); |
@@ -464,7 +513,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
464 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 513 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
465 | rcu_cleanup_after_idle(smp_processor_id()); | 514 | rcu_cleanup_after_idle(smp_processor_id()); |
466 | trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); | 515 | trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); |
467 | if (!is_idle_task(current)) { | 516 | if (!user && !is_idle_task(current)) { |
468 | struct task_struct *idle = idle_task(smp_processor_id()); | 517 | struct task_struct *idle = idle_task(smp_processor_id()); |
469 | 518 | ||
470 | trace_rcu_dyntick("Error on exit: not idle task", | 519 | trace_rcu_dyntick("Error on exit: not idle task", |
@@ -476,6 +525,25 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
476 | } | 525 | } |
477 | } | 526 | } |
478 | 527 | ||
528 | /* | ||
529 | * Exit an RCU extended quiescent state, which can be either the | ||
530 | * idle loop or adaptive-tickless usermode execution. | ||
531 | */ | ||
532 | static void rcu_eqs_exit(bool user) | ||
533 | { | ||
534 | struct rcu_dynticks *rdtp; | ||
535 | long long oldval; | ||
536 | |||
537 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
538 | oldval = rdtp->dynticks_nesting; | ||
539 | WARN_ON_ONCE(oldval < 0); | ||
540 | if (oldval & DYNTICK_TASK_NEST_MASK) | ||
541 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | ||
542 | else | ||
543 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | ||
544 | rcu_eqs_exit_common(rdtp, oldval, user); | ||
545 | } | ||
546 | |||
479 | /** | 547 | /** |
480 | * rcu_idle_exit - inform RCU that current CPU is leaving idle | 548 | * rcu_idle_exit - inform RCU that current CPU is leaving idle |
481 | * | 549 | * |
@@ -490,21 +558,67 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
490 | void rcu_idle_exit(void) | 558 | void rcu_idle_exit(void) |
491 | { | 559 | { |
492 | unsigned long flags; | 560 | unsigned long flags; |
561 | |||
562 | local_irq_save(flags); | ||
563 | rcu_eqs_exit(false); | ||
564 | local_irq_restore(flags); | ||
565 | } | ||
566 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | ||
567 | |||
568 | #ifdef CONFIG_RCU_USER_QS | ||
569 | /** | ||
570 | * rcu_user_exit - inform RCU that we are exiting userspace. | ||
571 | * | ||
572 | * Exit RCU idle mode while entering the kernel because it can | ||
573 | * run a RCU read side critical section anytime. | ||
574 | */ | ||
575 | void rcu_user_exit(void) | ||
576 | { | ||
577 | unsigned long flags; | ||
493 | struct rcu_dynticks *rdtp; | 578 | struct rcu_dynticks *rdtp; |
494 | long long oldval; | 579 | |
580 | /* | ||
581 | * Some contexts may involve an exception occuring in an irq, | ||
582 | * leading to that nesting: | ||
583 | * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() | ||
584 | * This would mess up the dyntick_nesting count though. And rcu_irq_*() | ||
585 | * helpers are enough to protect RCU uses inside the exception. So | ||
586 | * just return immediately if we detect we are in an IRQ. | ||
587 | */ | ||
588 | if (in_interrupt()) | ||
589 | return; | ||
495 | 590 | ||
496 | local_irq_save(flags); | 591 | local_irq_save(flags); |
497 | rdtp = &__get_cpu_var(rcu_dynticks); | 592 | rdtp = &__get_cpu_var(rcu_dynticks); |
498 | oldval = rdtp->dynticks_nesting; | 593 | if (rdtp->in_user) { |
499 | WARN_ON_ONCE(oldval < 0); | 594 | rdtp->in_user = false; |
500 | if (oldval & DYNTICK_TASK_NEST_MASK) | 595 | rcu_eqs_exit(true); |
501 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | 596 | } |
502 | else | ||
503 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | ||
504 | rcu_idle_exit_common(rdtp, oldval); | ||
505 | local_irq_restore(flags); | 597 | local_irq_restore(flags); |
506 | } | 598 | } |
507 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | 599 | |
600 | /** | ||
601 | * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace | ||
602 | * idle mode after the current non-nesting irq returns. | ||
603 | * | ||
604 | * This is similar to rcu_user_exit() but in the context of an irq. | ||
605 | * This is called when the irq has interrupted a userspace RCU idle mode | ||
606 | * context. When the current non-nesting interrupt returns after this call, | ||
607 | * the CPU won't restore the RCU idle mode. | ||
608 | */ | ||
609 | void rcu_user_exit_after_irq(void) | ||
610 | { | ||
611 | unsigned long flags; | ||
612 | struct rcu_dynticks *rdtp; | ||
613 | |||
614 | local_irq_save(flags); | ||
615 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
616 | /* Ensure we are interrupting an RCU idle mode. */ | ||
617 | WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK); | ||
618 | rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE; | ||
619 | local_irq_restore(flags); | ||
620 | } | ||
621 | #endif /* CONFIG_RCU_USER_QS */ | ||
508 | 622 | ||
509 | /** | 623 | /** |
510 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle | 624 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle |
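Across these hunks, ->dynticks_nesting counts the reasons a CPU must remain visible to RCU (task context plus any nested irqs), and the _common helpers that do the real accounting run around the zero crossing, which is what rcu_irq_exit() and rcu_irq_enter() check explicitly before calling them. A deliberately simplified userspace model of that zero-crossing idea (a bare counter stands in for the kernel's DYNTICK_TASK_* mask arithmetic, so this is an analogy rather than the real bookkeeping):

```c
#include <stdio.h>

static long nesting = 1;	/* > 0: RCU is watching; 0: extended QS */

static void eqs_enter_common(void) { printf("-> extended quiescent state\n"); }
static void eqs_exit_common(void)  { printf("<- RCU is watching again\n"); }

static void eqs_enter(void)
{
	if (--nesting == 0)		/* only the outermost enter transitions */
		eqs_enter_common();
}

static void eqs_exit(void)
{
	if (nesting++ == 0)		/* only the outermost exit transitions */
		eqs_exit_common();
}

int main(void)
{
	eqs_enter();	/* idle entry or resuming userspace */
	eqs_exit();	/* irq or syscall entry */
	eqs_enter();	/* irq exit back to the extended QS */
	eqs_exit();	/* leaving idle / entering the kernel for good */
	return 0;
}
```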
@@ -539,7 +653,7 @@ void rcu_irq_enter(void) | |||
539 | if (oldval) | 653 | if (oldval) |
540 | trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); | 654 | trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); |
541 | else | 655 | else |
542 | rcu_idle_exit_common(rdtp, oldval); | 656 | rcu_eqs_exit_common(rdtp, oldval, true); |
543 | local_irq_restore(flags); | 657 | local_irq_restore(flags); |
544 | } | 658 | } |
545 | 659 | ||
@@ -603,6 +717,21 @@ int rcu_is_cpu_idle(void) | |||
603 | } | 717 | } |
604 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 718 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
605 | 719 | ||
720 | #ifdef CONFIG_RCU_USER_QS | ||
721 | void rcu_user_hooks_switch(struct task_struct *prev, | ||
722 | struct task_struct *next) | ||
723 | { | ||
724 | struct rcu_dynticks *rdtp; | ||
725 | |||
726 | /* Interrupts are disabled in context switch */ | ||
727 | rdtp = &__get_cpu_var(rcu_dynticks); | ||
728 | if (!rdtp->ignore_user_qs) { | ||
729 | clear_tsk_thread_flag(prev, TIF_NOHZ); | ||
730 | set_tsk_thread_flag(next, TIF_NOHZ); | ||
731 | } | ||
732 | } | ||
733 | #endif /* #ifdef CONFIG_RCU_USER_QS */ | ||
734 | |||
606 | #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) | 735 | #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) |
607 | 736 | ||
608 | /* | 737 | /* |
@@ -673,7 +802,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | |||
673 | * Return true if the specified CPU has passed through a quiescent | 802 | * Return true if the specified CPU has passed through a quiescent |
674 | * state by virtue of being in or having passed through an dynticks | 803 | * state by virtue of being in or having passed through an dynticks |
675 | * idle state since the last call to dyntick_save_progress_counter() | 804 | * idle state since the last call to dyntick_save_progress_counter() |
676 | * for this same CPU. | 805 | * for this same CPU, or by virtue of having been offline. |
677 | */ | 806 | */ |
678 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 807 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
679 | { | 808 | { |
@@ -697,8 +826,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
697 | return 1; | 826 | return 1; |
698 | } | 827 | } |
699 | 828 | ||
700 | /* Go check for the CPU being offline. */ | 829 | /* |
701 | return rcu_implicit_offline_qs(rdp); | 830 | * Check for the CPU being offline, but only if the grace period |
831 | * is old enough. We don't need to worry about the CPU changing | ||
832 | * state: If we see it offline even once, it has been through a | ||
833 | * quiescent state. | ||
834 | * | ||
835 | * The reason for insisting that the grace period be at least | ||
836 | * one jiffy old is that CPUs that are not quite online and that | ||
837 | * have just gone offline can still execute RCU read-side critical | ||
838 | * sections. | ||
839 | */ | ||
840 | if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) | ||
841 | return 0; /* Grace period is not old enough. */ | ||
842 | barrier(); | ||
843 | if (cpu_is_offline(rdp->cpu)) { | ||
844 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | ||
845 | rdp->offline_fqs++; | ||
846 | return 1; | ||
847 | } | ||
848 | return 0; | ||
702 | } | 849 | } |
703 | 850 | ||
704 | static int jiffies_till_stall_check(void) | 851 | static int jiffies_till_stall_check(void) |
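The offline-CPU check added here deliberately waits until the grace period is at least a couple of jiffies old, and the age test uses ULONG_CMP_GE() so it stays correct when the jiffies counter wraps. A standalone sketch of that wraparound-safe comparison (the macro body below mirrors the usual kernel definition but is reproduced from memory, so treat it as illustrative):

```c
#include <limits.h>
#include <stdio.h>

/* Wraparound-safe "a >= b" in the style of the kernel's ULONG_CMP_GE(). */
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long gp_start = ULONG_MAX - 1;	/* GP began just before the wrap */
	unsigned long jiffies  = ULONG_MAX;	/* one jiffy later               */

	/* The "grace period is not old enough" test from the patch. */
	if (ULONG_CMP_GE(gp_start + 2, jiffies))
		printf("GP too young, skip the offline check\n");
	else
		printf("GP is old enough, an offline CPU counts as quiescent\n");
	return 0;
}
```

A plain gp_start + 2 >= jiffies comparison would wrongly report the grace period as old enough here, because the left-hand side has already wrapped around to zero.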
@@ -755,14 +902,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
755 | rcu_for_each_leaf_node(rsp, rnp) { | 902 | rcu_for_each_leaf_node(rsp, rnp) { |
756 | raw_spin_lock_irqsave(&rnp->lock, flags); | 903 | raw_spin_lock_irqsave(&rnp->lock, flags); |
757 | ndetected += rcu_print_task_stall(rnp); | 904 | ndetected += rcu_print_task_stall(rnp); |
905 | if (rnp->qsmask != 0) { | ||
906 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | ||
907 | if (rnp->qsmask & (1UL << cpu)) { | ||
908 | print_cpu_stall_info(rsp, | ||
909 | rnp->grplo + cpu); | ||
910 | ndetected++; | ||
911 | } | ||
912 | } | ||
758 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 913 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
759 | if (rnp->qsmask == 0) | ||
760 | continue; | ||
761 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | ||
762 | if (rnp->qsmask & (1UL << cpu)) { | ||
763 | print_cpu_stall_info(rsp, rnp->grplo + cpu); | ||
764 | ndetected++; | ||
765 | } | ||
766 | } | 914 | } |
767 | 915 | ||
768 | /* | 916 | /* |
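The stall report now scans the leaf's qsmask while still holding that rcu_node's lock, printing every CPU between grplo and grphi whose bit is still set. The same loop shape in standalone form (the values are made up):

```c
#include <stdio.h>

int main(void)
{
	unsigned long qsmask = 0x5;	/* CPUs 0 and 2 of this leaf still owe a QS */
	int grplo = 4, grphi = 7;	/* this leaf covers CPUs 4..7 */

	for (int cpu = 0; cpu <= grphi - grplo; cpu++)
		if (qsmask & (1UL << cpu))
			printf("CPU %d has not yet reported a quiescent state\n",
			       grplo + cpu);
	return 0;
}
```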
@@ -782,11 +930,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
782 | else if (!trigger_all_cpu_backtrace()) | 930 | else if (!trigger_all_cpu_backtrace()) |
783 | dump_stack(); | 931 | dump_stack(); |
784 | 932 | ||
785 | /* If so configured, complain about tasks blocking the grace period. */ | 933 | /* Complain about tasks blocking the grace period. */ |
786 | 934 | ||
787 | rcu_print_detail_task_stall(rsp); | 935 | rcu_print_detail_task_stall(rsp); |
788 | 936 | ||
789 | force_quiescent_state(rsp, 0); /* Kick them all. */ | 937 | force_quiescent_state(rsp); /* Kick them all. */ |
790 | } | 938 | } |
791 | 939 | ||
792 | static void print_cpu_stall(struct rcu_state *rsp) | 940 | static void print_cpu_stall(struct rcu_state *rsp) |
@@ -827,7 +975,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
827 | j = ACCESS_ONCE(jiffies); | 975 | j = ACCESS_ONCE(jiffies); |
828 | js = ACCESS_ONCE(rsp->jiffies_stall); | 976 | js = ACCESS_ONCE(rsp->jiffies_stall); |
829 | rnp = rdp->mynode; | 977 | rnp = rdp->mynode; |
830 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { | 978 | if (rcu_gp_in_progress(rsp) && |
979 | (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { | ||
831 | 980 | ||
832 | /* We haven't checked in, so go dump stack. */ | 981 | /* We haven't checked in, so go dump stack. */ |
833 | print_cpu_stall(rsp); | 982 | print_cpu_stall(rsp); |
@@ -889,12 +1038,8 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
889 | */ | 1038 | */ |
890 | rdp->gpnum = rnp->gpnum; | 1039 | rdp->gpnum = rnp->gpnum; |
891 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); | 1040 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); |
892 | if (rnp->qsmask & rdp->grpmask) { | 1041 | rdp->passed_quiesce = 0; |
893 | rdp->qs_pending = 1; | 1042 | rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); |
894 | rdp->passed_quiesce = 0; | ||
895 | } else { | ||
896 | rdp->qs_pending = 0; | ||
897 | } | ||
898 | zero_cpu_stall_ticks(rdp); | 1043 | zero_cpu_stall_ticks(rdp); |
899 | } | 1044 | } |
900 | } | 1045 | } |
@@ -974,10 +1119,13 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
974 | * our behalf. Catch up with this state to avoid noting | 1119 | * our behalf. Catch up with this state to avoid noting |
975 | * spurious new grace periods. If another grace period | 1120 | * spurious new grace periods. If another grace period |
976 | * has started, then rnp->gpnum will have advanced, so | 1121 | * has started, then rnp->gpnum will have advanced, so |
977 | * we will detect this later on. | 1122 | * we will detect this later on. Of course, any quiescent |
1123 | * states we found for the old GP are now invalid. | ||
978 | */ | 1124 | */ |
979 | if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) | 1125 | if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) { |
980 | rdp->gpnum = rdp->completed; | 1126 | rdp->gpnum = rdp->completed; |
1127 | rdp->passed_quiesce = 0; | ||
1128 | } | ||
981 | 1129 | ||
982 | /* | 1130 | /* |
983 | * If RCU does not need a quiescent state from this CPU, | 1131 | * If RCU does not need a quiescent state from this CPU, |
@@ -1021,97 +1169,56 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
1021 | /* Prior grace period ended, so advance callbacks for current CPU. */ | 1169 | /* Prior grace period ended, so advance callbacks for current CPU. */ |
1022 | __rcu_process_gp_end(rsp, rnp, rdp); | 1170 | __rcu_process_gp_end(rsp, rnp, rdp); |
1023 | 1171 | ||
1024 | /* | ||
1025 | * Because this CPU just now started the new grace period, we know | ||
1026 | * that all of its callbacks will be covered by this upcoming grace | ||
1027 | * period, even the ones that were registered arbitrarily recently. | ||
1028 | * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. | ||
1029 | * | ||
1030 | * Other CPUs cannot be sure exactly when the grace period started. | ||
1031 | * Therefore, their recently registered callbacks must pass through | ||
1032 | * an additional RCU_NEXT_READY stage, so that they will be handled | ||
1033 | * by the next RCU grace period. | ||
1034 | */ | ||
1035 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | ||
1036 | rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | ||
1037 | |||
1038 | /* Set state so that this CPU will detect the next quiescent state. */ | 1172 | /* Set state so that this CPU will detect the next quiescent state. */ |
1039 | __note_new_gpnum(rsp, rnp, rdp); | 1173 | __note_new_gpnum(rsp, rnp, rdp); |
1040 | } | 1174 | } |
1041 | 1175 | ||
1042 | /* | 1176 | /* |
1043 | * Start a new RCU grace period if warranted, re-initializing the hierarchy | 1177 | * Initialize a new grace period. |
1044 | * in preparation for detecting the next grace period. The caller must hold | ||
1045 | * the root node's ->lock, which is released before return. Hard irqs must | ||
1046 | * be disabled. | ||
1047 | * | ||
1048 | * Note that it is legal for a dying CPU (which is marked as offline) to | ||
1049 | * invoke this function. This can happen when the dying CPU reports its | ||
1050 | * quiescent state. | ||
1051 | */ | 1178 | */ |
1052 | static void | 1179 | static int rcu_gp_init(struct rcu_state *rsp) |
1053 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | ||
1054 | __releases(rcu_get_root(rsp)->lock) | ||
1055 | { | 1180 | { |
1056 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1181 | struct rcu_data *rdp; |
1057 | struct rcu_node *rnp = rcu_get_root(rsp); | 1182 | struct rcu_node *rnp = rcu_get_root(rsp); |
1058 | 1183 | ||
1059 | if (!rcu_scheduler_fully_active || | 1184 | raw_spin_lock_irq(&rnp->lock); |
1060 | !cpu_needs_another_gp(rsp, rdp)) { | 1185 | rsp->gp_flags = 0; /* Clear all flags: New grace period. */ |
1061 | /* | ||
1062 | * Either the scheduler hasn't yet spawned the first | ||
1063 | * non-idle task or this CPU does not need another | ||
1064 | * grace period. Either way, don't start a new grace | ||
1065 | * period. | ||
1066 | */ | ||
1067 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1068 | return; | ||
1069 | } | ||
1070 | 1186 | ||
1071 | if (rsp->fqs_active) { | 1187 | if (rcu_gp_in_progress(rsp)) { |
1072 | /* | 1188 | /* Grace period already in progress, don't start another. */ |
1073 | * This CPU needs a grace period, but force_quiescent_state() | 1189 | raw_spin_unlock_irq(&rnp->lock); |
1074 | * is running. Tell it to start one on this CPU's behalf. | 1190 | return 0; |
1075 | */ | ||
1076 | rsp->fqs_need_gp = 1; | ||
1077 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1078 | return; | ||
1079 | } | 1191 | } |
1080 | 1192 | ||
1081 | /* Advance to a new grace period and initialize state. */ | 1193 | /* Advance to a new grace period and initialize state. */ |
1082 | rsp->gpnum++; | 1194 | rsp->gpnum++; |
1083 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | 1195 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); |
1084 | WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); | ||
1085 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | ||
1086 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | ||
1087 | record_gp_stall_check_time(rsp); | 1196 | record_gp_stall_check_time(rsp); |
1088 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ | 1197 | raw_spin_unlock_irq(&rnp->lock); |
1089 | 1198 | ||
1090 | /* Exclude any concurrent CPU-hotplug operations. */ | 1199 | /* Exclude any concurrent CPU-hotplug operations. */ |
1091 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 1200 | get_online_cpus(); |
1092 | 1201 | ||
1093 | /* | 1202 | /* |
1094 | * Set the quiescent-state-needed bits in all the rcu_node | 1203 | * Set the quiescent-state-needed bits in all the rcu_node |
1095 | * structures for all currently online CPUs in breadth-first | 1204 | * structures for all currently online CPUs in breadth-first order, |
1096 | * order, starting from the root rcu_node structure. This | 1205 | * starting from the root rcu_node structure, relying on the layout |
1097 | * operation relies on the layout of the hierarchy within the | 1206 | * of the tree within the rsp->node[] array. Note that other CPUs |
1098 | * rsp->node[] array. Note that other CPUs will access only | 1207 | * will access only the leaves of the hierarchy, thus seeing that no |
1099 | * the leaves of the hierarchy, which still indicate that no | ||
1100 | * grace period is in progress, at least until the corresponding | 1208 | * grace period is in progress, at least until the corresponding |
1101 | * leaf node has been initialized. In addition, we have excluded | 1209 | * leaf node has been initialized. In addition, we have excluded |
1102 | * CPU-hotplug operations. | 1210 | * CPU-hotplug operations. |
1103 | * | 1211 | * |
1104 | * Note that the grace period cannot complete until we finish | 1212 | * The grace period cannot complete until the initialization |
1105 | * the initialization process, as there will be at least one | 1213 | * process finishes, because this kthread handles both. |
1106 | * qsmask bit set in the root node until that time, namely the | ||
1107 | * one corresponding to this CPU, due to the fact that we have | ||
1108 | * irqs disabled. | ||
1109 | */ | 1214 | */ |
1110 | rcu_for_each_node_breadth_first(rsp, rnp) { | 1215 | rcu_for_each_node_breadth_first(rsp, rnp) { |
1111 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1216 | raw_spin_lock_irq(&rnp->lock); |
1217 | rdp = this_cpu_ptr(rsp->rda); | ||
1112 | rcu_preempt_check_blocked_tasks(rnp); | 1218 | rcu_preempt_check_blocked_tasks(rnp); |
1113 | rnp->qsmask = rnp->qsmaskinit; | 1219 | rnp->qsmask = rnp->qsmaskinit; |
1114 | rnp->gpnum = rsp->gpnum; | 1220 | rnp->gpnum = rsp->gpnum; |
1221 | WARN_ON_ONCE(rnp->completed != rsp->completed); | ||
1115 | rnp->completed = rsp->completed; | 1222 | rnp->completed = rsp->completed; |
1116 | if (rnp == rdp->mynode) | 1223 | if (rnp == rdp->mynode) |
1117 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 1224 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
@@ -1119,37 +1226,54 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
1119 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | 1226 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, |
1120 | rnp->level, rnp->grplo, | 1227 | rnp->level, rnp->grplo, |
1121 | rnp->grphi, rnp->qsmask); | 1228 | rnp->grphi, rnp->qsmask); |
1122 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1229 | raw_spin_unlock_irq(&rnp->lock); |
1230 | #ifdef CONFIG_PROVE_RCU_DELAY | ||
1231 | if ((random32() % (rcu_num_nodes * 8)) == 0) | ||
1232 | schedule_timeout_uninterruptible(2); | ||
1233 | #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ | ||
1234 | cond_resched(); | ||
1123 | } | 1235 | } |
1124 | 1236 | ||
1125 | rnp = rcu_get_root(rsp); | 1237 | put_online_cpus(); |
1126 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1238 | return 1; |
1127 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ | ||
1128 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
1129 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
1130 | } | 1239 | } |
1131 | 1240 | ||
1132 | /* | 1241 | /* |
1133 | * Report a full set of quiescent states to the specified rcu_state | 1242 | * Do one round of quiescent-state forcing. |
1134 | * data structure. This involves cleaning up after the prior grace | ||
1135 | * period and letting rcu_start_gp() start up the next grace period | ||
1136 | * if one is needed. Note that the caller must hold rnp->lock, as | ||
1137 | * required by rcu_start_gp(), which will release it. | ||
1138 | */ | 1243 | */ |
1139 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | 1244 | int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) |
1140 | __releases(rcu_get_root(rsp)->lock) | ||
1141 | { | 1245 | { |
1142 | unsigned long gp_duration; | 1246 | int fqs_state = fqs_state_in; |
1143 | struct rcu_node *rnp = rcu_get_root(rsp); | 1247 | struct rcu_node *rnp = rcu_get_root(rsp); |
1144 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
1145 | 1248 | ||
1146 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 1249 | rsp->n_force_qs++; |
1250 | if (fqs_state == RCU_SAVE_DYNTICK) { | ||
1251 | /* Collect dyntick-idle snapshots. */ | ||
1252 | force_qs_rnp(rsp, dyntick_save_progress_counter); | ||
1253 | fqs_state = RCU_FORCE_QS; | ||
1254 | } else { | ||
1255 | /* Handle dyntick-idle and offline CPUs. */ | ||
1256 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs); | ||
1257 | } | ||
1258 | /* Clear flag to prevent immediate re-entry. */ | ||
1259 | if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { | ||
1260 | raw_spin_lock_irq(&rnp->lock); | ||
1261 | rsp->gp_flags &= ~RCU_GP_FLAG_FQS; | ||
1262 | raw_spin_unlock_irq(&rnp->lock); | ||
1263 | } | ||
1264 | return fqs_state; | ||
1265 | } | ||
1147 | 1266 | ||
1148 | /* | 1267 | /* |
1149 | * Ensure that all grace-period and pre-grace-period activity | 1268 | * Clean up after the old grace period. |
1150 | * is seen before the assignment to rsp->completed. | 1269 | */ |
1151 | */ | 1270 | static void rcu_gp_cleanup(struct rcu_state *rsp) |
1152 | smp_mb(); /* See above block comment. */ | 1271 | { |
1272 | unsigned long gp_duration; | ||
1273 | struct rcu_data *rdp; | ||
1274 | struct rcu_node *rnp = rcu_get_root(rsp); | ||
1275 | |||
1276 | raw_spin_lock_irq(&rnp->lock); | ||
1153 | gp_duration = jiffies - rsp->gp_start; | 1277 | gp_duration = jiffies - rsp->gp_start; |
1154 | if (gp_duration > rsp->gp_max) | 1278 | if (gp_duration > rsp->gp_max) |
1155 | rsp->gp_max = gp_duration; | 1279 | rsp->gp_max = gp_duration; |
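rcu_gp_init() walks the tree with rcu_for_each_node_breadth_first(), which is effectively a linear scan of rsp->node[]: the rcu_node hierarchy is stored in that array in breadth-first order, so visiting the array front to back always handles a parent before any of its children, and other CPUs (which look only at their own leaf) never see a leaf prepared for a grace period whose upper levels are not yet set up. A small standalone illustration of that layout property (the sizes and field names are invented):

```c
#include <stdio.h>

/* A tiny two-level tree stored breadth-first: node 0 is the root. */
#define NUM_NODES 5

struct node {
	int parent;		/* index of the parent, -1 for the root */
	unsigned long gpnum;	/* grace-period number this node knows about */
};

int main(void)
{
	struct node tree[NUM_NODES] = {
		{ -1, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
	};
	unsigned long new_gp = 42;

	/* Breadth-first traversal is just an array walk. */
	for (int i = 0; i < NUM_NODES; i++) {
		if (tree[i].parent >= 0 && tree[tree[i].parent].gpnum != new_gp)
			printf("bug: node %d initialized before its parent\n", i);
		tree[i].gpnum = new_gp;
		printf("node %d now knows about grace period %lu\n",
		       i, tree[i].gpnum);
	}
	return 0;
}
```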
@@ -1161,35 +1285,149 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
1161 | * they can do to advance the grace period. It is therefore | 1285 | * they can do to advance the grace period. It is therefore |
1162 | * safe for us to drop the lock in order to mark the grace | 1286 | * safe for us to drop the lock in order to mark the grace |
1163 | * period as completed in all of the rcu_node structures. | 1287 | * period as completed in all of the rcu_node structures. |
1164 | * | ||
1165 | * But if this CPU needs another grace period, it will take | ||
1166 | * care of this while initializing the next grace period. | ||
1167 | * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL | ||
1168 | * because the callbacks have not yet been advanced: Those | ||
1169 | * callbacks are waiting on the grace period that just now | ||
1170 | * completed. | ||
1171 | */ | 1288 | */ |
1172 | if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { | 1289 | raw_spin_unlock_irq(&rnp->lock); |
1173 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
1174 | 1290 | ||
1175 | /* | 1291 | /* |
1176 | * Propagate new ->completed value to rcu_node structures | 1292 | * Propagate new ->completed value to rcu_node structures so |
1177 | * so that other CPUs don't have to wait until the start | 1293 | * that other CPUs don't have to wait until the start of the next |
1178 | * of the next grace period to process their callbacks. | 1294 | * grace period to process their callbacks. This also avoids |
1179 | */ | 1295 | * some nasty RCU grace-period initialization races by forcing |
1180 | rcu_for_each_node_breadth_first(rsp, rnp) { | 1296 | * the end of the current grace period to be completely recorded in |
1181 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1297 | * all of the rcu_node structures before the beginning of the next |
1182 | rnp->completed = rsp->gpnum; | 1298 | * grace period is recorded in any of the rcu_node structures. |
1183 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1299 | */ |
1184 | } | 1300 | rcu_for_each_node_breadth_first(rsp, rnp) { |
1185 | rnp = rcu_get_root(rsp); | 1301 | raw_spin_lock_irq(&rnp->lock); |
1186 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1302 | rnp->completed = rsp->gpnum; |
1303 | raw_spin_unlock_irq(&rnp->lock); | ||
1304 | cond_resched(); | ||
1187 | } | 1305 | } |
1306 | rnp = rcu_get_root(rsp); | ||
1307 | raw_spin_lock_irq(&rnp->lock); | ||
1188 | 1308 | ||
1189 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | 1309 | rsp->completed = rsp->gpnum; /* Declare grace period done. */ |
1190 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | 1310 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); |
1191 | rsp->fqs_state = RCU_GP_IDLE; | 1311 | rsp->fqs_state = RCU_GP_IDLE; |
1192 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 1312 | rdp = this_cpu_ptr(rsp->rda); |
1313 | if (cpu_needs_another_gp(rsp, rdp)) | ||
1314 | rsp->gp_flags = 1; | ||
1315 | raw_spin_unlock_irq(&rnp->lock); | ||
1316 | } | ||
1317 | |||
1318 | /* | ||
1319 | * Body of kthread that handles grace periods. | ||
1320 | */ | ||
1321 | static int __noreturn rcu_gp_kthread(void *arg) | ||
1322 | { | ||
1323 | int fqs_state; | ||
1324 | unsigned long j; | ||
1325 | int ret; | ||
1326 | struct rcu_state *rsp = arg; | ||
1327 | struct rcu_node *rnp = rcu_get_root(rsp); | ||
1328 | |||
1329 | for (;;) { | ||
1330 | |||
1331 | /* Handle grace-period start. */ | ||
1332 | for (;;) { | ||
1333 | wait_event_interruptible(rsp->gp_wq, | ||
1334 | rsp->gp_flags & | ||
1335 | RCU_GP_FLAG_INIT); | ||
1336 | if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && | ||
1337 | rcu_gp_init(rsp)) | ||
1338 | break; | ||
1339 | cond_resched(); | ||
1340 | flush_signals(current); | ||
1341 | } | ||
1342 | |||
1343 | /* Handle quiescent-state forcing. */ | ||
1344 | fqs_state = RCU_SAVE_DYNTICK; | ||
1345 | j = jiffies_till_first_fqs; | ||
1346 | if (j > HZ) { | ||
1347 | j = HZ; | ||
1348 | jiffies_till_first_fqs = HZ; | ||
1349 | } | ||
1350 | for (;;) { | ||
1351 | rsp->jiffies_force_qs = jiffies + j; | ||
1352 | ret = wait_event_interruptible_timeout(rsp->gp_wq, | ||
1353 | (rsp->gp_flags & RCU_GP_FLAG_FQS) || | ||
1354 | (!ACCESS_ONCE(rnp->qsmask) && | ||
1355 | !rcu_preempt_blocked_readers_cgp(rnp)), | ||
1356 | j); | ||
1357 | /* If grace period done, leave loop. */ | ||
1358 | if (!ACCESS_ONCE(rnp->qsmask) && | ||
1359 | !rcu_preempt_blocked_readers_cgp(rnp)) | ||
1360 | break; | ||
1361 | /* If time for quiescent-state forcing, do it. */ | ||
1362 | if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { | ||
1363 | fqs_state = rcu_gp_fqs(rsp, fqs_state); | ||
1364 | cond_resched(); | ||
1365 | } else { | ||
1366 | /* Deal with stray signal. */ | ||
1367 | cond_resched(); | ||
1368 | flush_signals(current); | ||
1369 | } | ||
1370 | j = jiffies_till_next_fqs; | ||
1371 | if (j > HZ) { | ||
1372 | j = HZ; | ||
1373 | jiffies_till_next_fqs = HZ; | ||
1374 | } else if (j < 1) { | ||
1375 | j = 1; | ||
1376 | jiffies_till_next_fqs = 1; | ||
1377 | } | ||
1378 | } | ||
1379 | |||
1380 | /* Handle grace-period end. */ | ||
1381 | rcu_gp_cleanup(rsp); | ||
1382 | } | ||
1383 | } | ||
1384 | |||
1385 | /* | ||
1386 | * Start a new RCU grace period if warranted, re-initializing the hierarchy | ||
1387 | * in preparation for detecting the next grace period. The caller must hold | ||
1388 | * the root node's ->lock, which is released before return. Hard irqs must | ||
1389 | * be disabled. | ||
1390 | * | ||
1391 | * Note that it is legal for a dying CPU (which is marked as offline) to | ||
1392 | * invoke this function. This can happen when the dying CPU reports its | ||
1393 | * quiescent state. | ||
1394 | */ | ||
1395 | static void | ||
1396 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | ||
1397 | __releases(rcu_get_root(rsp)->lock) | ||
1398 | { | ||
1399 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
1400 | struct rcu_node *rnp = rcu_get_root(rsp); | ||
1401 | |||
1402 | if (!rsp->gp_kthread || | ||
1403 | !cpu_needs_another_gp(rsp, rdp)) { | ||
1404 | /* | ||
1405 | * Either we have not yet spawned the grace-period | ||
1406 | * task or this CPU does not need another grace period. | ||
1407 | * Either way, don't start a new grace period. | ||
1408 | */ | ||
1409 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1410 | return; | ||
1411 | } | ||
1412 | |||
1413 | rsp->gp_flags = RCU_GP_FLAG_INIT; | ||
1414 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1415 | wake_up(&rsp->gp_wq); | ||
1416 | } | ||
1417 | |||
1418 | /* | ||
1419 | * Report a full set of quiescent states to the specified rcu_state | ||
1420 | * data structure. This involves cleaning up after the prior grace | ||
1421 | * period and letting rcu_start_gp() start up the next grace period | ||
1422 | * if one is needed. Note that the caller must hold rnp->lock, as | ||
1423 | * required by rcu_start_gp(), which will release it. | ||
1424 | */ | ||
1425 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | ||
1426 | __releases(rcu_get_root(rsp)->lock) | ||
1427 | { | ||
1428 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | ||
1429 | raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); | ||
1430 | wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ | ||
1193 | } | 1431 | } |
1194 | 1432 | ||
1195 | /* | 1433 | /* |
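The new rcu_gp_kthread() turns grace-period handling into an event loop: sleep until RCU_GP_FLAG_INIT is set, initialize the grace period, then repeatedly sleep with a timeout and force quiescent states whenever the timeout expires or RCU_GP_FLAG_FQS is set, and clean up once the root's qsmask (and any blocked-reader list) drains. A compressed pthread sketch of that wait-for-flag-or-timeout control flow (it models only the control flow, not any RCU semantics, and every name in it is invented):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define FLAG_INIT 0x1
#define FLAG_FQS  0x2

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wq   = PTHREAD_COND_INITIALIZER;
static unsigned int gp_flags;
static bool gp_done;

static void *gp_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!(gp_flags & FLAG_INIT))		/* wait for a grace-period request */
		pthread_cond_wait(&wq, &lock);
	gp_flags = 0;
	printf("grace period: init\n");

	while (!gp_done) {			/* wait for FQS request or completion */
		struct timespec to;
		clock_gettime(CLOCK_REALTIME, &to);
		to.tv_sec += 1;			/* stands in for jiffies_till_*_fqs */
		int ret = pthread_cond_timedwait(&wq, &lock, &to);
		if (gp_done)
			break;
		if (ret != 0 || (gp_flags & FLAG_FQS)) {
			gp_flags &= ~FLAG_FQS;
			printf("grace period: force quiescent states\n");
		}
	}
	printf("grace period: cleanup\n");
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, gp_thread, NULL);

	pthread_mutex_lock(&lock);
	gp_flags |= FLAG_INIT;			/* rcu_start_gp() analogue */
	pthread_cond_broadcast(&wq);
	pthread_mutex_unlock(&lock);

	sleep(2);				/* let at least one FQS timeout fire */

	pthread_mutex_lock(&lock);
	gp_done = true;				/* "last quiescent state reported" */
	pthread_cond_broadcast(&wq);
	pthread_mutex_unlock(&lock);

	pthread_join(tid, NULL);
	return 0;
}
```

This is the shape that lets force_quiescent_state() below shrink to setting RCU_GP_FLAG_FQS and waking the kthread.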
@@ -1258,7 +1496,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
1258 | * based on quiescent states detected in an earlier grace period! | 1496 | * based on quiescent states detected in an earlier grace period! |
1259 | */ | 1497 | */ |
1260 | static void | 1498 | static void |
1261 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) | 1499 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) |
1262 | { | 1500 | { |
1263 | unsigned long flags; | 1501 | unsigned long flags; |
1264 | unsigned long mask; | 1502 | unsigned long mask; |
@@ -1266,7 +1504,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las | |||
1266 | 1504 | ||
1267 | rnp = rdp->mynode; | 1505 | rnp = rdp->mynode; |
1268 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1506 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1269 | if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { | 1507 | if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || |
1508 | rnp->completed == rnp->gpnum) { | ||
1270 | 1509 | ||
1271 | /* | 1510 | /* |
1272 | * The grace period in which this quiescent state was | 1511 | * The grace period in which this quiescent state was |
@@ -1325,7 +1564,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1325 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the | 1564 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the |
1326 | * judge of that). | 1565 | * judge of that). |
1327 | */ | 1566 | */ |
1328 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); | 1567 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp); |
1329 | } | 1568 | } |
1330 | 1569 | ||
1331 | #ifdef CONFIG_HOTPLUG_CPU | 1570 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1390,17 +1629,6 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
1390 | int i; | 1629 | int i; |
1391 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); | 1630 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); |
1392 | 1631 | ||
1393 | /* | ||
1394 | * If there is an rcu_barrier() operation in progress, then | ||
1395 | * only the task doing that operation is permitted to adopt | ||
1396 | * callbacks. To do otherwise breaks rcu_barrier() and friends | ||
1397 | * by causing them to fail to wait for the callbacks in the | ||
1398 | * orphanage. | ||
1399 | */ | ||
1400 | if (rsp->rcu_barrier_in_progress && | ||
1401 | rsp->rcu_barrier_in_progress != current) | ||
1402 | return; | ||
1403 | |||
1404 | /* Do the accounting first. */ | 1632 | /* Do the accounting first. */ |
1405 | rdp->qlen_lazy += rsp->qlen_lazy; | 1633 | rdp->qlen_lazy += rsp->qlen_lazy; |
1406 | rdp->qlen += rsp->qlen; | 1634 | rdp->qlen += rsp->qlen; |
@@ -1455,9 +1683,8 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | |||
1455 | * The CPU has been completely removed, and some other CPU is reporting | 1683 | * The CPU has been completely removed, and some other CPU is reporting |
1456 | * this fact from process context. Do the remainder of the cleanup, | 1684 | * this fact from process context. Do the remainder of the cleanup, |
1457 | * including orphaning the outgoing CPU's RCU callbacks, and also | 1685 | * including orphaning the outgoing CPU's RCU callbacks, and also |
1458 | * adopting them, if there is no _rcu_barrier() instance running. | 1686 | * adopting them. There can only be one CPU hotplug operation at a time, |
1459 | * There can only be one CPU hotplug operation at a time, so no other | 1687 | * so no other CPU can be attempting to update rcu_cpu_kthread_task. |
1460 | * CPU can be attempting to update rcu_cpu_kthread_task. | ||
1461 | */ | 1688 | */ |
1462 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | 1689 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
1463 | { | 1690 | { |
@@ -1468,8 +1695,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1468 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ | 1695 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
1469 | 1696 | ||
1470 | /* Adjust any no-longer-needed kthreads. */ | 1697 | /* Adjust any no-longer-needed kthreads. */ |
1471 | rcu_stop_cpu_kthread(cpu); | 1698 | rcu_boost_kthread_setaffinity(rnp, -1); |
1472 | rcu_node_kthread_setaffinity(rnp, -1); | ||
1473 | 1699 | ||
1474 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ | 1700 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ |
1475 | 1701 | ||
@@ -1515,14 +1741,13 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1515 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, | 1741 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, |
1516 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", | 1742 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", |
1517 | cpu, rdp->qlen, rdp->nxtlist); | 1743 | cpu, rdp->qlen, rdp->nxtlist); |
1744 | init_callback_list(rdp); | ||
1745 | /* Disallow further callbacks on this CPU. */ | ||
1746 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | ||
1518 | } | 1747 | } |
1519 | 1748 | ||
1520 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1749 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1521 | 1750 | ||
1522 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1523 | { | ||
1524 | } | ||
1525 | |||
1526 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | 1751 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
1527 | { | 1752 | { |
1528 | } | 1753 | } |
@@ -1687,6 +1912,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
1687 | struct rcu_node *rnp; | 1912 | struct rcu_node *rnp; |
1688 | 1913 | ||
1689 | rcu_for_each_leaf_node(rsp, rnp) { | 1914 | rcu_for_each_leaf_node(rsp, rnp) { |
1915 | cond_resched(); | ||
1690 | mask = 0; | 1916 | mask = 0; |
1691 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1917 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1692 | if (!rcu_gp_in_progress(rsp)) { | 1918 | if (!rcu_gp_in_progress(rsp)) { |
@@ -1723,72 +1949,39 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
1723 | * Force quiescent states on reluctant CPUs, and also detect which | 1949 | * Force quiescent states on reluctant CPUs, and also detect which |
1724 | * CPUs are in dyntick-idle mode. | 1950 | * CPUs are in dyntick-idle mode. |
1725 | */ | 1951 | */ |
1726 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | 1952 | static void force_quiescent_state(struct rcu_state *rsp) |
1727 | { | 1953 | { |
1728 | unsigned long flags; | 1954 | unsigned long flags; |
1729 | struct rcu_node *rnp = rcu_get_root(rsp); | 1955 | bool ret; |
1730 | 1956 | struct rcu_node *rnp; | |
1731 | trace_rcu_utilization("Start fqs"); | 1957 | struct rcu_node *rnp_old = NULL; |
1732 | if (!rcu_gp_in_progress(rsp)) { | 1958 | |
1733 | trace_rcu_utilization("End fqs"); | 1959 | /* Funnel through hierarchy to reduce memory contention. */ |
1734 | return; /* No grace period in progress, nothing to force. */ | 1960 | rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode; |
1735 | } | 1961 | for (; rnp != NULL; rnp = rnp->parent) { |
1736 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { | 1962 | ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || |
1737 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ | 1963 | !raw_spin_trylock(&rnp->fqslock); |
1738 | trace_rcu_utilization("End fqs"); | 1964 | if (rnp_old != NULL) |
1739 | return; /* Someone else is already on the job. */ | 1965 | raw_spin_unlock(&rnp_old->fqslock); |
1740 | } | 1966 | if (ret) { |
1741 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) | 1967 | rsp->n_force_qs_lh++; |
1742 | goto unlock_fqs_ret; /* no emergency and done recently. */ | 1968 | return; |
1743 | rsp->n_force_qs++; | 1969 | } |
1744 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1970 | rnp_old = rnp; |
1745 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | ||
1746 | if(!rcu_gp_in_progress(rsp)) { | ||
1747 | rsp->n_force_qs_ngp++; | ||
1748 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
1749 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ | ||
1750 | } | ||
1751 | rsp->fqs_active = 1; | ||
1752 | switch (rsp->fqs_state) { | ||
1753 | case RCU_GP_IDLE: | ||
1754 | case RCU_GP_INIT: | ||
1755 | |||
1756 | break; /* grace period idle or initializing, ignore. */ | ||
1757 | |||
1758 | case RCU_SAVE_DYNTICK: | ||
1759 | |||
1760 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
1761 | |||
1762 | /* Record dyntick-idle state. */ | ||
1763 | force_qs_rnp(rsp, dyntick_save_progress_counter); | ||
1764 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | ||
1765 | if (rcu_gp_in_progress(rsp)) | ||
1766 | rsp->fqs_state = RCU_FORCE_QS; | ||
1767 | break; | ||
1768 | |||
1769 | case RCU_FORCE_QS: | ||
1770 | |||
1771 | /* Check dyntick-idle state, send IPI to laggarts. */ | ||
1772 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
1773 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs); | ||
1774 | |||
1775 | /* Leave state in case more forcing is required. */ | ||
1776 | |||
1777 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | ||
1778 | break; | ||
1779 | } | 1971 | } |
1780 | rsp->fqs_active = 0; | 1972 | /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ |
1781 | if (rsp->fqs_need_gp) { | 1973 | |
1782 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ | 1974 | /* Reached the root of the rcu_node tree, acquire lock. */ |
1783 | rsp->fqs_need_gp = 0; | 1975 | raw_spin_lock_irqsave(&rnp_old->lock, flags); |
1784 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ | 1976 | raw_spin_unlock(&rnp_old->fqslock); |
1785 | trace_rcu_utilization("End fqs"); | 1977 | if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { |
1786 | return; | 1978 | rsp->n_force_qs_lh++; |
1979 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); | ||
1980 | return; /* Someone beat us to it. */ | ||
1787 | } | 1981 | } |
1788 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1982 | rsp->gp_flags |= RCU_GP_FLAG_FQS; |
1789 | unlock_fqs_ret: | 1983 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); |
1790 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); | 1984 | wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ |
1791 | trace_rcu_utilization("End fqs"); | ||
1792 | } | 1985 | } |
1793 | 1986 | ||
1794 | /* | 1987 | /* |
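The rewritten force_quiescent_state() above replaces the old global ->fqslock serialization with per-rcu_node funnel locking: each caller trylocks its way from its own leaf rcu_node toward the root, releasing the lock one level down, and drops out as soon as a trylock fails or the FQS flag is already set, so at most one caller per contended subtree ever reaches the root. Below is a minimal userspace sketch of that pattern, with pthread mutexes and C11 atomics standing in for raw spinlocks and per-state fields; request_fqs(), FLAG_FQS and n_lost are illustrative names, not the kernel's.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_FQS 0x2				/* cf. RCU_GP_FLAG_FQS */

struct node {
	pthread_mutex_t fqslock;		/* funnel lock, cf. rnp->fqslock */
	pthread_mutex_t lock;			/* the node's main lock, cf. rnp->lock */
	struct node *parent;			/* NULL at the root */
};

static atomic_uint gp_flags;			/* stands in for rsp->gp_flags */
static atomic_ulong n_lost;			/* stands in for rsp->n_force_qs_lh */

static void request_fqs(struct node *leaf)
{
	struct node *rnp, *rnp_old = NULL;
	bool lost;

	/* Funnel from this CPU's leaf node up toward the root. */
	for (rnp = leaf; rnp != NULL; rnp = rnp->parent) {
		lost = (atomic_load(&gp_flags) & FLAG_FQS) ||
		       pthread_mutex_trylock(&rnp->fqslock) != 0;
		if (rnp_old != NULL)
			pthread_mutex_unlock(&rnp_old->fqslock);
		if (lost) {
			atomic_fetch_add(&n_lost, 1);	/* someone else is on it */
			return;
		}
		rnp_old = rnp;
	}

	/* rnp_old is now the root and we hold its funnel lock. */
	pthread_mutex_lock(&rnp_old->lock);
	pthread_mutex_unlock(&rnp_old->fqslock);
	if (atomic_load(&gp_flags) & FLAG_FQS) {
		atomic_fetch_add(&n_lost, 1);		/* someone beat us to it */
		pthread_mutex_unlock(&rnp_old->lock);
		return;
	}
	atomic_fetch_or(&gp_flags, FLAG_FQS);	/* kernel also wakes rsp->gp_wq here */
	pthread_mutex_unlock(&rnp_old->lock);
}

int main(void)
{
	static struct node root = { PTHREAD_MUTEX_INITIALIZER,
				    PTHREAD_MUTEX_INITIALIZER, NULL };
	static struct node leaf = { PTHREAD_MUTEX_INITIALIZER,
				    PTHREAD_MUTEX_INITIALIZER, &root };

	request_fqs(&leaf);
	printf("gp_flags=%#x lost=%lu\n",
	       atomic_load(&gp_flags), atomic_load(&n_lost));
	return 0;
}

Under contention most callers lose a trylock at or near the leaves, which is the point of funneling: the root node and the gp_flags word are touched by only a handful of CPUs instead of all of them.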
@@ -1805,13 +1998,6 @@ __rcu_process_callbacks(struct rcu_state *rsp) | |||
1805 | WARN_ON_ONCE(rdp->beenonline == 0); | 1998 | WARN_ON_ONCE(rdp->beenonline == 0); |
1806 | 1999 | ||
1807 | /* | 2000 | /* |
1808 | * If an RCU GP has gone long enough, go check for dyntick | ||
1809 | * idle CPUs and, if needed, send resched IPIs. | ||
1810 | */ | ||
1811 | if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | ||
1812 | force_quiescent_state(rsp, 1); | ||
1813 | |||
1814 | /* | ||
1815 | * Advance callbacks in response to end of earlier grace | 2001 | * Advance callbacks in response to end of earlier grace |
1816 | * period that some other CPU ended. | 2002 | * period that some other CPU ended. |
1817 | */ | 2003 | */ |
@@ -1838,6 +2024,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
1838 | { | 2024 | { |
1839 | struct rcu_state *rsp; | 2025 | struct rcu_state *rsp; |
1840 | 2026 | ||
2027 | if (cpu_is_offline(smp_processor_id())) | ||
2028 | return; | ||
1841 | trace_rcu_utilization("Start RCU core"); | 2029 | trace_rcu_utilization("Start RCU core"); |
1842 | for_each_rcu_flavor(rsp) | 2030 | for_each_rcu_flavor(rsp) |
1843 | __rcu_process_callbacks(rsp); | 2031 | __rcu_process_callbacks(rsp); |
@@ -1909,12 +2097,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, | |||
1909 | rdp->blimit = LONG_MAX; | 2097 | rdp->blimit = LONG_MAX; |
1910 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | 2098 | if (rsp->n_force_qs == rdp->n_force_qs_snap && |
1911 | *rdp->nxttail[RCU_DONE_TAIL] != head) | 2099 | *rdp->nxttail[RCU_DONE_TAIL] != head) |
1912 | force_quiescent_state(rsp, 0); | 2100 | force_quiescent_state(rsp); |
1913 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2101 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1914 | rdp->qlen_last_fqs_check = rdp->qlen; | 2102 | rdp->qlen_last_fqs_check = rdp->qlen; |
1915 | } | 2103 | } |
1916 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | 2104 | } |
1917 | force_quiescent_state(rsp, 1); | ||
1918 | } | 2105 | } |
1919 | 2106 | ||
1920 | static void | 2107 | static void |
@@ -1929,8 +2116,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1929 | head->func = func; | 2116 | head->func = func; |
1930 | head->next = NULL; | 2117 | head->next = NULL; |
1931 | 2118 | ||
1932 | smp_mb(); /* Ensure RCU update seen before callback registry. */ | ||
1933 | |||
1934 | /* | 2119 | /* |
1935 | * Opportunistically note grace-period endings and beginnings. | 2120 | * Opportunistically note grace-period endings and beginnings. |
1936 | * Note that we might see a beginning right after we see an | 2121 | * Note that we might see a beginning right after we see an |
@@ -1941,6 +2126,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1941 | rdp = this_cpu_ptr(rsp->rda); | 2126 | rdp = this_cpu_ptr(rsp->rda); |
1942 | 2127 | ||
1943 | /* Add the callback to our list. */ | 2128 | /* Add the callback to our list. */ |
2129 | if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) { | ||
2130 | /* _call_rcu() is illegal on offline CPU; leak the callback. */ | ||
2131 | WARN_ON_ONCE(1); | ||
2132 | local_irq_restore(flags); | ||
2133 | return; | ||
2134 | } | ||
1944 | ACCESS_ONCE(rdp->qlen)++; | 2135 | ACCESS_ONCE(rdp->qlen)++; |
1945 | if (lazy) | 2136 | if (lazy) |
1946 | rdp->qlen_lazy++; | 2137 | rdp->qlen_lazy++; |
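The NULL check added above is the consumer side of a convention set up earlier in this diff: rcu_cleanup_dead_cpu() reinitializes the dead CPU's list via init_callback_list() and then sets its RCU_NEXT_TAIL pointer to NULL to disallow further callbacks, __call_rcu() warns once and deliberately leaks any callback posted on such a CPU, and rcu_init_percpu_data() re-runs init_callback_list() to re-enable callbacks when the CPU returns. A simplified stand-alone model of that convention follows; struct cb_list and its helpers are stand-ins rather than the kernel's rcu_data layout.

#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
};

#define RCU_NEXT_TAIL 3				/* index of the last list segment */

struct cb_list {
	struct cb *head;
	struct cb **tail[RCU_NEXT_TAIL + 1];	/* per-segment tail pointers */
};

/* cf. init_callback_list(): every tail pointer aims at the empty head. */
static void cb_list_init(struct cb_list *l)
{
	int i;

	l->head = NULL;
	for (i = 0; i <= RCU_NEXT_TAIL; i++)
		l->tail[i] = &l->head;
}

/* cf. rcu_cleanup_dead_cpu(): a NULL next-tail means "no new callbacks". */
static void cb_list_disallow(struct cb_list *l)
{
	l->tail[RCU_NEXT_TAIL] = NULL;
}

/* cf. the new check in __call_rcu(): refuse (i.e. "leak") when disallowed. */
static int cb_enqueue(struct cb_list *l, struct cb *cbp)
{
	if (l->tail[RCU_NEXT_TAIL] == NULL)
		return -1;
	cbp->next = NULL;
	*l->tail[RCU_NEXT_TAIL] = cbp;
	l->tail[RCU_NEXT_TAIL] = &cbp->next;
	return 0;
}

int main(void)
{
	struct cb_list l;
	struct cb a, b;

	cb_list_init(&l);
	printf("online  enqueue: %d\n", cb_enqueue(&l, &a));
	cb_list_disallow(&l);
	printf("offline enqueue: %d\n", cb_enqueue(&l, &b));
	return 0;
}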
@@ -2195,17 +2386,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
2195 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 2386 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
2196 | if (rcu_scheduler_fully_active && | 2387 | if (rcu_scheduler_fully_active && |
2197 | rdp->qs_pending && !rdp->passed_quiesce) { | 2388 | rdp->qs_pending && !rdp->passed_quiesce) { |
2198 | |||
2199 | /* | ||
2200 | * If force_quiescent_state() coming soon and this CPU | ||
2201 | * needs a quiescent state, and this is either RCU-sched | ||
2202 | * or RCU-bh, force a local reschedule. | ||
2203 | */ | ||
2204 | rdp->n_rp_qs_pending++; | 2389 | rdp->n_rp_qs_pending++; |
2205 | if (!rdp->preemptible && | ||
2206 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | ||
2207 | jiffies)) | ||
2208 | set_need_resched(); | ||
2209 | } else if (rdp->qs_pending && rdp->passed_quiesce) { | 2390 | } else if (rdp->qs_pending && rdp->passed_quiesce) { |
2210 | rdp->n_rp_report_qs++; | 2391 | rdp->n_rp_report_qs++; |
2211 | return 1; | 2392 | return 1; |
@@ -2235,13 +2416,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
2235 | return 1; | 2416 | return 1; |
2236 | } | 2417 | } |
2237 | 2418 | ||
2238 | /* Has an RCU GP gone long enough to send resched IPIs &c? */ | ||
2239 | if (rcu_gp_in_progress(rsp) && | ||
2240 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { | ||
2241 | rdp->n_rp_need_fqs++; | ||
2242 | return 1; | ||
2243 | } | ||
2244 | |||
2245 | /* nothing to do */ | 2419 | /* nothing to do */ |
2246 | rdp->n_rp_need_nothing++; | 2420 | rdp->n_rp_need_nothing++; |
2247 | return 0; | 2421 | return 0; |
@@ -2326,13 +2500,10 @@ static void rcu_barrier_func(void *type) | |||
2326 | static void _rcu_barrier(struct rcu_state *rsp) | 2500 | static void _rcu_barrier(struct rcu_state *rsp) |
2327 | { | 2501 | { |
2328 | int cpu; | 2502 | int cpu; |
2329 | unsigned long flags; | ||
2330 | struct rcu_data *rdp; | 2503 | struct rcu_data *rdp; |
2331 | struct rcu_data rd; | ||
2332 | unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); | 2504 | unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); |
2333 | unsigned long snap_done; | 2505 | unsigned long snap_done; |
2334 | 2506 | ||
2335 | init_rcu_head_on_stack(&rd.barrier_head); | ||
2336 | _rcu_barrier_trace(rsp, "Begin", -1, snap); | 2507 | _rcu_barrier_trace(rsp, "Begin", -1, snap); |
2337 | 2508 | ||
2338 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 2509 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
@@ -2372,70 +2543,30 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
2372 | /* | 2543 | /* |
2373 | * Initialize the count to one rather than to zero in order to | 2544 | * Initialize the count to one rather than to zero in order to |
2374 | * avoid a too-soon return to zero in case of a short grace period | 2545 | * avoid a too-soon return to zero in case of a short grace period |
2375 | * (or preemption of this task). Also flag this task as doing | 2546 | * (or preemption of this task). Exclude CPU-hotplug operations |
2376 | * an rcu_barrier(). This will prevent anyone else from adopting | 2547 | * to ensure that no offline CPU has callbacks queued. |
2377 | * orphaned callbacks, which could cause otherwise failure if a | ||
2378 | * CPU went offline and quickly came back online. To see this, | ||
2379 | * consider the following sequence of events: | ||
2380 | * | ||
2381 | * 1. We cause CPU 0 to post an rcu_barrier_callback() callback. | ||
2382 | * 2. CPU 1 goes offline, orphaning its callbacks. | ||
2383 | * 3. CPU 0 adopts CPU 1's orphaned callbacks. | ||
2384 | * 4. CPU 1 comes back online. | ||
2385 | * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. | ||
2386 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening | ||
2387 | * us -- but before CPU 1's orphaned callbacks are invoked!!! | ||
2388 | */ | 2548 | */ |
2389 | init_completion(&rsp->barrier_completion); | 2549 | init_completion(&rsp->barrier_completion); |
2390 | atomic_set(&rsp->barrier_cpu_count, 1); | 2550 | atomic_set(&rsp->barrier_cpu_count, 1); |
2391 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 2551 | get_online_cpus(); |
2392 | rsp->rcu_barrier_in_progress = current; | ||
2393 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
2394 | 2552 | ||
2395 | /* | 2553 | /* |
2396 | * Force every CPU with callbacks to register a new callback | 2554 | * Force each CPU with callbacks to register a new callback. |
2397 | * that will tell us when all the preceding callbacks have | 2555 | * When that callback is invoked, we will know that all of the |
2398 | * been invoked. If an offline CPU has callbacks, wait for | 2556 | * corresponding CPU's preceding callbacks have been invoked. |
2399 | * it to either come back online or to finish orphaning those | ||
2400 | * callbacks. | ||
2401 | */ | 2557 | */ |
2402 | for_each_possible_cpu(cpu) { | 2558 | for_each_online_cpu(cpu) { |
2403 | preempt_disable(); | ||
2404 | rdp = per_cpu_ptr(rsp->rda, cpu); | 2559 | rdp = per_cpu_ptr(rsp->rda, cpu); |
2405 | if (cpu_is_offline(cpu)) { | 2560 | if (ACCESS_ONCE(rdp->qlen)) { |
2406 | _rcu_barrier_trace(rsp, "Offline", cpu, | ||
2407 | rsp->n_barrier_done); | ||
2408 | preempt_enable(); | ||
2409 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) | ||
2410 | schedule_timeout_interruptible(1); | ||
2411 | } else if (ACCESS_ONCE(rdp->qlen)) { | ||
2412 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, | 2561 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, |
2413 | rsp->n_barrier_done); | 2562 | rsp->n_barrier_done); |
2414 | smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); | 2563 | smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); |
2415 | preempt_enable(); | ||
2416 | } else { | 2564 | } else { |
2417 | _rcu_barrier_trace(rsp, "OnlineNQ", cpu, | 2565 | _rcu_barrier_trace(rsp, "OnlineNQ", cpu, |
2418 | rsp->n_barrier_done); | 2566 | rsp->n_barrier_done); |
2419 | preempt_enable(); | ||
2420 | } | 2567 | } |
2421 | } | 2568 | } |
2422 | 2569 | put_online_cpus(); | |
2423 | /* | ||
2424 | * Now that all online CPUs have rcu_barrier_callback() callbacks | ||
2425 | * posted, we can adopt all of the orphaned callbacks and place | ||
2426 | * an rcu_barrier_callback() callback after them. When that is done, | ||
2427 | * we are guaranteed to have an rcu_barrier_callback() callback | ||
2428 | * following every callback that could possibly have been | ||
2429 | * registered before _rcu_barrier() was called. | ||
2430 | */ | ||
2431 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | ||
2432 | rcu_adopt_orphan_cbs(rsp); | ||
2433 | rsp->rcu_barrier_in_progress = NULL; | ||
2434 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
2435 | atomic_inc(&rsp->barrier_cpu_count); | ||
2436 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ | ||
2437 | rd.rsp = rsp; | ||
2438 | rsp->call(&rd.barrier_head, rcu_barrier_callback); | ||
2439 | 2570 | ||
2440 | /* | 2571 | /* |
2441 | * Now that we have an rcu_barrier_callback() callback on each | 2572 | * Now that we have an rcu_barrier_callback() callback on each |
@@ -2456,8 +2587,6 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
2456 | 2587 | ||
2457 | /* Other rcu_barrier() invocations can now safely proceed. */ | 2588 | /* Other rcu_barrier() invocations can now safely proceed. */ |
2458 | mutex_unlock(&rsp->barrier_mutex); | 2589 | mutex_unlock(&rsp->barrier_mutex); |
2459 | |||
2460 | destroy_rcu_head_on_stack(&rd.barrier_head); | ||
2461 | } | 2590 | } |
2462 | 2591 | ||
2463 | /** | 2592 | /** |
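As reworked above, _rcu_barrier() excludes CPU hotplug with get_online_cpus() and posts rcu_barrier_callback() only on online CPUs with queued callbacks, so the old orphan-adoption dance disappears. What remains is the counting trick spelled out in the surviving comment: barrier_cpu_count starts at one so that early callback invocations cannot drive it to zero before every CPU has been covered, and the initial reference is dropped only after the loop. A userspace sketch of just that counting scheme, using pthreads and C11 atomics; barrier_callback(), cpu_thread() and NCPUS are illustrative stand-ins for rcu_barrier_callback(), the per-CPU callbacks and the online-CPU loop.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4					/* pretend number of online CPUs */

static atomic_int barrier_cpu_count;		/* cf. rsp->barrier_cpu_count */
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done_cv   = PTHREAD_COND_INITIALIZER;
static int done;				/* cf. rsp->barrier_completion */

/* Plays the role of rcu_barrier_callback(): last one out signals completion. */
static void barrier_callback(void)
{
	if (atomic_fetch_sub(&barrier_cpu_count, 1) == 1) {
		pthread_mutex_lock(&done_lock);
		done = 1;
		pthread_cond_signal(&done_cv);
		pthread_mutex_unlock(&done_lock);
	}
}

/* Each thread stands in for one online CPU invoking its queued callback. */
static void *cpu_thread(void *arg)
{
	(void)arg;
	barrier_callback();
	return NULL;
}

int main(void)
{
	pthread_t tid[NCPUS];
	int i;

	/* Start at one so the count cannot hit zero while still enqueueing. */
	atomic_store(&barrier_cpu_count, 1);
	for (i = 0; i < NCPUS; i++) {
		atomic_fetch_add(&barrier_cpu_count, 1);	/* "post" a callback */
		pthread_create(&tid[i], NULL, cpu_thread, NULL);
	}

	/* Drop the initial reference only after every CPU has been covered. */
	barrier_callback();

	pthread_mutex_lock(&done_lock);
	while (!done)
		pthread_cond_wait(&done_cv, &done_lock);
	pthread_mutex_unlock(&done_lock);

	for (i = 0; i < NCPUS; i++)
		pthread_join(tid[i], NULL);
	printf("all callbacks accounted for\n");
	return 0;
}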
@@ -2497,6 +2626,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
2497 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2626 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
2498 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); | 2627 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); |
2499 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 2628 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
2629 | #ifdef CONFIG_RCU_USER_QS | ||
2630 | WARN_ON_ONCE(rdp->dynticks->in_user); | ||
2631 | #endif | ||
2500 | rdp->cpu = cpu; | 2632 | rdp->cpu = cpu; |
2501 | rdp->rsp = rsp; | 2633 | rdp->rsp = rsp; |
2502 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2634 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
@@ -2523,6 +2655,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2523 | rdp->qlen_last_fqs_check = 0; | 2655 | rdp->qlen_last_fqs_check = 0; |
2524 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2656 | rdp->n_force_qs_snap = rsp->n_force_qs; |
2525 | rdp->blimit = blimit; | 2657 | rdp->blimit = blimit; |
2658 | init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ | ||
2526 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | 2659 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
2527 | atomic_set(&rdp->dynticks->dynticks, | 2660 | atomic_set(&rdp->dynticks->dynticks, |
2528 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 2661 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
@@ -2555,7 +2688,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2555 | rdp->completed = rnp->completed; | 2688 | rdp->completed = rnp->completed; |
2556 | rdp->passed_quiesce = 0; | 2689 | rdp->passed_quiesce = 0; |
2557 | rdp->qs_pending = 0; | 2690 | rdp->qs_pending = 0; |
2558 | rdp->passed_quiesce_gpnum = rnp->gpnum - 1; | ||
2559 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); | 2691 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); |
2560 | } | 2692 | } |
2561 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 2693 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
@@ -2594,12 +2726,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2594 | break; | 2726 | break; |
2595 | case CPU_ONLINE: | 2727 | case CPU_ONLINE: |
2596 | case CPU_DOWN_FAILED: | 2728 | case CPU_DOWN_FAILED: |
2597 | rcu_node_kthread_setaffinity(rnp, -1); | 2729 | rcu_boost_kthread_setaffinity(rnp, -1); |
2598 | rcu_cpu_kthread_setrt(cpu, 1); | ||
2599 | break; | 2730 | break; |
2600 | case CPU_DOWN_PREPARE: | 2731 | case CPU_DOWN_PREPARE: |
2601 | rcu_node_kthread_setaffinity(rnp, cpu); | 2732 | rcu_boost_kthread_setaffinity(rnp, cpu); |
2602 | rcu_cpu_kthread_setrt(cpu, 0); | ||
2603 | break; | 2733 | break; |
2604 | case CPU_DYING: | 2734 | case CPU_DYING: |
2605 | case CPU_DYING_FROZEN: | 2735 | case CPU_DYING_FROZEN: |
@@ -2627,6 +2757,28 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2627 | } | 2757 | } |
2628 | 2758 | ||
2629 | /* | 2759 | /* |
2760 | * Spawn the kthread that handles this RCU flavor's grace periods. | ||
2761 | */ | ||
2762 | static int __init rcu_spawn_gp_kthread(void) | ||
2763 | { | ||
2764 | unsigned long flags; | ||
2765 | struct rcu_node *rnp; | ||
2766 | struct rcu_state *rsp; | ||
2767 | struct task_struct *t; | ||
2768 | |||
2769 | for_each_rcu_flavor(rsp) { | ||
2770 | t = kthread_run(rcu_gp_kthread, rsp, rsp->name); | ||
2771 | BUG_ON(IS_ERR(t)); | ||
2772 | rnp = rcu_get_root(rsp); | ||
2773 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
2774 | rsp->gp_kthread = t; | ||
2775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
2776 | } | ||
2777 | return 0; | ||
2778 | } | ||
2779 | early_initcall(rcu_spawn_gp_kthread); | ||
2780 | |||
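With the per-flavor grace-period kthread spawned here, forcing quiescent states becomes a handoff: the new force_quiescent_state() merely sets RCU_GP_FLAG_FQS and wakes rsp->gp_wq, and the flavor's rcu_gp_kthread() (not visible in these hunks) notices the flag and performs the actual scan. A rough userspace analogue of that handoff, using a pthread condition variable in place of the wait queue; do_fqs_scan() is a stub standing in for the real forcing work.

#include <pthread.h>
#include <stdio.h>

#define RCU_GP_FLAG_FQS 0x2

static pthread_mutex_t gp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  gp_cv   = PTHREAD_COND_INITIALIZER;	/* cf. rsp->gp_wq */
static unsigned int gp_flags;					/* cf. rsp->gp_flags */

static void do_fqs_scan(void)
{
	printf("scanning for holdout CPUs\n");	/* cf. force_qs_rnp() */
}

/* Stand-in for one pass of the grace-period kthread's loop. */
static void *gp_kthread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&gp_lock);
	while (!(gp_flags & RCU_GP_FLAG_FQS))
		pthread_cond_wait(&gp_cv, &gp_lock);
	gp_flags &= ~RCU_GP_FLAG_FQS;
	pthread_mutex_unlock(&gp_lock);
	do_fqs_scan();
	return NULL;
}

/* Stand-in for the tail of the new force_quiescent_state(). */
static void request_fqs(void)
{
	pthread_mutex_lock(&gp_lock);
	gp_flags |= RCU_GP_FLAG_FQS;
	pthread_cond_signal(&gp_cv);		/* cf. wake_up(&rsp->gp_wq) */
	pthread_mutex_unlock(&gp_lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gp_kthread, NULL);
	request_fqs();
	pthread_join(t, NULL);
	return 0;
}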
2781 | /* | ||
2630 | * This function is invoked towards the end of the scheduler's initialization | 2782 | * This function is invoked towards the end of the scheduler's initialization |
2631 | * process. Before this is called, the idle task might contain | 2783 | * process. Before this is called, the idle task might contain |
2632 | * RCU read-side critical sections (during which time, this idle | 2784 | * RCU read-side critical sections (during which time, this idle |
@@ -2661,7 +2813,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2661 | int cprv; | 2813 | int cprv; |
2662 | int i; | 2814 | int i; |
2663 | 2815 | ||
2664 | cprv = NR_CPUS; | 2816 | cprv = nr_cpu_ids; |
2665 | for (i = rcu_num_lvls - 1; i >= 0; i--) { | 2817 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
2666 | ccur = rsp->levelcnt[i]; | 2818 | ccur = rsp->levelcnt[i]; |
2667 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | 2819 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; |
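Seeding cprv with nr_cpu_ids instead of NR_CPUS matters when the kernel is configured for far more CPUs than can ever be present, because the loop above computes each level's spread as a ceiling division of the count one level further down. A small worked example with made-up numbers (NR_CPUS=4096, nr_cpu_ids=24, a two-level tree with levelcnt = {1, 2}): the old seed gives the leaf level a spread of 2048 CPUs per rcu_node, the new seed gives the expected 12.

#include <stdio.h>

/* Same computation as the loop above, pulled out for a side-by-side run. */
static void levelspread(int cprv, const int *levelcnt, int nlvls, int *spread)
{
	int i;

	for (i = nlvls - 1; i >= 0; i--) {
		spread[i] = (cprv + levelcnt[i] - 1) / levelcnt[i];	/* ceiling */
		cprv = levelcnt[i];
	}
}

int main(void)
{
	int levelcnt[2] = { 1, 2 };		/* root level, leaf level */
	int spread[2];

	levelspread(4096, levelcnt, 2, spread);	/* old seed: NR_CPUS */
	printf("NR_CPUS seed:    leaf spread = %d\n", spread[1]);
	levelspread(24, levelcnt, 2, spread);	/* new seed: nr_cpu_ids */
	printf("nr_cpu_ids seed: leaf spread = %d\n", spread[1]);
	return 0;
}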
@@ -2676,10 +2828,14 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2676 | static void __init rcu_init_one(struct rcu_state *rsp, | 2828 | static void __init rcu_init_one(struct rcu_state *rsp, |
2677 | struct rcu_data __percpu *rda) | 2829 | struct rcu_data __percpu *rda) |
2678 | { | 2830 | { |
2679 | static char *buf[] = { "rcu_node_level_0", | 2831 | static char *buf[] = { "rcu_node_0", |
2680 | "rcu_node_level_1", | 2832 | "rcu_node_1", |
2681 | "rcu_node_level_2", | 2833 | "rcu_node_2", |
2682 | "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ | 2834 | "rcu_node_3" }; /* Match MAX_RCU_LVLS */ |
2835 | static char *fqs[] = { "rcu_node_fqs_0", | ||
2836 | "rcu_node_fqs_1", | ||
2837 | "rcu_node_fqs_2", | ||
2838 | "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ | ||
2683 | int cpustride = 1; | 2839 | int cpustride = 1; |
2684 | int i; | 2840 | int i; |
2685 | int j; | 2841 | int j; |
@@ -2704,7 +2860,11 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2704 | raw_spin_lock_init(&rnp->lock); | 2860 | raw_spin_lock_init(&rnp->lock); |
2705 | lockdep_set_class_and_name(&rnp->lock, | 2861 | lockdep_set_class_and_name(&rnp->lock, |
2706 | &rcu_node_class[i], buf[i]); | 2862 | &rcu_node_class[i], buf[i]); |
2707 | rnp->gpnum = 0; | 2863 | raw_spin_lock_init(&rnp->fqslock); |
2864 | lockdep_set_class_and_name(&rnp->fqslock, | ||
2865 | &rcu_fqs_class[i], fqs[i]); | ||
2866 | rnp->gpnum = rsp->gpnum; | ||
2867 | rnp->completed = rsp->completed; | ||
2708 | rnp->qsmask = 0; | 2868 | rnp->qsmask = 0; |
2709 | rnp->qsmaskinit = 0; | 2869 | rnp->qsmaskinit = 0; |
2710 | rnp->grplo = j * cpustride; | 2870 | rnp->grplo = j * cpustride; |
@@ -2727,6 +2887,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2727 | } | 2887 | } |
2728 | 2888 | ||
2729 | rsp->rda = rda; | 2889 | rsp->rda = rda; |
2890 | init_waitqueue_head(&rsp->gp_wq); | ||
2730 | rnp = rsp->level[rcu_num_lvls - 1]; | 2891 | rnp = rsp->level[rcu_num_lvls - 1]; |
2731 | for_each_possible_cpu(i) { | 2892 | for_each_possible_cpu(i) { |
2732 | while (i > rnp->grphi) | 2893 | while (i > rnp->grphi) |
@@ -2750,7 +2911,8 @@ static void __init rcu_init_geometry(void) | |||
2750 | int rcu_capacity[MAX_RCU_LVLS + 1]; | 2911 | int rcu_capacity[MAX_RCU_LVLS + 1]; |
2751 | 2912 | ||
2752 | /* If the compile-time values are accurate, just leave. */ | 2913 | /* If the compile-time values are accurate, just leave. */ |
2753 | if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) | 2914 | if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && |
2915 | nr_cpu_ids == NR_CPUS) | ||
2754 | return; | 2916 | return; |
2755 | 2917 | ||
2756 | /* | 2918 | /* |