-rw-r--r--  Documentation/kernel-parameters.txt | 6
-rw-r--r--  arch/blackfin/mach-common/smp.c | 6
-rw-r--r--  arch/metag/kernel/smp.c | 5
-rw-r--r--  arch/x86/include/asm/cpu.h | 2
-rw-r--r--  arch/x86/include/asm/smp.h | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 39
-rw-r--r--  arch/x86/xen/smp.c | 46
-rw-r--r--  include/linux/cpu.h | 14
-rw-r--r--  include/linux/lockdep.h | 7
-rw-r--r--  include/linux/rcupdate.h | 40
-rw-r--r--  include/linux/srcu.h | 2
-rw-r--r--  init/Kconfig | 13
-rw-r--r--  kernel/cpu.c | 4
-rw-r--r--  kernel/rcu/rcutorture.c | 27
-rw-r--r--  kernel/rcu/srcu.c | 19
-rw-r--r--  kernel/rcu/tiny.c | 14
-rw-r--r--  kernel/rcu/tree.c | 437
-rw-r--r--  kernel/rcu/tree.h | 11
-rw-r--r--  kernel/rcu/tree_plugin.h | 267
-rw-r--r--  kernel/rcu/tree_trace.c | 4
-rw-r--r--  kernel/rcu/update.c | 72
-rw-r--r--  kernel/sched/idle.c | 9
-rw-r--r--  kernel/smpboot.c | 156
-rw-r--r--  lib/Kconfig.debug | 35
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/CFcommon | 1
26 files changed, 863 insertions, 377 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d913e3b4bf0d..5368ba701de2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2968,6 +2968,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2968 | Set maximum number of finished RCU callbacks to | 2968 | Set maximum number of finished RCU callbacks to |
2969 | process in one batch. | 2969 | process in one batch. |
2970 | 2970 | ||
2971 | rcutree.gp_init_delay= [KNL] | ||
2972 | Set the number of jiffies to delay each step of | ||
2973 | RCU grace-period initialization. This only has | ||
2974 | effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is | ||
2975 | set. | ||
2976 | |||
2971 | rcutree.rcu_fanout_leaf= [KNL] | 2977 | rcutree.rcu_fanout_leaf= [KNL] |
2972 | Increase the number of CPUs assigned to each | 2978 | Increase the number of CPUs assigned to each |
2973 | leaf rcu_node structure. Useful for very large | 2979 | leaf rcu_node structure. Useful for very large |
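For reference, the new knob follows the usual rcutree. module-parameter convention, so it is normally given on the kernel boot command line; an illustrative setting (the value is arbitrary) would be:

    rcutree.gp_init_delay=3

With CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y this stretches each step of grace-period initialization by three jiffies, which appears intended to widen race windows for rcutorture testing rather than for production use.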
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 8ad3e90cc8fc..1c7259597395 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -413,16 +413,14 @@ int __cpu_disable(void) | |||
413 | return 0; | 413 | return 0; |
414 | } | 414 | } |
415 | 415 | ||
416 | static DECLARE_COMPLETION(cpu_killed); | ||
417 | |||
418 | int __cpu_die(unsigned int cpu) | 416 | int __cpu_die(unsigned int cpu) |
419 | { | 417 | { |
420 | return wait_for_completion_timeout(&cpu_killed, 5000); | 418 | return cpu_wait_death(cpu, 5); |
421 | } | 419 | } |
422 | 420 | ||
423 | void cpu_die(void) | 421 | void cpu_die(void) |
424 | { | 422 | { |
425 | complete(&cpu_killed); | 423 | (void)cpu_report_death(); |
426 | 424 | ||
427 | atomic_dec(&init_mm.mm_users); | 425 | atomic_dec(&init_mm.mm_users); |
428 | atomic_dec(&init_mm.mm_count); | 426 | atomic_dec(&init_mm.mm_count); |
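The blackfin conversion above is the simplest instance of the new generic teardown handshake, declared in the include/linux/cpu.h hunk below and implemented in kernel/smpboot.c (see the diffstat). A minimal sketch of the two sides, illustrative rather than copied from any particular architecture:

#include <linux/cpu.h>

/* Surviving CPU: wait up to five seconds for the outgoing CPU to check in. */
int __cpu_die(unsigned int cpu)
{
        return cpu_wait_death(cpu, 5);  /* true on success, false on timeout */
}

/* Outgoing CPU: announce death, then drop into the arch-specific halt. */
void cpu_die(void)
{
        (void)cpu_report_death();
        /* ... architecture-specific low-power spin or halt ... */
}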
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index f006d2276f40..ac3a199e33e7 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -261,7 +261,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) | |||
261 | } | 261 | } |
262 | 262 | ||
263 | #ifdef CONFIG_HOTPLUG_CPU | 263 | #ifdef CONFIG_HOTPLUG_CPU |
264 | static DECLARE_COMPLETION(cpu_killed); | ||
265 | 264 | ||
266 | /* | 265 | /* |
267 | * __cpu_disable runs on the processor to be shutdown. | 266 | * __cpu_disable runs on the processor to be shutdown. |
@@ -299,7 +298,7 @@ int __cpu_disable(void) | |||
299 | */ | 298 | */ |
300 | void __cpu_die(unsigned int cpu) | 299 | void __cpu_die(unsigned int cpu) |
301 | { | 300 | { |
302 | if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1))) | 301 | if (!cpu_wait_death(cpu, 1)) |
303 | pr_err("CPU%u: unable to kill\n", cpu); | 302 | pr_err("CPU%u: unable to kill\n", cpu); |
304 | } | 303 | } |
305 | 304 | ||
@@ -314,7 +313,7 @@ void cpu_die(void) | |||
314 | local_irq_disable(); | 313 | local_irq_disable(); |
315 | idle_task_exit(); | 314 | idle_task_exit(); |
316 | 315 | ||
317 | complete(&cpu_killed); | 316 | (void)cpu_report_death(); |
318 | 317 | ||
319 | asm ("XOR TXENABLE, D0Re0,D0Re0\n"); | 318 | asm ("XOR TXENABLE, D0Re0,D0Re0\n"); |
320 | } | 319 | } |
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index d2b12988d2ed..bf2caa1dedc5 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -34,8 +34,6 @@ extern int _debug_hotplug_cpu(int cpu, int action); | |||
34 | #endif | 34 | #endif |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | DECLARE_PER_CPU(int, cpu_state); | ||
38 | |||
39 | int mwait_usable(const struct cpuinfo_x86 *); | 37 | int mwait_usable(const struct cpuinfo_x86 *); |
40 | 38 | ||
41 | #endif /* _ASM_X86_CPU_H */ | 39 | #endif /* _ASM_X86_CPU_H */ |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3bc835..a5cb4f6e9492 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,12 +150,12 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) | |||
150 | } | 150 | } |
151 | 151 | ||
152 | void cpu_disable_common(void); | 152 | void cpu_disable_common(void); |
153 | void cpu_die_common(unsigned int cpu); | ||
154 | void native_smp_prepare_boot_cpu(void); | 153 | void native_smp_prepare_boot_cpu(void); |
155 | void native_smp_prepare_cpus(unsigned int max_cpus); | 154 | void native_smp_prepare_cpus(unsigned int max_cpus); |
156 | void native_smp_cpus_done(unsigned int max_cpus); | 155 | void native_smp_cpus_done(unsigned int max_cpus); |
157 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); | 156 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); |
158 | int native_cpu_disable(void); | 157 | int native_cpu_disable(void); |
158 | int common_cpu_die(unsigned int cpu); | ||
159 | void native_cpu_die(unsigned int cpu); | 159 | void native_cpu_die(unsigned int cpu); |
160 | void native_play_dead(void); | 160 | void native_play_dead(void); |
161 | void play_dead_common(void); | 161 | void play_dead_common(void); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index febc6aabc72e..c8fa34963ead 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -77,9 +77,6 @@ | |||
77 | #include <asm/realmode.h> | 77 | #include <asm/realmode.h> |
78 | #include <asm/misc.h> | 78 | #include <asm/misc.h> |
79 | 79 | ||
80 | /* State of each CPU */ | ||
81 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | ||
82 | |||
83 | /* Number of siblings per CPU package */ | 80 | /* Number of siblings per CPU package */ |
84 | int smp_num_siblings = 1; | 81 | int smp_num_siblings = 1; |
85 | EXPORT_SYMBOL(smp_num_siblings); | 82 | EXPORT_SYMBOL(smp_num_siblings); |
@@ -257,7 +254,7 @@ static void notrace start_secondary(void *unused) | |||
257 | lock_vector_lock(); | 254 | lock_vector_lock(); |
258 | set_cpu_online(smp_processor_id(), true); | 255 | set_cpu_online(smp_processor_id(), true); |
259 | unlock_vector_lock(); | 256 | unlock_vector_lock(); |
260 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 257 | cpu_set_state_online(smp_processor_id()); |
261 | x86_platform.nmi_init(); | 258 | x86_platform.nmi_init(); |
262 | 259 | ||
263 | /* enable local interrupts */ | 260 | /* enable local interrupts */ |
@@ -948,7 +945,10 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
948 | */ | 945 | */ |
949 | mtrr_save_state(); | 946 | mtrr_save_state(); |
950 | 947 | ||
951 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 948 | /* x86 CPUs take themselves offline, so delayed offline is OK. */ |
949 | err = cpu_check_up_prepare(cpu); | ||
950 | if (err && err != -EBUSY) | ||
951 | return err; | ||
952 | 952 | ||
953 | /* the FPU context is blank, nobody can own it */ | 953 | /* the FPU context is blank, nobody can own it */ |
954 | __cpu_disable_lazy_restore(cpu); | 954 | __cpu_disable_lazy_restore(cpu); |
@@ -1191,7 +1191,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1191 | switch_to_new_gdt(me); | 1191 | switch_to_new_gdt(me); |
1192 | /* already set me in cpu_online_mask in boot_cpu_init() */ | 1192 | /* already set me in cpu_online_mask in boot_cpu_init() */ |
1193 | cpumask_set_cpu(me, cpu_callout_mask); | 1193 | cpumask_set_cpu(me, cpu_callout_mask); |
1194 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1194 | cpu_set_state_online(me); |
1195 | } | 1195 | } |
1196 | 1196 | ||
1197 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1197 | void __init native_smp_cpus_done(unsigned int max_cpus) |
@@ -1318,14 +1318,10 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1318 | numa_remove_cpu(cpu); | 1318 | numa_remove_cpu(cpu); |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | static DEFINE_PER_CPU(struct completion, die_complete); | ||
1322 | |||
1323 | void cpu_disable_common(void) | 1321 | void cpu_disable_common(void) |
1324 | { | 1322 | { |
1325 | int cpu = smp_processor_id(); | 1323 | int cpu = smp_processor_id(); |
1326 | 1324 | ||
1327 | init_completion(&per_cpu(die_complete, smp_processor_id())); | ||
1328 | |||
1329 | remove_siblinginfo(cpu); | 1325 | remove_siblinginfo(cpu); |
1330 | 1326 | ||
1331 | /* It's now safe to remove this processor from the online map */ | 1327 | /* It's now safe to remove this processor from the online map */ |
@@ -1349,24 +1345,27 @@ int native_cpu_disable(void) | |||
1349 | return 0; | 1345 | return 0; |
1350 | } | 1346 | } |
1351 | 1347 | ||
1352 | void cpu_die_common(unsigned int cpu) | 1348 | int common_cpu_die(unsigned int cpu) |
1353 | { | 1349 | { |
1354 | wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); | 1350 | int ret = 0; |
1355 | } | ||
1356 | 1351 | ||
1357 | void native_cpu_die(unsigned int cpu) | ||
1358 | { | ||
1359 | /* We don't do anything here: idle task is faking death itself. */ | 1352 | /* We don't do anything here: idle task is faking death itself. */ |
1360 | 1353 | ||
1361 | cpu_die_common(cpu); | ||
1362 | |||
1363 | /* They ack this in play_dead() by setting CPU_DEAD */ | 1354 | /* They ack this in play_dead() by setting CPU_DEAD */ |
1364 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1355 | if (cpu_wait_death(cpu, 5)) { |
1365 | if (system_state == SYSTEM_RUNNING) | 1356 | if (system_state == SYSTEM_RUNNING) |
1366 | pr_info("CPU %u is now offline\n", cpu); | 1357 | pr_info("CPU %u is now offline\n", cpu); |
1367 | } else { | 1358 | } else { |
1368 | pr_err("CPU %u didn't die...\n", cpu); | 1359 | pr_err("CPU %u didn't die...\n", cpu); |
1360 | ret = -1; | ||
1369 | } | 1361 | } |
1362 | |||
1363 | return ret; | ||
1364 | } | ||
1365 | |||
1366 | void native_cpu_die(unsigned int cpu) | ||
1367 | { | ||
1368 | common_cpu_die(cpu); | ||
1370 | } | 1369 | } |
1371 | 1370 | ||
1372 | void play_dead_common(void) | 1371 | void play_dead_common(void) |
@@ -1375,10 +1374,8 @@ void play_dead_common(void) | |||
1375 | reset_lazy_tlbstate(); | 1374 | reset_lazy_tlbstate(); |
1376 | amd_e400_remove_cpu(raw_smp_processor_id()); | 1375 | amd_e400_remove_cpu(raw_smp_processor_id()); |
1377 | 1376 | ||
1378 | mb(); | ||
1379 | /* Ack it */ | 1377 | /* Ack it */ |
1380 | __this_cpu_write(cpu_state, CPU_DEAD); | 1378 | (void)cpu_report_death(); |
1381 | complete(&per_cpu(die_complete, smp_processor_id())); | ||
1382 | 1379 | ||
1383 | /* | 1380 | /* |
1384 | * With physical CPU hotplug, we should halt the cpu | 1381 | * With physical CPU hotplug, we should halt the cpu |
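Condensed, the x86 conversion touches the new state machine at four places; the following is a rough sketch assembled from the hunks above, not the literal smpboot.c code:

/* native_cpu_up(): tolerate -EBUSY, since x86 CPUs park themselves. */
err = cpu_check_up_prepare(cpu);
if (err && err != -EBUSY)
        return err;

/* start_secondary(), on the incoming CPU once it is fully up: */
cpu_set_state_online(smp_processor_id());

/* play_dead_common(), on the outgoing CPU: */
(void)cpu_report_death();       /* replaces cpu_state = CPU_DEAD + complete() */

/* common_cpu_die(), on the surviving CPU: */
if (cpu_wait_death(cpu, 5))
        pr_info("CPU %u is now offline\n", cpu);
else
        pr_err("CPU %u didn't die...\n", cpu);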
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 08e8489c47f1..1c5e760f34ca 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -90,14 +90,10 @@ static void cpu_bringup(void) | |||
90 | 90 | ||
91 | set_cpu_online(cpu, true); | 91 | set_cpu_online(cpu, true); |
92 | 92 | ||
93 | this_cpu_write(cpu_state, CPU_ONLINE); | 93 | cpu_set_state_online(cpu); /* Implies full memory barrier. */ |
94 | |||
95 | wmb(); | ||
96 | 94 | ||
97 | /* We can take interrupts now: we're officially "up". */ | 95 | /* We can take interrupts now: we're officially "up". */ |
98 | local_irq_enable(); | 96 | local_irq_enable(); |
99 | |||
100 | wmb(); /* make sure everything is out */ | ||
101 | } | 97 | } |
102 | 98 | ||
103 | /* | 99 | /* |
@@ -459,7 +455,13 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
459 | xen_setup_timer(cpu); | 455 | xen_setup_timer(cpu); |
460 | xen_init_lock_cpu(cpu); | 456 | xen_init_lock_cpu(cpu); |
461 | 457 | ||
462 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 458 | /* |
459 | * PV VCPUs are always successfully taken down (see 'while' loop | ||
460 | * in xen_cpu_die()), so -EBUSY is an error. | ||
461 | */ | ||
462 | rc = cpu_check_up_prepare(cpu); | ||
463 | if (rc) | ||
464 | return rc; | ||
463 | 465 | ||
464 | /* make sure interrupts start blocked */ | 466 | /* make sure interrupts start blocked */ |
465 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; | 467 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; |
@@ -479,10 +481,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
479 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); | 481 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); |
480 | BUG_ON(rc); | 482 | BUG_ON(rc); |
481 | 483 | ||
482 | while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { | 484 | while (cpu_report_state(cpu) != CPU_ONLINE) |
483 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); | 485 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); |
484 | barrier(); | ||
485 | } | ||
486 | 486 | ||
487 | return 0; | 487 | return 0; |
488 | } | 488 | } |
@@ -511,11 +511,11 @@ static void xen_cpu_die(unsigned int cpu) | |||
511 | schedule_timeout(HZ/10); | 511 | schedule_timeout(HZ/10); |
512 | } | 512 | } |
513 | 513 | ||
514 | cpu_die_common(cpu); | 514 | if (common_cpu_die(cpu) == 0) { |
515 | 515 | xen_smp_intr_free(cpu); | |
516 | xen_smp_intr_free(cpu); | 516 | xen_uninit_lock_cpu(cpu); |
517 | xen_uninit_lock_cpu(cpu); | 517 | xen_teardown_timer(cpu); |
518 | xen_teardown_timer(cpu); | 518 | } |
519 | } | 519 | } |
520 | 520 | ||
521 | static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ | 521 | static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ |
@@ -747,6 +747,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |||
747 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | 747 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) |
748 | { | 748 | { |
749 | int rc; | 749 | int rc; |
750 | |||
751 | /* | ||
752 | * This can happen if CPU was offlined earlier and | ||
753 | * offlining timed out in common_cpu_die(). | ||
754 | */ | ||
755 | if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { | ||
756 | xen_smp_intr_free(cpu); | ||
757 | xen_uninit_lock_cpu(cpu); | ||
758 | } | ||
759 | |||
750 | /* | 760 | /* |
751 | * xen_smp_intr_init() needs to run before native_cpu_up() | 761 | * xen_smp_intr_init() needs to run before native_cpu_up() |
752 | * so that IPI vectors are set up on the booting CPU before | 762 | * so that IPI vectors are set up on the booting CPU before |
@@ -768,12 +778,6 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
768 | return rc; | 778 | return rc; |
769 | } | 779 | } |
770 | 780 | ||
771 | static void xen_hvm_cpu_die(unsigned int cpu) | ||
772 | { | ||
773 | xen_cpu_die(cpu); | ||
774 | native_cpu_die(cpu); | ||
775 | } | ||
776 | |||
777 | void __init xen_hvm_smp_init(void) | 781 | void __init xen_hvm_smp_init(void) |
778 | { | 782 | { |
779 | if (!xen_have_vector_callback) | 783 | if (!xen_have_vector_callback) |
@@ -781,7 +785,7 @@ void __init xen_hvm_smp_init(void) | |||
781 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; | 785 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; |
782 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; | 786 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; |
783 | smp_ops.cpu_up = xen_hvm_cpu_up; | 787 | smp_ops.cpu_up = xen_hvm_cpu_up; |
784 | smp_ops.cpu_die = xen_hvm_cpu_die; | 788 | smp_ops.cpu_die = xen_cpu_die; |
785 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; | 789 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; |
786 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; | 790 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; |
787 | smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; | 791 | smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; |
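The two Xen changes are connected: xen_cpu_die() now tears down per-CPU interrupts, spinlock state, and the timer only when common_cpu_die() reports success, so a timed-out offline leaves that state allocated. The new check at the top of xen_hvm_cpu_up() is what recovers from that case; a condensed view, drawn from the hunks above:

/* A previous offline timed out in common_cpu_die(): free what
 * xen_cpu_die() deliberately skipped before onlining again. */
if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
        xen_smp_intr_free(cpu);
        xen_uninit_lock_cpu(cpu);
}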
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 4260e8594bd7..d028721748d4 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -95,6 +95,10 @@ enum { | |||
95 | * Called on the new cpu, just before | 95 | * Called on the new cpu, just before |
96 | * enabling interrupts. Must not sleep, | 96 | * enabling interrupts. Must not sleep, |
97 | * must not fail */ | 97 | * must not fail */ |
98 | #define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached | ||
99 | * idle loop. */ | ||
100 | #define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, | ||
101 | * perhaps due to preemption. */ | ||
98 | 102 | ||
99 | /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend | 103 | /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend |
100 | * operation in progress | 104 | * operation in progress |
@@ -271,4 +275,14 @@ void arch_cpu_idle_enter(void); | |||
271 | void arch_cpu_idle_exit(void); | 275 | void arch_cpu_idle_exit(void); |
272 | void arch_cpu_idle_dead(void); | 276 | void arch_cpu_idle_dead(void); |
273 | 277 | ||
278 | DECLARE_PER_CPU(bool, cpu_dead_idle); | ||
279 | |||
280 | int cpu_report_state(int cpu); | ||
281 | int cpu_check_up_prepare(int cpu); | ||
282 | void cpu_set_state_online(int cpu); | ||
283 | #ifdef CONFIG_HOTPLUG_CPU | ||
284 | bool cpu_wait_death(unsigned int cpu, int seconds); | ||
285 | bool cpu_report_death(void); | ||
286 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
287 | |||
274 | #endif /* _LINUX_CPU_H_ */ | 288 | #endif /* _LINUX_CPU_H_ */ |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 74ab23176e9b..066ba4157541 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -531,8 +531,13 @@ do { \ | |||
531 | # define might_lock_read(lock) do { } while (0) | 531 | # define might_lock_read(lock) do { } while (0) |
532 | #endif | 532 | #endif |
533 | 533 | ||
534 | #ifdef CONFIG_PROVE_RCU | 534 | #ifdef CONFIG_LOCKDEP |
535 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s); | 535 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s); |
536 | #else | ||
537 | static inline void | ||
538 | lockdep_rcu_suspicious(const char *file, const int line, const char *s) | ||
539 | { | ||
540 | } | ||
536 | #endif | 541 | #endif |
537 | 542 | ||
538 | #endif /* __LINUX_LOCKDEP_H */ | 543 | #endif /* __LINUX_LOCKDEP_H */ |
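With the declaration now keyed off CONFIG_LOCKDEP and a no-op stub otherwise, callers may invoke lockdep_rcu_suspicious() unconditionally instead of wrapping it in #ifdefs. A hypothetical call site (the names and the condition are illustrative only, not from this patch):

/* Complain if foo is accessed without any recognized protection. */
if (!rcu_read_lock_held() && !lockdep_is_held(&foo_mutex))
        lockdep_rcu_suspicious(__FILE__, __LINE__,
                               "foo accessed outside reader or foo_mutex");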
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 78097491cd99..573a5afd5ed8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,6 +48,26 @@ | |||
48 | 48 | ||
49 | extern int rcu_expedited; /* for sysctl */ | 49 | extern int rcu_expedited; /* for sysctl */ |
50 | 50 | ||
51 | #ifdef CONFIG_TINY_RCU | ||
52 | /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ | ||
53 | static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ | ||
54 | { | ||
55 | return false; | ||
56 | } | ||
57 | |||
58 | static inline void rcu_expedite_gp(void) | ||
59 | { | ||
60 | } | ||
61 | |||
62 | static inline void rcu_unexpedite_gp(void) | ||
63 | { | ||
64 | } | ||
65 | #else /* #ifdef CONFIG_TINY_RCU */ | ||
66 | bool rcu_gp_is_expedited(void); /* Internal RCU use. */ | ||
67 | void rcu_expedite_gp(void); | ||
68 | void rcu_unexpedite_gp(void); | ||
69 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | ||
70 | |||
51 | enum rcutorture_type { | 71 | enum rcutorture_type { |
52 | RCU_FLAVOR, | 72 | RCU_FLAVOR, |
53 | RCU_BH_FLAVOR, | 73 | RCU_BH_FLAVOR, |
@@ -195,6 +215,15 @@ void call_rcu_sched(struct rcu_head *head, | |||
195 | 215 | ||
196 | void synchronize_sched(void); | 216 | void synchronize_sched(void); |
197 | 217 | ||
218 | /* | ||
219 | * Structure allowing asynchronous waiting on RCU. | ||
220 | */ | ||
221 | struct rcu_synchronize { | ||
222 | struct rcu_head head; | ||
223 | struct completion completion; | ||
224 | }; | ||
225 | void wakeme_after_rcu(struct rcu_head *head); | ||
226 | |||
198 | /** | 227 | /** |
199 | * call_rcu_tasks() - Queue an RCU for invocation task-based grace period | 228 | * call_rcu_tasks() - Queue an RCU for invocation task-based grace period |
200 | * @head: structure to be used for queueing the RCU updates. | 229 | * @head: structure to be used for queueing the RCU updates. |
@@ -258,6 +287,7 @@ static inline int rcu_preempt_depth(void) | |||
258 | 287 | ||
259 | /* Internal to kernel */ | 288 | /* Internal to kernel */ |
260 | void rcu_init(void); | 289 | void rcu_init(void); |
290 | void rcu_end_inkernel_boot(void); | ||
261 | void rcu_sched_qs(void); | 291 | void rcu_sched_qs(void); |
262 | void rcu_bh_qs(void); | 292 | void rcu_bh_qs(void); |
263 | void rcu_check_callbacks(int user); | 293 | void rcu_check_callbacks(int user); |
@@ -266,6 +296,8 @@ void rcu_idle_enter(void); | |||
266 | void rcu_idle_exit(void); | 296 | void rcu_idle_exit(void); |
267 | void rcu_irq_enter(void); | 297 | void rcu_irq_enter(void); |
268 | void rcu_irq_exit(void); | 298 | void rcu_irq_exit(void); |
299 | int rcu_cpu_notify(struct notifier_block *self, | ||
300 | unsigned long action, void *hcpu); | ||
269 | 301 | ||
270 | #ifdef CONFIG_RCU_STALL_COMMON | 302 | #ifdef CONFIG_RCU_STALL_COMMON |
271 | void rcu_sysrq_start(void); | 303 | void rcu_sysrq_start(void); |
@@ -720,7 +752,7 @@ static inline void rcu_preempt_sleep_check(void) | |||
720 | * annotated as __rcu. | 752 | * annotated as __rcu. |
721 | */ | 753 | */ |
722 | #define rcu_dereference_check(p, c) \ | 754 | #define rcu_dereference_check(p, c) \ |
723 | __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) | 755 | __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) |
724 | 756 | ||
725 | /** | 757 | /** |
726 | * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking | 758 | * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking |
@@ -730,7 +762,7 @@ static inline void rcu_preempt_sleep_check(void) | |||
730 | * This is the RCU-bh counterpart to rcu_dereference_check(). | 762 | * This is the RCU-bh counterpart to rcu_dereference_check(). |
731 | */ | 763 | */ |
732 | #define rcu_dereference_bh_check(p, c) \ | 764 | #define rcu_dereference_bh_check(p, c) \ |
733 | __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) | 765 | __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) |
734 | 766 | ||
735 | /** | 767 | /** |
736 | * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking | 768 | * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking |
@@ -740,7 +772,7 @@ static inline void rcu_preempt_sleep_check(void) | |||
740 | * This is the RCU-sched counterpart to rcu_dereference_check(). | 772 | * This is the RCU-sched counterpart to rcu_dereference_check(). |
741 | */ | 773 | */ |
742 | #define rcu_dereference_sched_check(p, c) \ | 774 | #define rcu_dereference_sched_check(p, c) \ |
743 | __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ | 775 | __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ |
744 | __rcu) | 776 | __rcu) |
745 | 777 | ||
746 | #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ | 778 | #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ |
@@ -933,9 +965,9 @@ static inline void rcu_read_unlock(void) | |||
933 | { | 965 | { |
934 | rcu_lockdep_assert(rcu_is_watching(), | 966 | rcu_lockdep_assert(rcu_is_watching(), |
935 | "rcu_read_unlock() used illegally while idle"); | 967 | "rcu_read_unlock() used illegally while idle"); |
936 | rcu_lock_release(&rcu_lock_map); | ||
937 | __release(RCU); | 968 | __release(RCU); |
938 | __rcu_read_unlock(); | 969 | __rcu_read_unlock(); |
970 | rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ | ||
939 | } | 971 | } |
940 | 972 | ||
941 | /** | 973 | /** |
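Two additions here connect to later hunks: the rcu_expedite_gp()/rcu_unexpedite_gp() counter pair (exercised by rcutorture below and consulted by synchronize_srcu() via rcu_gp_is_expedited()), and struct rcu_synchronize plus wakeme_after_rcu(), which are removed from kernel/rcu/srcu.c below and presumably re-homed in kernel/rcu/update.c (per the diffstat) so the wait-for-grace-period idiom can be shared. A minimal sketch of that idiom, assuming an on-stack rcu_head:

#include <linux/completion.h>
#include <linux/rcupdate.h>

static void example_wait_for_grace_period(void)
{
        struct rcu_synchronize rs;

        init_rcu_head_on_stack(&rs.head);
        init_completion(&rs.completion);
        call_rcu(&rs.head, wakeme_after_rcu);   /* completes rs.completion after a GP */
        wait_for_completion(&rs.completion);
        destroy_rcu_head_on_stack(&rs.head);
}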
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9cfd9623fb03..bdeb4567b71e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -182,7 +182,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) | |||
182 | * lockdep_is_held() calls. | 182 | * lockdep_is_held() calls. |
183 | */ | 183 | */ |
184 | #define srcu_dereference_check(p, sp, c) \ | 184 | #define srcu_dereference_check(p, sp, c) \ |
185 | __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) | 185 | __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) |
186 | 186 | ||
187 | /** | 187 | /** |
188 | * srcu_dereference - fetch SRCU-protected pointer for later dereferencing | 188 | * srcu_dereference - fetch SRCU-protected pointer for later dereferencing |
diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d4261b..9a0592516f48 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -791,6 +791,19 @@ config RCU_NOCB_CPU_ALL | |||
791 | 791 | ||
792 | endchoice | 792 | endchoice |
793 | 793 | ||
794 | config RCU_EXPEDITE_BOOT | ||
795 | bool | ||
796 | default n | ||
797 | help | ||
798 | This option enables expedited grace periods at boot time, | ||
799 | as if rcu_expedite_gp() had been invoked early in boot. | ||
800 | The corresponding rcu_unexpedite_gp() is invoked from | ||
801 | rcu_end_inkernel_boot(), which is intended to be invoked | ||
802 | at the end of the kernel-only boot sequence, just before | ||
803 | init is exec'ed. | ||
804 | |||
805 | Accept the default if unsure. | ||
806 | |||
794 | endmenu # "RCU Subsystem" | 807 | endmenu # "RCU Subsystem" |
795 | 808 | ||
796 | config BUILD_BIN2C | 809 | config BUILD_BIN2C |
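The help text ties this option to the rcu_expedite_gp()/rcu_unexpedite_gp() pair declared in rcupdate.h above; kernel/rcu/update.c (listed in the diffstat but not shown in this excerpt) is where the pairing presumably lands. A sketch consistent with the help text, not the actual update.c code:

/* Called late in the kernel-only boot sequence, just before exec of init. */
void rcu_end_inkernel_boot(void)
{
        if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
                rcu_unexpedite_gp();    /* undo the boot-time expediting */
}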
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1972b161c61e..d46b4dae0ca0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -408,8 +408,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
408 | * | 408 | * |
409 | * Wait for the stop thread to go away. | 409 | * Wait for the stop thread to go away. |
410 | */ | 410 | */ |
411 | while (!idle_cpu(cpu)) | 411 | while (!per_cpu(cpu_dead_idle, cpu)) |
412 | cpu_relax(); | 412 | cpu_relax(); |
413 | smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ | ||
414 | per_cpu(cpu_dead_idle, cpu) = false; | ||
413 | 415 | ||
414 | /* This actually kills the CPU. */ | 416 | /* This actually kills the CPU. */ |
415 | __cpu_die(cpu); | 417 | __cpu_die(cpu); |
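_cpu_down() now waits for the outgoing CPU to park in its idle loop rather than merely looking idle, and the smp_mb() orders the read of cpu_dead_idle before the subsequent __cpu_die(). The other half of the handshake lives in kernel/sched/idle.c and kernel/smpboot.c (in the diffstat, not shown here); schematically, and hedged accordingly, the dying CPU is expected to do roughly:

/* On the outgoing CPU, once the idle loop observes that it is offline: */
rcu_cpu_notify(NULL, CPU_DYING_IDLE, (void *)(long)smp_processor_id());
smp_mb();                               /* Prior work visible before death. */
this_cpu_write(cpu_dead_idle, true);    /* Release _cpu_down()'s busy-wait. */
arch_cpu_idle_dead();                   /* Does not return. */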
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 30d42aa55d83..8dbe27611ec3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -853,6 +853,8 @@ rcu_torture_fqs(void *arg) | |||
853 | static int | 853 | static int |
854 | rcu_torture_writer(void *arg) | 854 | rcu_torture_writer(void *arg) |
855 | { | 855 | { |
856 | bool can_expedite = !rcu_gp_is_expedited(); | ||
857 | int expediting = 0; | ||
856 | unsigned long gp_snap; | 858 | unsigned long gp_snap; |
857 | bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; | 859 | bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; |
858 | bool gp_sync1 = gp_sync; | 860 | bool gp_sync1 = gp_sync; |
@@ -865,9 +867,15 @@ rcu_torture_writer(void *arg) | |||
865 | int nsynctypes = 0; | 867 | int nsynctypes = 0; |
866 | 868 | ||
867 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); | 869 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); |
870 | pr_alert("%s" TORTURE_FLAG | ||
871 | " Grace periods expedited from boot/sysfs for %s,\n", | ||
872 | torture_type, cur_ops->name); | ||
873 | pr_alert("%s" TORTURE_FLAG | ||
874 | " Testing of dynamic grace-period expediting diabled.\n", | ||
875 | torture_type); | ||
868 | 876 | ||
869 | /* Initialize synctype[] array. If none set, take default. */ | 877 | /* Initialize synctype[] array. If none set, take default. */ |
870 | if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync) | 878 | if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) |
871 | gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; | 879 | gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; |
872 | if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) | 880 | if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) |
873 | synctype[nsynctypes++] = RTWS_COND_GET; | 881 | synctype[nsynctypes++] = RTWS_COND_GET; |
@@ -949,9 +957,26 @@ rcu_torture_writer(void *arg) | |||
949 | } | 957 | } |
950 | } | 958 | } |
951 | rcutorture_record_progress(++rcu_torture_current_version); | 959 | rcutorture_record_progress(++rcu_torture_current_version); |
960 | /* Cycle through nesting levels of rcu_expedite_gp() calls. */ | ||
961 | if (can_expedite && | ||
962 | !(torture_random(&rand) & 0xff & (!!expediting - 1))) { | ||
963 | WARN_ON_ONCE(expediting == 0 && rcu_gp_is_expedited()); | ||
964 | if (expediting >= 0) | ||
965 | rcu_expedite_gp(); | ||
966 | else | ||
967 | rcu_unexpedite_gp(); | ||
968 | if (++expediting > 3) | ||
969 | expediting = -expediting; | ||
970 | } | ||
952 | rcu_torture_writer_state = RTWS_STUTTER; | 971 | rcu_torture_writer_state = RTWS_STUTTER; |
953 | stutter_wait("rcu_torture_writer"); | 972 | stutter_wait("rcu_torture_writer"); |
954 | } while (!torture_must_stop()); | 973 | } while (!torture_must_stop()); |
974 | /* Reset expediting back to unexpedited. */ | ||
975 | if (expediting > 0) | ||
976 | expediting = -expediting; | ||
977 | while (can_expedite && expediting++ < 0) | ||
978 | rcu_unexpedite_gp(); | ||
979 | WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited()); | ||
955 | rcu_torture_writer_state = RTWS_STOPPING; | 980 | rcu_torture_writer_state = RTWS_STOPPING; |
956 | torture_kthread_stopping("rcu_torture_writer"); | 981 | torture_kthread_stopping("rcu_torture_writer"); |
957 | return 0; | 982 | return 0; |
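The writer kthread now randomly starts an expediting cycle, nests rcu_expedite_gp() a few levels deep, and then unwinds it with matching rcu_unexpedite_gp() calls; the cleanup loop before RTWS_STOPPING guarantees the balance. Since the interface behaves as a counter, a minimal usage sketch looks like:

rcu_expedite_gp();                      /* expedite count 0 -> 1 */
rcu_expedite_gp();                      /* 1 -> 2: nesting is fine */
WARN_ON_ONCE(!rcu_gp_is_expedited());   /* true while the count is nonzero */
rcu_unexpedite_gp();                    /* 2 -> 1 */
rcu_unexpedite_gp();                    /* 1 -> 0: back to the boot/sysfs default */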
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 445bf8ffe3fb..cad76e76b4e7 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -402,23 +402,6 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | |||
402 | } | 402 | } |
403 | EXPORT_SYMBOL_GPL(call_srcu); | 403 | EXPORT_SYMBOL_GPL(call_srcu); |
404 | 404 | ||
405 | struct rcu_synchronize { | ||
406 | struct rcu_head head; | ||
407 | struct completion completion; | ||
408 | }; | ||
409 | |||
410 | /* | ||
411 | * Awaken the corresponding synchronize_srcu() instance now that a | ||
412 | * grace period has elapsed. | ||
413 | */ | ||
414 | static void wakeme_after_rcu(struct rcu_head *head) | ||
415 | { | ||
416 | struct rcu_synchronize *rcu; | ||
417 | |||
418 | rcu = container_of(head, struct rcu_synchronize, head); | ||
419 | complete(&rcu->completion); | ||
420 | } | ||
421 | |||
422 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); | 405 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); |
423 | static void srcu_reschedule(struct srcu_struct *sp); | 406 | static void srcu_reschedule(struct srcu_struct *sp); |
424 | 407 | ||
@@ -507,7 +490,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) | |||
507 | */ | 490 | */ |
508 | void synchronize_srcu(struct srcu_struct *sp) | 491 | void synchronize_srcu(struct srcu_struct *sp) |
509 | { | 492 | { |
510 | __synchronize_srcu(sp, rcu_expedited | 493 | __synchronize_srcu(sp, rcu_gp_is_expedited() |
511 | ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT | 494 | ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT |
512 | : SYNCHRONIZE_SRCU_TRYCOUNT); | 495 | : SYNCHRONIZE_SRCU_TRYCOUNT); |
513 | } | 496 | } |
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index cc9ceca7bde1..069742d61c68 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -103,8 +103,7 @@ EXPORT_SYMBOL(__rcu_is_watching); | |||
103 | static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | 103 | static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) |
104 | { | 104 | { |
105 | RCU_TRACE(reset_cpu_stall_ticks(rcp)); | 105 | RCU_TRACE(reset_cpu_stall_ticks(rcp)); |
106 | if (rcp->rcucblist != NULL && | 106 | if (rcp->donetail != rcp->curtail) { |
107 | rcp->donetail != rcp->curtail) { | ||
108 | rcp->donetail = rcp->curtail; | 107 | rcp->donetail = rcp->curtail; |
109 | return 1; | 108 | return 1; |
110 | } | 109 | } |
@@ -169,17 +168,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
169 | unsigned long flags; | 168 | unsigned long flags; |
170 | RCU_TRACE(int cb_count = 0); | 169 | RCU_TRACE(int cb_count = 0); |
171 | 170 | ||
172 | /* If no RCU callbacks ready to invoke, just return. */ | ||
173 | if (&rcp->rcucblist == rcp->donetail) { | ||
174 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); | ||
175 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, | ||
176 | !!ACCESS_ONCE(rcp->rcucblist), | ||
177 | need_resched(), | ||
178 | is_idle_task(current), | ||
179 | false)); | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | /* Move the ready-to-invoke callbacks to a local list. */ | 171 | /* Move the ready-to-invoke callbacks to a local list. */ |
184 | local_irq_save(flags); | 172 | local_irq_save(flags); |
185 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); | 173 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 48d640ca1a05..233165da782f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,8 +91,10 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var | |||
91 | 91 | ||
92 | #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ | 92 | #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ |
93 | DEFINE_RCU_TPS(sname) \ | 93 | DEFINE_RCU_TPS(sname) \ |
94 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ | ||
94 | struct rcu_state sname##_state = { \ | 95 | struct rcu_state sname##_state = { \ |
95 | .level = { &sname##_state.node[0] }, \ | 96 | .level = { &sname##_state.node[0] }, \ |
97 | .rda = &sname##_data, \ | ||
96 | .call = cr, \ | 98 | .call = cr, \ |
97 | .fqs_state = RCU_GP_IDLE, \ | 99 | .fqs_state = RCU_GP_IDLE, \ |
98 | .gpnum = 0UL - 300UL, \ | 100 | .gpnum = 0UL - 300UL, \ |
@@ -101,11 +103,9 @@ struct rcu_state sname##_state = { \ | |||
101 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ | 103 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ |
102 | .orphan_donetail = &sname##_state.orphan_donelist, \ | 104 | .orphan_donetail = &sname##_state.orphan_donelist, \ |
103 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ | 105 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
104 | .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ | ||
105 | .name = RCU_STATE_NAME(sname), \ | 106 | .name = RCU_STATE_NAME(sname), \ |
106 | .abbr = sabbr, \ | 107 | .abbr = sabbr, \ |
107 | }; \ | 108 | } |
108 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data) | ||
109 | 109 | ||
110 | RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); | 110 | RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); |
111 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); | 111 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); |
@@ -152,6 +152,8 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); | |||
152 | */ | 152 | */ |
153 | static int rcu_scheduler_fully_active __read_mostly; | 153 | static int rcu_scheduler_fully_active __read_mostly; |
154 | 154 | ||
155 | static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); | ||
156 | static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); | ||
155 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 157 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
156 | static void invoke_rcu_core(void); | 158 | static void invoke_rcu_core(void); |
157 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 159 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
@@ -160,6 +162,12 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | |||
160 | static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; | 162 | static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; |
161 | module_param(kthread_prio, int, 0644); | 163 | module_param(kthread_prio, int, 0644); |
162 | 164 | ||
165 | /* Delay in jiffies for grace-period initialization delays. */ | ||
166 | static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) | ||
167 | ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY | ||
168 | : 0; | ||
169 | module_param(gp_init_delay, int, 0644); | ||
170 | |||
163 | /* | 171 | /* |
164 | * Track the rcutorture test sequence number and the update version | 172 | * Track the rcutorture test sequence number and the update version |
165 | * number within a given test. The rcutorture_testseq is incremented | 173 | * number within a given test. The rcutorture_testseq is incremented |
@@ -173,6 +181,17 @@ unsigned long rcutorture_testseq; | |||
173 | unsigned long rcutorture_vernum; | 181 | unsigned long rcutorture_vernum; |
174 | 182 | ||
175 | /* | 183 | /* |
184 | * Compute the mask of online CPUs for the specified rcu_node structure. | ||
185 | * This will not be stable unless the rcu_node structure's ->lock is | ||
186 | * held, but the bit corresponding to the current CPU will be stable | ||
187 | * in most contexts. | ||
188 | */ | ||
189 | unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) | ||
190 | { | ||
191 | return ACCESS_ONCE(rnp->qsmaskinitnext); | ||
192 | } | ||
193 | |||
194 | /* | ||
176 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 195 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
177 | * permit this function to be invoked without holding the root rcu_node | 196 | * permit this function to be invoked without holding the root rcu_node |
178 | * structure's ->lock, but of course results can be subject to change. | 197 | * structure's ->lock, but of course results can be subject to change. |
@@ -292,10 +311,10 @@ void rcu_note_context_switch(void) | |||
292 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 311 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
293 | 312 | ||
294 | /* | 313 | /* |
295 | * Register a quiesecent state for all RCU flavors. If there is an | 314 | * Register a quiescent state for all RCU flavors. If there is an |
296 | * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight | 315 | * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight |
297 | * dyntick-idle quiescent state visible to other CPUs (but only for those | 316 | * dyntick-idle quiescent state visible to other CPUs (but only for those |
298 | * RCU flavors in desparate need of a quiescent state, which will normally | 317 | * RCU flavors in desperate need of a quiescent state, which will normally |
299 | * be none of them). Either way, do a lightweight quiescent state for | 318 | * be none of them). Either way, do a lightweight quiescent state for |
300 | * all RCU flavors. | 319 | * all RCU flavors. |
301 | */ | 320 | */ |
@@ -410,6 +429,15 @@ void rcu_bh_force_quiescent_state(void) | |||
410 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | 429 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
411 | 430 | ||
412 | /* | 431 | /* |
432 | * Force a quiescent state for RCU-sched. | ||
433 | */ | ||
434 | void rcu_sched_force_quiescent_state(void) | ||
435 | { | ||
436 | force_quiescent_state(&rcu_sched_state); | ||
437 | } | ||
438 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); | ||
439 | |||
440 | /* | ||
413 | * Show the state of the grace-period kthreads. | 441 | * Show the state of the grace-period kthreads. |
414 | */ | 442 | */ |
415 | void show_rcu_gp_kthreads(void) | 443 | void show_rcu_gp_kthreads(void) |
@@ -483,15 +511,6 @@ void rcutorture_record_progress(unsigned long vernum) | |||
483 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); | 511 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); |
484 | 512 | ||
485 | /* | 513 | /* |
486 | * Force a quiescent state for RCU-sched. | ||
487 | */ | ||
488 | void rcu_sched_force_quiescent_state(void) | ||
489 | { | ||
490 | force_quiescent_state(&rcu_sched_state); | ||
491 | } | ||
492 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); | ||
493 | |||
494 | /* | ||
495 | * Does the CPU have callbacks ready to be invoked? | 514 | * Does the CPU have callbacks ready to be invoked? |
496 | */ | 515 | */ |
497 | static int | 516 | static int |
@@ -954,7 +973,7 @@ bool rcu_lockdep_current_cpu_online(void) | |||
954 | preempt_disable(); | 973 | preempt_disable(); |
955 | rdp = this_cpu_ptr(&rcu_sched_data); | 974 | rdp = this_cpu_ptr(&rcu_sched_data); |
956 | rnp = rdp->mynode; | 975 | rnp = rdp->mynode; |
957 | ret = (rdp->grpmask & rnp->qsmaskinit) || | 976 | ret = (rdp->grpmask & rcu_rnp_online_cpus(rnp)) || |
958 | !rcu_scheduler_fully_active; | 977 | !rcu_scheduler_fully_active; |
959 | preempt_enable(); | 978 | preempt_enable(); |
960 | return ret; | 979 | return ret; |
@@ -1196,9 +1215,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) | |||
1196 | } else { | 1215 | } else { |
1197 | j = jiffies; | 1216 | j = jiffies; |
1198 | gpa = ACCESS_ONCE(rsp->gp_activity); | 1217 | gpa = ACCESS_ONCE(rsp->gp_activity); |
1199 | pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n", | 1218 | pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", |
1200 | rsp->name, j - gpa, j, gpa, | 1219 | rsp->name, j - gpa, j, gpa, |
1201 | jiffies_till_next_fqs); | 1220 | jiffies_till_next_fqs, |
1221 | rcu_get_root(rsp)->qsmask); | ||
1202 | /* In this case, the current CPU might be at fault. */ | 1222 | /* In this case, the current CPU might be at fault. */ |
1203 | sched_show_task(current); | 1223 | sched_show_task(current); |
1204 | } | 1224 | } |
@@ -1328,20 +1348,30 @@ void rcu_cpu_stall_reset(void) | |||
1328 | } | 1348 | } |
1329 | 1349 | ||
1330 | /* | 1350 | /* |
1331 | * Initialize the specified rcu_data structure's callback list to empty. | 1351 | * Initialize the specified rcu_data structure's default callback list |
1352 | * to empty. The default callback list is the one that is not used by | ||
1353 | * no-callbacks CPUs. | ||
1332 | */ | 1354 | */ |
1333 | static void init_callback_list(struct rcu_data *rdp) | 1355 | static void init_default_callback_list(struct rcu_data *rdp) |
1334 | { | 1356 | { |
1335 | int i; | 1357 | int i; |
1336 | 1358 | ||
1337 | if (init_nocb_callback_list(rdp)) | ||
1338 | return; | ||
1339 | rdp->nxtlist = NULL; | 1359 | rdp->nxtlist = NULL; |
1340 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1360 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1341 | rdp->nxttail[i] = &rdp->nxtlist; | 1361 | rdp->nxttail[i] = &rdp->nxtlist; |
1342 | } | 1362 | } |
1343 | 1363 | ||
1344 | /* | 1364 | /* |
1365 | * Initialize the specified rcu_data structure's callback list to empty. | ||
1366 | */ | ||
1367 | static void init_callback_list(struct rcu_data *rdp) | ||
1368 | { | ||
1369 | if (init_nocb_callback_list(rdp)) | ||
1370 | return; | ||
1371 | init_default_callback_list(rdp); | ||
1372 | } | ||
1373 | |||
1374 | /* | ||
1345 | * Determine the value that ->completed will have at the end of the | 1375 | * Determine the value that ->completed will have at the end of the |
1346 | * next subsequent grace period. This is used to tag callbacks so that | 1376 | * next subsequent grace period. This is used to tag callbacks so that |
1347 | * a CPU can invoke callbacks in a timely fashion even if that CPU has | 1377 | * a CPU can invoke callbacks in a timely fashion even if that CPU has |
@@ -1703,11 +1733,11 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1703 | */ | 1733 | */ |
1704 | static int rcu_gp_init(struct rcu_state *rsp) | 1734 | static int rcu_gp_init(struct rcu_state *rsp) |
1705 | { | 1735 | { |
1736 | unsigned long oldmask; | ||
1706 | struct rcu_data *rdp; | 1737 | struct rcu_data *rdp; |
1707 | struct rcu_node *rnp = rcu_get_root(rsp); | 1738 | struct rcu_node *rnp = rcu_get_root(rsp); |
1708 | 1739 | ||
1709 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | 1740 | ACCESS_ONCE(rsp->gp_activity) = jiffies; |
1710 | rcu_bind_gp_kthread(); | ||
1711 | raw_spin_lock_irq(&rnp->lock); | 1741 | raw_spin_lock_irq(&rnp->lock); |
1712 | smp_mb__after_unlock_lock(); | 1742 | smp_mb__after_unlock_lock(); |
1713 | if (!ACCESS_ONCE(rsp->gp_flags)) { | 1743 | if (!ACCESS_ONCE(rsp->gp_flags)) { |
@@ -1733,9 +1763,54 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1733 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); | 1763 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); |
1734 | raw_spin_unlock_irq(&rnp->lock); | 1764 | raw_spin_unlock_irq(&rnp->lock); |
1735 | 1765 | ||
1736 | /* Exclude any concurrent CPU-hotplug operations. */ | 1766 | /* |
1737 | mutex_lock(&rsp->onoff_mutex); | 1767 | * Apply per-leaf buffered online and offline operations to the |
1738 | smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */ | 1768 | * rcu_node tree. Note that this new grace period need not wait |
1769 | * for subsequent online CPUs, and that quiescent-state forcing | ||
1770 | * will handle subsequent offline CPUs. | ||
1771 | */ | ||
1772 | rcu_for_each_leaf_node(rsp, rnp) { | ||
1773 | raw_spin_lock_irq(&rnp->lock); | ||
1774 | smp_mb__after_unlock_lock(); | ||
1775 | if (rnp->qsmaskinit == rnp->qsmaskinitnext && | ||
1776 | !rnp->wait_blkd_tasks) { | ||
1777 | /* Nothing to do on this leaf rcu_node structure. */ | ||
1778 | raw_spin_unlock_irq(&rnp->lock); | ||
1779 | continue; | ||
1780 | } | ||
1781 | |||
1782 | /* Record old state, apply changes to ->qsmaskinit field. */ | ||
1783 | oldmask = rnp->qsmaskinit; | ||
1784 | rnp->qsmaskinit = rnp->qsmaskinitnext; | ||
1785 | |||
1786 | /* If zero-ness of ->qsmaskinit changed, propagate up tree. */ | ||
1787 | if (!oldmask != !rnp->qsmaskinit) { | ||
1788 | if (!oldmask) /* First online CPU for this rcu_node. */ | ||
1789 | rcu_init_new_rnp(rnp); | ||
1790 | else if (rcu_preempt_has_tasks(rnp)) /* blocked tasks */ | ||
1791 | rnp->wait_blkd_tasks = true; | ||
1792 | else /* Last offline CPU and can propagate. */ | ||
1793 | rcu_cleanup_dead_rnp(rnp); | ||
1794 | } | ||
1795 | |||
1796 | /* | ||
1797 | * If all waited-on tasks from prior grace period are | ||
1798 | * done, and if all this rcu_node structure's CPUs are | ||
1799 | * still offline, propagate up the rcu_node tree and | ||
1800 | * clear ->wait_blkd_tasks. Otherwise, if one of this | ||
1801 | * rcu_node structure's CPUs has since come back online, | ||
1802 | * simply clear ->wait_blkd_tasks (but rcu_cleanup_dead_rnp() | ||
1803 | * checks for this, so just call it unconditionally). | ||
1804 | */ | ||
1805 | if (rnp->wait_blkd_tasks && | ||
1806 | (!rcu_preempt_has_tasks(rnp) || | ||
1807 | rnp->qsmaskinit)) { | ||
1808 | rnp->wait_blkd_tasks = false; | ||
1809 | rcu_cleanup_dead_rnp(rnp); | ||
1810 | } | ||
1811 | |||
1812 | raw_spin_unlock_irq(&rnp->lock); | ||
1813 | } | ||
1739 | 1814 | ||
1740 | /* | 1815 | /* |
1741 | * Set the quiescent-state-needed bits in all the rcu_node | 1816 | * Set the quiescent-state-needed bits in all the rcu_node |
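In the rcu_for_each_leaf_node() loop added above, the test "!oldmask != !rnp->qsmaskinit" fires only when a leaf toggles between having no online CPUs and having at least one; a worked example (mask values are illustrative):

/*
 *   oldmask == 0x0, new ->qsmaskinit == 0x4: the leaf's first CPU came
 *       online, so propagate up with rcu_init_new_rnp().
 *   oldmask == 0x4, new ->qsmaskinit == 0x0: the last CPU went offline;
 *       either defer via ->wait_blkd_tasks (blocked readers remain) or
 *       propagate the cleanup with rcu_cleanup_dead_rnp().
 *   oldmask == 0x4, new ->qsmaskinit == 0x6: the leaf was and remains
 *       non-empty, so there is nothing to propagate.
 */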
@@ -1757,8 +1832,8 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1757 | rcu_preempt_check_blocked_tasks(rnp); | 1832 | rcu_preempt_check_blocked_tasks(rnp); |
1758 | rnp->qsmask = rnp->qsmaskinit; | 1833 | rnp->qsmask = rnp->qsmaskinit; |
1759 | ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; | 1834 | ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; |
1760 | WARN_ON_ONCE(rnp->completed != rsp->completed); | 1835 | if (WARN_ON_ONCE(rnp->completed != rsp->completed)) |
1761 | ACCESS_ONCE(rnp->completed) = rsp->completed; | 1836 | ACCESS_ONCE(rnp->completed) = rsp->completed; |
1762 | if (rnp == rdp->mynode) | 1837 | if (rnp == rdp->mynode) |
1763 | (void)__note_gp_changes(rsp, rnp, rdp); | 1838 | (void)__note_gp_changes(rsp, rnp, rdp); |
1764 | rcu_preempt_boost_start_gp(rnp); | 1839 | rcu_preempt_boost_start_gp(rnp); |
@@ -1768,9 +1843,12 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1768 | raw_spin_unlock_irq(&rnp->lock); | 1843 | raw_spin_unlock_irq(&rnp->lock); |
1769 | cond_resched_rcu_qs(); | 1844 | cond_resched_rcu_qs(); |
1770 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | 1845 | ACCESS_ONCE(rsp->gp_activity) = jiffies; |
1846 | if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && | ||
1847 | gp_init_delay > 0 && | ||
1848 | !(rsp->gpnum % (rcu_num_nodes * 10))) | ||
1849 | schedule_timeout_uninterruptible(gp_init_delay); | ||
1771 | } | 1850 | } |
1772 | 1851 | ||
1773 | mutex_unlock(&rsp->onoff_mutex); | ||
1774 | return 1; | 1852 | return 1; |
1775 | } | 1853 | } |
1776 | 1854 | ||
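Note the cadence of the new delay: it sits inside the breadth-first loop over all rcu_node structures and fires only when rsp->gpnum is a multiple of rcu_num_nodes * 10. For example, with rcu_num_nodes == 3 and gp_init_delay == 3 jiffies (values chosen purely for illustration), every 30th grace period spends about 3 extra jiffies per rcu_node, roughly 9 jiffies in all, in initialization, while other grace periods are unaffected.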
@@ -1798,7 +1876,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) | |||
1798 | fqs_state = RCU_FORCE_QS; | 1876 | fqs_state = RCU_FORCE_QS; |
1799 | } else { | 1877 | } else { |
1800 | /* Handle dyntick-idle and offline CPUs. */ | 1878 | /* Handle dyntick-idle and offline CPUs. */ |
1801 | isidle = false; | 1879 | isidle = true; |
1802 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); | 1880 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); |
1803 | } | 1881 | } |
1804 | /* Clear flag to prevent immediate re-entry. */ | 1882 | /* Clear flag to prevent immediate re-entry. */ |
@@ -1852,6 +1930,8 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
1852 | rcu_for_each_node_breadth_first(rsp, rnp) { | 1930 | rcu_for_each_node_breadth_first(rsp, rnp) { |
1853 | raw_spin_lock_irq(&rnp->lock); | 1931 | raw_spin_lock_irq(&rnp->lock); |
1854 | smp_mb__after_unlock_lock(); | 1932 | smp_mb__after_unlock_lock(); |
1933 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); | ||
1934 | WARN_ON_ONCE(rnp->qsmask); | ||
1855 | ACCESS_ONCE(rnp->completed) = rsp->gpnum; | 1935 | ACCESS_ONCE(rnp->completed) = rsp->gpnum; |
1856 | rdp = this_cpu_ptr(rsp->rda); | 1936 | rdp = this_cpu_ptr(rsp->rda); |
1857 | if (rnp == rdp->mynode) | 1937 | if (rnp == rdp->mynode) |
@@ -1895,6 +1975,7 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
1895 | struct rcu_state *rsp = arg; | 1975 | struct rcu_state *rsp = arg; |
1896 | struct rcu_node *rnp = rcu_get_root(rsp); | 1976 | struct rcu_node *rnp = rcu_get_root(rsp); |
1897 | 1977 | ||
1978 | rcu_bind_gp_kthread(); | ||
1898 | for (;;) { | 1979 | for (;;) { |
1899 | 1980 | ||
1900 | /* Handle grace-period start. */ | 1981 | /* Handle grace-period start. */ |
@@ -2062,25 +2143,32 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
2062 | * Similar to rcu_report_qs_rdp(), for which it is a helper function. | 2143 | * Similar to rcu_report_qs_rdp(), for which it is a helper function. |
2063 | * Allows quiescent states for a group of CPUs to be reported at one go | 2144 | * Allows quiescent states for a group of CPUs to be reported at one go |
2064 | * to the specified rcu_node structure, though all the CPUs in the group | 2145 | * to the specified rcu_node structure, though all the CPUs in the group |
2065 | * must be represented by the same rcu_node structure (which need not be | 2146 | * must be represented by the same rcu_node structure (which need not be a |
2066 | * a leaf rcu_node structure, though it often will be). That structure's | 2147 | * leaf rcu_node structure, though it often will be). The gps parameter |
2067 | * lock must be held upon entry, and it is released before return. | 2148 | * is the grace-period snapshot, which means that the quiescent states |
2149 | * are valid only if rnp->gpnum is equal to gps. That structure's lock | ||
2150 | * must be held upon entry, and it is released before return. | ||
2068 | */ | 2151 | */ |
2069 | static void | 2152 | static void |
2070 | rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | 2153 | rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, |
2071 | struct rcu_node *rnp, unsigned long flags) | 2154 | struct rcu_node *rnp, unsigned long gps, unsigned long flags) |
2072 | __releases(rnp->lock) | 2155 | __releases(rnp->lock) |
2073 | { | 2156 | { |
2157 | unsigned long oldmask = 0; | ||
2074 | struct rcu_node *rnp_c; | 2158 | struct rcu_node *rnp_c; |
2075 | 2159 | ||
2076 | /* Walk up the rcu_node hierarchy. */ | 2160 | /* Walk up the rcu_node hierarchy. */ |
2077 | for (;;) { | 2161 | for (;;) { |
2078 | if (!(rnp->qsmask & mask)) { | 2162 | if (!(rnp->qsmask & mask) || rnp->gpnum != gps) { |
2079 | 2163 | ||
2080 | /* Our bit has already been cleared, so done. */ | 2164 | /* |
2165 | * Our bit has already been cleared, or the | ||
2166 | * relevant grace period is already over, so done. | ||
2167 | */ | ||
2081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2168 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
2082 | return; | 2169 | return; |
2083 | } | 2170 | } |
2171 | WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ | ||
2084 | rnp->qsmask &= ~mask; | 2172 | rnp->qsmask &= ~mask; |
2085 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | 2173 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, |
2086 | mask, rnp->qsmask, rnp->level, | 2174 | mask, rnp->qsmask, rnp->level, |
@@ -2104,7 +2192,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
2104 | rnp = rnp->parent; | 2192 | rnp = rnp->parent; |
2105 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2193 | raw_spin_lock_irqsave(&rnp->lock, flags); |
2106 | smp_mb__after_unlock_lock(); | 2194 | smp_mb__after_unlock_lock(); |
2107 | WARN_ON_ONCE(rnp_c->qsmask); | 2195 | oldmask = rnp_c->qsmask; |
2108 | } | 2196 | } |
2109 | 2197 | ||
2110 | /* | 2198 | /* |
@@ -2116,6 +2204,46 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
2116 | } | 2204 | } |
2117 | 2205 | ||
2118 | /* | 2206 | /* |
2207 | * Record a quiescent state for all tasks that were previously queued | ||
2208 | * on the specified rcu_node structure and that were blocking the current | ||
2209 | * RCU grace period. The caller must hold the specified rnp->lock with | ||
2210 | * irqs disabled, and this lock is released upon return, but irqs remain | ||
2211 | * disabled. | ||
2212 | */ | ||
2213 | static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, | ||
2214 | struct rcu_node *rnp, unsigned long flags) | ||
2215 | __releases(rnp->lock) | ||
2216 | { | ||
2217 | unsigned long gps; | ||
2218 | unsigned long mask; | ||
2219 | struct rcu_node *rnp_p; | ||
2220 | |||
2221 | if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || | ||
2222 | rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | ||
2223 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
2224 | return; /* Still need more quiescent states! */ | ||
2225 | } | ||
2226 | |||
2227 | rnp_p = rnp->parent; | ||
2228 | if (rnp_p == NULL) { | ||
2229 | /* | ||
2230 | * Only one rcu_node structure in the tree, so don't | ||
2231 | * try to report up to its nonexistent parent! | ||
2232 | */ | ||
2233 | rcu_report_qs_rsp(rsp, flags); | ||
2234 | return; | ||
2235 | } | ||
2236 | |||
2237 | /* Report up the rest of the hierarchy, tracking current ->gpnum. */ | ||
2238 | gps = rnp->gpnum; | ||
2239 | mask = rnp->grpmask; | ||
2240 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
2241 | raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ | ||
2242 | smp_mb__after_unlock_lock(); | ||
2243 | rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); | ||
2244 | } | ||
2245 | |||
2246 | /* | ||
2119 | * Record a quiescent state for the specified CPU to that CPU's rcu_data | 2247 | * Record a quiescent state for the specified CPU to that CPU's rcu_data |
2120 | * structure. This must be either called from the specified CPU, or | 2248 | * structure. This must be either called from the specified CPU, or |
2121 | * called when the specified CPU is known to be offline (and when it is | 2249 | * called when the specified CPU is known to be offline (and when it is |
@@ -2163,7 +2291,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) | |||
2163 | */ | 2291 | */ |
2164 | needwake = rcu_accelerate_cbs(rsp, rnp, rdp); | 2292 | needwake = rcu_accelerate_cbs(rsp, rnp, rdp); |
2165 | 2293 | ||
2166 | rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ | 2294 | rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); |
2295 | /* ^^^ Released rnp->lock */ | ||
2167 | if (needwake) | 2296 | if (needwake) |
2168 | rcu_gp_kthread_wake(rsp); | 2297 | rcu_gp_kthread_wake(rsp); |
2169 | } | 2298 | } |
@@ -2256,8 +2385,12 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | |||
2256 | rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; | 2385 | rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; |
2257 | } | 2386 | } |
2258 | 2387 | ||
2259 | /* Finally, initialize the rcu_data structure's list to empty. */ | 2388 | /* |
2389 | * Finally, initialize the rcu_data structure's list to empty and | ||
2390 | * disallow further callbacks on this CPU. | ||
2391 | */ | ||
2260 | init_callback_list(rdp); | 2392 | init_callback_list(rdp); |
2393 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | ||
2261 | } | 2394 | } |
2262 | 2395 | ||
2263 | /* | 2396 | /* |
@@ -2355,6 +2488,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) | |||
2355 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 2488 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
2356 | smp_mb__after_unlock_lock(); /* GP memory ordering. */ | 2489 | smp_mb__after_unlock_lock(); /* GP memory ordering. */ |
2357 | rnp->qsmaskinit &= ~mask; | 2490 | rnp->qsmaskinit &= ~mask; |
2491 | rnp->qsmask &= ~mask; | ||
2358 | if (rnp->qsmaskinit) { | 2492 | if (rnp->qsmaskinit) { |
2359 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2493 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
2360 | return; | 2494 | return; |
@@ -2364,6 +2498,26 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) | |||
2364 | } | 2498 | } |
2365 | 2499 | ||
2366 | /* | 2500 | /* |
2501 | * The CPU is exiting the idle loop into the arch_cpu_idle_dead() | ||
2502 | * function. We now remove it from the rcu_node tree's ->qsmaskinit | ||
2503 | * bit masks. | ||
2504 | */ | ||
2505 | static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) | ||
2506 | { | ||
2507 | unsigned long flags; | ||
2508 | unsigned long mask; | ||
2509 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
2510 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ | ||
2511 | |||
2512 | /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ | ||
2513 | mask = rdp->grpmask; | ||
2514 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
2515 | smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ | ||
2516 | rnp->qsmaskinitnext &= ~mask; | ||
2517 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
2518 | } | ||
2519 | |||
2520 | /* | ||
2367 | * The CPU has been completely removed, and some other CPU is reporting | 2521 | * The CPU has been completely removed, and some other CPU is reporting |
2368 | * this fact from process context. Do the remainder of the cleanup, | 2522 | * this fact from process context. Do the remainder of the cleanup, |
2369 | * including orphaning the outgoing CPU's RCU callbacks, and also | 2523 | * including orphaning the outgoing CPU's RCU callbacks, and also |
@@ -2379,29 +2533,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
2379 | /* Adjust any no-longer-needed kthreads. */ | 2533 | /* Adjust any no-longer-needed kthreads. */ |
2380 | rcu_boost_kthread_setaffinity(rnp, -1); | 2534 | rcu_boost_kthread_setaffinity(rnp, -1); |
2381 | 2535 | ||
2382 | /* Exclude any attempts to start a new grace period. */ | ||
2383 | mutex_lock(&rsp->onoff_mutex); | ||
2384 | raw_spin_lock_irqsave(&rsp->orphan_lock, flags); | ||
2385 | |||
2386 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ | 2536 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ |
2537 | raw_spin_lock_irqsave(&rsp->orphan_lock, flags); | ||
2387 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); | 2538 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); |
2388 | rcu_adopt_orphan_cbs(rsp, flags); | 2539 | rcu_adopt_orphan_cbs(rsp, flags); |
2389 | raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); | 2540 | raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); |
2390 | 2541 | ||
2391 | /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ | ||
2392 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
2393 | smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ | ||
2394 | rnp->qsmaskinit &= ~rdp->grpmask; | ||
2395 | if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp)) | ||
2396 | rcu_cleanup_dead_rnp(rnp); | ||
2397 | rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */ | ||
2398 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, | 2542 | WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, |
2399 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", | 2543 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", |
2400 | cpu, rdp->qlen, rdp->nxtlist); | 2544 | cpu, rdp->qlen, rdp->nxtlist); |
2401 | init_callback_list(rdp); | ||
2402 | /* Disallow further callbacks on this CPU. */ | ||
2403 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | ||
2404 | mutex_unlock(&rsp->onoff_mutex); | ||
2405 | } | 2545 | } |
2406 | 2546 | ||
2407 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 2547 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
@@ -2414,6 +2554,10 @@ static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) | |||
2414 | { | 2554 | { |
2415 | } | 2555 | } |
2416 | 2556 | ||
2557 | static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) | ||
2558 | { | ||
2559 | } | ||
2560 | |||
2417 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | 2561 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
2418 | { | 2562 | { |
2419 | } | 2563 | } |
@@ -2589,26 +2733,47 @@ static void force_qs_rnp(struct rcu_state *rsp, | |||
2589 | return; | 2733 | return; |
2590 | } | 2734 | } |
2591 | if (rnp->qsmask == 0) { | 2735 | if (rnp->qsmask == 0) { |
2592 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ | 2736 | if (rcu_state_p == &rcu_sched_state || |
2593 | continue; | 2737 | rsp != rcu_state_p || |
2738 | rcu_preempt_blocked_readers_cgp(rnp)) { | ||
2739 | /* | ||
2740 | * No point in scanning bits because they | ||
2741 | * are all zero. But we might need to | ||
2742 | * priority-boost blocked readers. | ||
2743 | */ | ||
2744 | rcu_initiate_boost(rnp, flags); | ||
2745 | /* rcu_initiate_boost() releases rnp->lock */ | ||
2746 | continue; | ||
2747 | } | ||
2748 | if (rnp->parent && | ||
2749 | (rnp->parent->qsmask & rnp->grpmask)) { | ||
2750 | /* | ||
2751 | * Race between grace-period | ||
2752 | * initialization and task exiting RCU | ||
2753 | * read-side critical section: Report. | ||
2754 | */ | ||
2755 | rcu_report_unblock_qs_rnp(rsp, rnp, flags); | ||
2756 | /* rcu_report_unblock_qs_rnp() rlses ->lock */ | ||
2757 | continue; | ||
2758 | } | ||
2594 | } | 2759 | } |
2595 | cpu = rnp->grplo; | 2760 | cpu = rnp->grplo; |
2596 | bit = 1; | 2761 | bit = 1; |
2597 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { | 2762 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
2598 | if ((rnp->qsmask & bit) != 0) { | 2763 | if ((rnp->qsmask & bit) != 0) { |
2599 | if ((rnp->qsmaskinit & bit) != 0) | 2764 | if ((rnp->qsmaskinit & bit) == 0) |
2600 | *isidle = false; | 2765 | *isidle = false; /* Pending hotplug. */ |
2601 | if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) | 2766 | if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) |
2602 | mask |= bit; | 2767 | mask |= bit; |
2603 | } | 2768 | } |
2604 | } | 2769 | } |
2605 | if (mask != 0) { | 2770 | if (mask != 0) { |
2606 | 2771 | /* Idle/offline CPUs, report (releases rnp->lock). */ | |
2607 | /* rcu_report_qs_rnp() releases rnp->lock. */ | 2772 | rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); |
2608 | rcu_report_qs_rnp(mask, rsp, rnp, flags); | 2773 | } else { |
2609 | continue; | 2774 | /* Nothing to do here, so just drop the lock. */ |
2775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
2610 | } | 2776 | } |
2611 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
2612 | } | 2777 | } |
2613 | } | 2778 | } |
2614 | 2779 | ||
@@ -2741,7 +2906,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, | |||
2741 | * If called from an extended quiescent state, invoke the RCU | 2906 | * If called from an extended quiescent state, invoke the RCU |
2742 | * core in order to force a re-evaluation of RCU's idleness. | 2907 | * core in order to force a re-evaluation of RCU's idleness. |
2743 | */ | 2908 | */ |
2744 | if (!rcu_is_watching() && cpu_online(smp_processor_id())) | 2909 | if (!rcu_is_watching()) |
2745 | invoke_rcu_core(); | 2910 | invoke_rcu_core(); |
2746 | 2911 | ||
2747 | /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ | 2912 | /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ |
@@ -2827,11 +2992,22 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
2827 | 2992 | ||
2828 | if (cpu != -1) | 2993 | if (cpu != -1) |
2829 | rdp = per_cpu_ptr(rsp->rda, cpu); | 2994 | rdp = per_cpu_ptr(rsp->rda, cpu); |
2830 | offline = !__call_rcu_nocb(rdp, head, lazy, flags); | 2995 | if (likely(rdp->mynode)) { |
2831 | WARN_ON_ONCE(offline); | 2996 | /* Post-boot, so this should be for a no-CBs CPU. */ |
2832 | /* _call_rcu() is illegal on offline CPU; leak the callback. */ | 2997 | offline = !__call_rcu_nocb(rdp, head, lazy, flags); |
2833 | local_irq_restore(flags); | 2998 | WARN_ON_ONCE(offline); |
2834 | return; | 2999 | /* Offline CPU, _call_rcu() illegal, leak callback. */ |
3000 | local_irq_restore(flags); | ||
3001 | return; | ||
3002 | } | ||
3003 | /* | ||
3004 | * Very early boot, before rcu_init(). Initialize if needed | ||
3005 | * and then drop through to queue the callback. | ||
3006 | */ | ||
3007 | BUG_ON(cpu != -1); | ||
3008 | WARN_ON_ONCE(!rcu_is_watching()); | ||
3009 | if (!likely(rdp->nxtlist)) | ||
3010 | init_default_callback_list(rdp); | ||
2835 | } | 3011 | } |
2836 | ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; | 3012 | ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; |
2837 | if (lazy) | 3013 | if (lazy) |
@@ -2954,7 +3130,7 @@ void synchronize_sched(void) | |||
2954 | "Illegal synchronize_sched() in RCU-sched read-side critical section"); | 3130 | "Illegal synchronize_sched() in RCU-sched read-side critical section"); |
2955 | if (rcu_blocking_is_gp()) | 3131 | if (rcu_blocking_is_gp()) |
2956 | return; | 3132 | return; |
2957 | if (rcu_expedited) | 3133 | if (rcu_gp_is_expedited()) |
2958 | synchronize_sched_expedited(); | 3134 | synchronize_sched_expedited(); |
2959 | else | 3135 | else |
2960 | wait_rcu_gp(call_rcu_sched); | 3136 | wait_rcu_gp(call_rcu_sched); |
@@ -2981,7 +3157,7 @@ void synchronize_rcu_bh(void) | |||
2981 | "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); | 3157 | "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); |
2982 | if (rcu_blocking_is_gp()) | 3158 | if (rcu_blocking_is_gp()) |
2983 | return; | 3159 | return; |
2984 | if (rcu_expedited) | 3160 | if (rcu_gp_is_expedited()) |
2985 | synchronize_rcu_bh_expedited(); | 3161 | synchronize_rcu_bh_expedited(); |
2986 | else | 3162 | else |
2987 | wait_rcu_gp(call_rcu_bh); | 3163 | wait_rcu_gp(call_rcu_bh); |
@@ -3518,6 +3694,28 @@ void rcu_barrier_sched(void) | |||
3518 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 3694 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
3519 | 3695 | ||
3520 | /* | 3696 | /* |
3697 | * Propagate ->qsmaskinit bits up the rcu_node tree to account for the | ||
3698 | * first CPU in a given leaf rcu_node structure coming online. The caller | ||
3699 | * must hold the corresponding leaf rcu_node ->lock with interrupts | ||
3700 | * disabled. | ||
3701 | */ | ||
3702 | static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) | ||
3703 | { | ||
3704 | long mask; | ||
3705 | struct rcu_node *rnp = rnp_leaf; | ||
3706 | |||
3707 | for (;;) { | ||
3708 | mask = rnp->grpmask; | ||
3709 | rnp = rnp->parent; | ||
3710 | if (rnp == NULL) | ||
3711 | return; | ||
3712 | raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */ | ||
3713 | rnp->qsmaskinit |= mask; | ||
3714 | raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ | ||
3715 | } | ||
3716 | } | ||
3717 | |||
3718 | /* | ||
3521 | * Do boot-time initialization of a CPU's per-CPU RCU data. | 3719 | * Do boot-time initialization of a CPU's per-CPU RCU data. |
3522 | */ | 3720 | */ |
3523 | static void __init | 3721 | static void __init |
@@ -3553,49 +3751,37 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
3553 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 3751 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
3554 | struct rcu_node *rnp = rcu_get_root(rsp); | 3752 | struct rcu_node *rnp = rcu_get_root(rsp); |
3555 | 3753 | ||
3556 | /* Exclude new grace periods. */ | ||
3557 | mutex_lock(&rsp->onoff_mutex); | ||
3558 | |||
3559 | /* Set up local state, ensuring consistent view of global state. */ | 3754 | /* Set up local state, ensuring consistent view of global state. */ |
3560 | raw_spin_lock_irqsave(&rnp->lock, flags); | 3755 | raw_spin_lock_irqsave(&rnp->lock, flags); |
3561 | rdp->beenonline = 1; /* We have now been online. */ | 3756 | rdp->beenonline = 1; /* We have now been online. */ |
3562 | rdp->qlen_last_fqs_check = 0; | 3757 | rdp->qlen_last_fqs_check = 0; |
3563 | rdp->n_force_qs_snap = rsp->n_force_qs; | 3758 | rdp->n_force_qs_snap = rsp->n_force_qs; |
3564 | rdp->blimit = blimit; | 3759 | rdp->blimit = blimit; |
3565 | init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ | 3760 | if (!rdp->nxtlist) |
3761 | init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ | ||
3566 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | 3762 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
3567 | rcu_sysidle_init_percpu_data(rdp->dynticks); | 3763 | rcu_sysidle_init_percpu_data(rdp->dynticks); |
3568 | atomic_set(&rdp->dynticks->dynticks, | 3764 | atomic_set(&rdp->dynticks->dynticks, |
3569 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 3765 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
3570 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 3766 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
3571 | 3767 | ||
3572 | /* Add CPU to rcu_node bitmasks. */ | 3768 | /* |
3769 | * Add CPU to leaf rcu_node pending-online bitmask. Any needed | ||
3770 | * propagation up the rcu_node tree will happen at the beginning | ||
3771 | * of the next grace period. | ||
3772 | */ | ||
3573 | rnp = rdp->mynode; | 3773 | rnp = rdp->mynode; |
3574 | mask = rdp->grpmask; | 3774 | mask = rdp->grpmask; |
3575 | do { | 3775 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
3576 | /* Exclude any attempts to start a new GP on small systems. */ | 3776 | smp_mb__after_unlock_lock(); |
3577 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 3777 | rnp->qsmaskinitnext |= mask; |
3578 | rnp->qsmaskinit |= mask; | 3778 | rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ |
3579 | mask = rnp->grpmask; | 3779 | rdp->completed = rnp->completed; |
3580 | if (rnp == rdp->mynode) { | 3780 | rdp->passed_quiesce = false; |
3581 | /* | 3781 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); |
3582 | * If there is a grace period in progress, we will | 3782 | rdp->qs_pending = false; |
3583 | * set up to wait for it next time we run the | 3783 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); |
3584 | * RCU core code. | 3784 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
3585 | */ | ||
3586 | rdp->gpnum = rnp->completed; | ||
3587 | rdp->completed = rnp->completed; | ||
3588 | rdp->passed_quiesce = 0; | ||
3589 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); | ||
3590 | rdp->qs_pending = 0; | ||
3591 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); | ||
3592 | } | ||
3593 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | ||
3594 | rnp = rnp->parent; | ||
3595 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); | ||
3596 | local_irq_restore(flags); | ||
3597 | |||
3598 | mutex_unlock(&rsp->onoff_mutex); | ||
3599 | } | 3785 | } |
3600 | 3786 | ||
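Together with rcu_cleanup_dying_idle_cpu() earlier in this file, CPU hotplug now touches only ->qsmaskinitnext, and the tree.h comment added later in this patch states that ->qsmaskinit is initialized from it at the beginning of each grace period. A rough standalone model of that lifecycle (invented helpers, no locking, no rcu_node tree, and a deliberately simplified grace-period start):

/* Sketch (not kernel code): the ->qsmaskinitnext / ->qsmaskinit split. */
#include <stdio.h>

static unsigned long qsmaskinitnext;    /* online CPUs for the next GP */
static unsigned long qsmaskinit;        /* CPUs this GP actually waits on */

static void cpu_online(int cpu)         /* rcu_init_percpu_data() analogue */
{
        qsmaskinitnext |= 1UL << cpu;
}

static void cpu_dying_idle(int cpu)     /* rcu_cleanup_dying_idle_cpu() analogue */
{
        qsmaskinitnext &= ~(1UL << cpu);
}

static void gp_start(void)              /* latch at grace-period initialization */
{
        qsmaskinit = qsmaskinitnext;
}

int main(void)
{
        cpu_online(0);
        cpu_online(1);
        gp_start();
        cpu_dying_idle(1);      /* does not disturb the GP already in flight */
        printf("init=%#lx next=%#lx\n", qsmaskinit, qsmaskinitnext);
        gp_start();
        printf("init=%#lx next=%#lx\n", qsmaskinit, qsmaskinitnext);
        return 0;
}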
3601 | static void rcu_prepare_cpu(int cpu) | 3787 | static void rcu_prepare_cpu(int cpu) |
@@ -3609,15 +3795,14 @@ static void rcu_prepare_cpu(int cpu) | |||
3609 | /* | 3795 | /* |
3610 | * Handle CPU online/offline notification events. | 3796 | * Handle CPU online/offline notification events. |
3611 | */ | 3797 | */ |
3612 | static int rcu_cpu_notify(struct notifier_block *self, | 3798 | int rcu_cpu_notify(struct notifier_block *self, |
3613 | unsigned long action, void *hcpu) | 3799 | unsigned long action, void *hcpu) |
3614 | { | 3800 | { |
3615 | long cpu = (long)hcpu; | 3801 | long cpu = (long)hcpu; |
3616 | struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); | 3802 | struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); |
3617 | struct rcu_node *rnp = rdp->mynode; | 3803 | struct rcu_node *rnp = rdp->mynode; |
3618 | struct rcu_state *rsp; | 3804 | struct rcu_state *rsp; |
3619 | 3805 | ||
3620 | trace_rcu_utilization(TPS("Start CPU hotplug")); | ||
3621 | switch (action) { | 3806 | switch (action) { |
3622 | case CPU_UP_PREPARE: | 3807 | case CPU_UP_PREPARE: |
3623 | case CPU_UP_PREPARE_FROZEN: | 3808 | case CPU_UP_PREPARE_FROZEN: |
@@ -3637,6 +3822,11 @@ static int rcu_cpu_notify(struct notifier_block *self, | |||
3637 | for_each_rcu_flavor(rsp) | 3822 | for_each_rcu_flavor(rsp) |
3638 | rcu_cleanup_dying_cpu(rsp); | 3823 | rcu_cleanup_dying_cpu(rsp); |
3639 | break; | 3824 | break; |
3825 | case CPU_DYING_IDLE: | ||
3826 | for_each_rcu_flavor(rsp) { | ||
3827 | rcu_cleanup_dying_idle_cpu(cpu, rsp); | ||
3828 | } | ||
3829 | break; | ||
3640 | case CPU_DEAD: | 3830 | case CPU_DEAD: |
3641 | case CPU_DEAD_FROZEN: | 3831 | case CPU_DEAD_FROZEN: |
3642 | case CPU_UP_CANCELED: | 3832 | case CPU_UP_CANCELED: |
@@ -3649,7 +3839,6 @@ static int rcu_cpu_notify(struct notifier_block *self, | |||
3649 | default: | 3839 | default: |
3650 | break; | 3840 | break; |
3651 | } | 3841 | } |
3652 | trace_rcu_utilization(TPS("End CPU hotplug")); | ||
3653 | return NOTIFY_OK; | 3842 | return NOTIFY_OK; |
3654 | } | 3843 | } |
3655 | 3844 | ||
@@ -3660,11 +3849,12 @@ static int rcu_pm_notify(struct notifier_block *self, | |||
3660 | case PM_HIBERNATION_PREPARE: | 3849 | case PM_HIBERNATION_PREPARE: |
3661 | case PM_SUSPEND_PREPARE: | 3850 | case PM_SUSPEND_PREPARE: |
3662 | if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ | 3851 | if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ |
3663 | rcu_expedited = 1; | 3852 | rcu_expedite_gp(); |
3664 | break; | 3853 | break; |
3665 | case PM_POST_HIBERNATION: | 3854 | case PM_POST_HIBERNATION: |
3666 | case PM_POST_SUSPEND: | 3855 | case PM_POST_SUSPEND: |
3667 | rcu_expedited = 0; | 3856 | if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ |
3857 | rcu_unexpedite_gp(); | ||
3668 | break; | 3858 | break; |
3669 | default: | 3859 | default: |
3670 | break; | 3860 | break; |
@@ -3734,30 +3924,26 @@ void rcu_scheduler_starting(void) | |||
3734 | * Compute the per-level fanout, either using the exact fanout specified | 3924 | * Compute the per-level fanout, either using the exact fanout specified |
3735 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. | 3925 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. |
3736 | */ | 3926 | */ |
3737 | #ifdef CONFIG_RCU_FANOUT_EXACT | ||
3738 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | ||
3739 | { | ||
3740 | int i; | ||
3741 | |||
3742 | rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; | ||
3743 | for (i = rcu_num_lvls - 2; i >= 0; i--) | ||
3744 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | ||
3745 | } | ||
3746 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | ||
3747 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 3927 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
3748 | { | 3928 | { |
3749 | int ccur; | ||
3750 | int cprv; | ||
3751 | int i; | 3929 | int i; |
3752 | 3930 | ||
3753 | cprv = nr_cpu_ids; | 3931 | if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { |
3754 | for (i = rcu_num_lvls - 1; i >= 0; i--) { | 3932 | rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; |
3755 | ccur = rsp->levelcnt[i]; | 3933 | for (i = rcu_num_lvls - 2; i >= 0; i--) |
3756 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | 3934 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
3757 | cprv = ccur; | 3935 | } else { |
3936 | int ccur; | ||
3937 | int cprv; | ||
3938 | |||
3939 | cprv = nr_cpu_ids; | ||
3940 | for (i = rcu_num_lvls - 1; i >= 0; i--) { | ||
3941 | ccur = rsp->levelcnt[i]; | ||
3942 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | ||
3943 | cprv = ccur; | ||
3944 | } | ||
3758 | } | 3945 | } |
3759 | } | 3946 | } |
3760 | #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ | ||
3761 | 3947 | ||
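For the balanced (non-exact) branch, the computation is a repeated ceiling division from the leaves toward the root. A standalone check with a made-up three-level geometry (the levelcnt values and nr_cpu_ids below are illustrative, not from any real configuration):

/* Sketch: the balanced levelspread computation as a standalone program. */
#include <stdio.h>

int main(void)
{
        int levelcnt[] = { 1, 4, 64 };  /* root, middle, leaves (example) */
        int levelspread[3];
        int nr_cpu_ids = 1000;          /* example value */
        int cprv = nr_cpu_ids;
        int ccur;
        int i;

        for (i = 2; i >= 0; i--) {
                ccur = levelcnt[i];
                levelspread[i] = (cprv + ccur - 1) / ccur;  /* ceiling division */
                cprv = ccur;
        }
        for (i = 0; i < 3; i++)
                printf("levelspread[%d] = %d\n", i, levelspread[i]);
        return 0;       /* prints 4, 16, 16 for this geometry */
}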
3762 | /* | 3948 | /* |
3763 | * Helper function for rcu_init() that initializes one rcu_state structure. | 3949 | * Helper function for rcu_init() that initializes one rcu_state structure. |
@@ -3833,7 +4019,6 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
3833 | } | 4019 | } |
3834 | } | 4020 | } |
3835 | 4021 | ||
3836 | rsp->rda = rda; | ||
3837 | init_waitqueue_head(&rsp->gp_wq); | 4022 | init_waitqueue_head(&rsp->gp_wq); |
3838 | rnp = rsp->level[rcu_num_lvls - 1]; | 4023 | rnp = rsp->level[rcu_num_lvls - 1]; |
3839 | for_each_possible_cpu(i) { | 4024 | for_each_possible_cpu(i) { |
@@ -3926,6 +4111,8 @@ void __init rcu_init(void) | |||
3926 | { | 4111 | { |
3927 | int cpu; | 4112 | int cpu; |
3928 | 4113 | ||
4114 | rcu_early_boot_tests(); | ||
4115 | |||
3929 | rcu_bootup_announce(); | 4116 | rcu_bootup_announce(); |
3930 | rcu_init_geometry(); | 4117 | rcu_init_geometry(); |
3931 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 4118 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
@@ -3942,8 +4129,6 @@ void __init rcu_init(void) | |||
3942 | pm_notifier(rcu_pm_notify, 0); | 4129 | pm_notifier(rcu_pm_notify, 0); |
3943 | for_each_online_cpu(cpu) | 4130 | for_each_online_cpu(cpu) |
3944 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); | 4131 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); |
3945 | |||
3946 | rcu_early_boot_tests(); | ||
3947 | } | 4132 | } |
3948 | 4133 | ||
3949 | #include "tree_plugin.h" | 4134 | #include "tree_plugin.h" |
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 119de399eb2f..a69d3dab2ec4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h | |||
@@ -141,12 +141,20 @@ struct rcu_node { | |||
141 | /* complete (only for PREEMPT_RCU). */ | 141 | /* complete (only for PREEMPT_RCU). */ |
142 | unsigned long qsmaskinit; | 142 | unsigned long qsmaskinit; |
143 | /* Per-GP initial value for qsmask & expmask. */ | 143 | /* Per-GP initial value for qsmask & expmask. */ |
144 | /* Initialized from ->qsmaskinitnext at the */ | ||
145 | /* beginning of each grace period. */ | ||
146 | unsigned long qsmaskinitnext; | ||
147 | /* Online CPUs for next grace period. */ | ||
144 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ | 148 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
145 | /* Only one bit will be set in this mask. */ | 149 | /* Only one bit will be set in this mask. */ |
146 | int grplo; /* lowest-numbered CPU or group here. */ | 150 | int grplo; /* lowest-numbered CPU or group here. */ |
147 | int grphi; /* highest-numbered CPU or group here. */ | 151 | int grphi; /* highest-numbered CPU or group here. */ |
148 | u8 grpnum; /* CPU/group number for next level up. */ | 152 | u8 grpnum; /* CPU/group number for next level up. */ |
149 | u8 level; /* root is at level 0. */ | 153 | u8 level; /* root is at level 0. */ |
154 | bool wait_blkd_tasks;/* Necessary to wait for blocked tasks to */ | ||
155 | /* exit RCU read-side critical sections */ | ||
156 | /* before propagating offline up the */ | ||
157 | /* rcu_node tree? */ | ||
150 | struct rcu_node *parent; | 158 | struct rcu_node *parent; |
151 | struct list_head blkd_tasks; | 159 | struct list_head blkd_tasks; |
152 | /* Tasks blocked in RCU read-side critical */ | 160 | /* Tasks blocked in RCU read-side critical */ |
@@ -448,8 +456,6 @@ struct rcu_state { | |||
448 | long qlen; /* Total number of callbacks. */ | 456 | long qlen; /* Total number of callbacks. */ |
449 | /* End of fields guarded by orphan_lock. */ | 457 | /* End of fields guarded by orphan_lock. */ |
450 | 458 | ||
451 | struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */ | ||
452 | |||
453 | struct mutex barrier_mutex; /* Guards barrier fields. */ | 459 | struct mutex barrier_mutex; /* Guards barrier fields. */ |
454 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | 460 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ |
455 | struct completion barrier_completion; /* Wake at barrier end. */ | 461 | struct completion barrier_completion; /* Wake at barrier end. */ |
@@ -559,6 +565,7 @@ static void rcu_prepare_kthreads(int cpu); | |||
559 | static void rcu_cleanup_after_idle(void); | 565 | static void rcu_cleanup_after_idle(void); |
560 | static void rcu_prepare_for_idle(void); | 566 | static void rcu_prepare_for_idle(void); |
561 | static void rcu_idle_count_callbacks_posted(void); | 567 | static void rcu_idle_count_callbacks_posted(void); |
568 | static bool rcu_preempt_has_tasks(struct rcu_node *rnp); | ||
562 | static void print_cpu_stall_info_begin(void); | 569 | static void print_cpu_stall_info_begin(void); |
563 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); | 570 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); |
564 | static void print_cpu_stall_info_end(void); | 571 | static void print_cpu_stall_info_end(void); |
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0a571e9a0f1d..8c0ec0f5a027 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
@@ -58,38 +58,33 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ | |||
58 | */ | 58 | */ |
59 | static void __init rcu_bootup_announce_oddness(void) | 59 | static void __init rcu_bootup_announce_oddness(void) |
60 | { | 60 | { |
61 | #ifdef CONFIG_RCU_TRACE | 61 | if (IS_ENABLED(CONFIG_RCU_TRACE)) |
62 | pr_info("\tRCU debugfs-based tracing is enabled.\n"); | 62 | pr_info("\tRCU debugfs-based tracing is enabled.\n"); |
63 | #endif | 63 | if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || |
64 | #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) | 64 | (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) |
65 | pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", | 65 | pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", |
66 | CONFIG_RCU_FANOUT); | 66 | CONFIG_RCU_FANOUT); |
67 | #endif | 67 | if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) |
68 | #ifdef CONFIG_RCU_FANOUT_EXACT | 68 | pr_info("\tHierarchical RCU autobalancing is disabled.\n"); |
69 | pr_info("\tHierarchical RCU autobalancing is disabled.\n"); | 69 | if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) |
70 | #endif | 70 | pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); |
71 | #ifdef CONFIG_RCU_FAST_NO_HZ | 71 | if (IS_ENABLED(CONFIG_PROVE_RCU)) |
72 | pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); | 72 | pr_info("\tRCU lockdep checking is enabled.\n"); |
73 | #endif | 73 | if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE)) |
74 | #ifdef CONFIG_PROVE_RCU | 74 | pr_info("\tRCU torture testing starts during boot.\n"); |
75 | pr_info("\tRCU lockdep checking is enabled.\n"); | 75 | if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO)) |
76 | #endif | 76 | pr_info("\tAdditional per-CPU info printed with stalls.\n"); |
77 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE | 77 | if (NUM_RCU_LVL_4 != 0) |
78 | pr_info("\tRCU torture testing starts during boot.\n"); | 78 | pr_info("\tFour-level hierarchy is enabled.\n"); |
79 | #endif | 79 | if (CONFIG_RCU_FANOUT_LEAF != 16) |
80 | #if defined(CONFIG_RCU_CPU_STALL_INFO) | 80 | pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", |
81 | pr_info("\tAdditional per-CPU info printed with stalls.\n"); | 81 | CONFIG_RCU_FANOUT_LEAF); |
82 | #endif | ||
83 | #if NUM_RCU_LVL_4 != 0 | ||
84 | pr_info("\tFour-level hierarchy is enabled.\n"); | ||
85 | #endif | ||
86 | if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) | 82 | if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) |
87 | pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); | 83 | pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); |
88 | if (nr_cpu_ids != NR_CPUS) | 84 | if (nr_cpu_ids != NR_CPUS) |
89 | pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); | 85 | pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); |
90 | #ifdef CONFIG_RCU_BOOST | 86 | if (IS_ENABLED(CONFIG_RCU_BOOST)) |
91 | pr_info("\tRCU kthread priority: %d.\n", kthread_prio); | 87 | pr_info("\tRCU kthread priority: %d.\n", kthread_prio); |
92 | #endif | ||
93 | } | 88 | } |
94 | 89 | ||
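The IS_ENABLED() conversion above (and the similar one in rcu_init_levelspread()) trades #ifdef for ordinary if statements, so both branches are parsed and type-checked while the disabled one is discarded as dead code. A toy standalone illustration of the idiom, with invented CONFIG_DEMO_* symbols and a simplified stand-in for the kernel's IS_ENABLED() macro:

/* Sketch: stand-ins for Kconfig output ("=y" becomes 1, "not set" becomes 0). */
#include <stdio.h>

#define CONFIG_DEMO_TRACE 1
#define CONFIG_DEMO_BOOST 0

/* Simplified stand-in; the real macro also handles =m and undefined symbols. */
#define IS_ENABLED(option) (option)

static void announce_oddness(void)
{
        if (IS_ENABLED(CONFIG_DEMO_TRACE))
                printf("\tdemo tracing is enabled.\n");
        if (IS_ENABLED(CONFIG_DEMO_BOOST))      /* compiled, then dropped as dead code */
                printf("\tdemo boosting is enabled.\n");
}

int main(void)
{
        announce_oddness();
        return 0;
}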
95 | #ifdef CONFIG_PREEMPT_RCU | 90 | #ifdef CONFIG_PREEMPT_RCU |
@@ -180,7 +175,7 @@ static void rcu_preempt_note_context_switch(void) | |||
180 | * But first, note that the current CPU must still be | 175 | * But first, note that the current CPU must still be |
181 | * on line! | 176 | * on line! |
182 | */ | 177 | */ |
183 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); | 178 | WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); |
184 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); | 179 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); |
185 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { | 180 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { |
186 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); | 181 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); |
@@ -233,43 +228,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) | |||
233 | } | 228 | } |
234 | 229 | ||
235 | /* | 230 | /* |
236 | * Record a quiescent state for all tasks that were previously queued | ||
237 | * on the specified rcu_node structure and that were blocking the current | ||
238 | * RCU grace period. The caller must hold the specified rnp->lock with | ||
239 | * irqs disabled, and this lock is released upon return, but irqs remain | ||
240 | * disabled. | ||
241 | */ | ||
242 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | ||
243 | __releases(rnp->lock) | ||
244 | { | ||
245 | unsigned long mask; | ||
246 | struct rcu_node *rnp_p; | ||
247 | |||
248 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | ||
249 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
250 | return; /* Still need more quiescent states! */ | ||
251 | } | ||
252 | |||
253 | rnp_p = rnp->parent; | ||
254 | if (rnp_p == NULL) { | ||
255 | /* | ||
256 | * Either there is only one rcu_node in the tree, | ||
257 | * or tasks were kicked up to root rcu_node due to | ||
258 | * CPUs going offline. | ||
259 | */ | ||
260 | rcu_report_qs_rsp(&rcu_preempt_state, flags); | ||
261 | return; | ||
262 | } | ||
263 | |||
264 | /* Report up the rest of the hierarchy. */ | ||
265 | mask = rnp->grpmask; | ||
266 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
267 | raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ | ||
268 | smp_mb__after_unlock_lock(); | ||
269 | rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * Advance a ->blkd_tasks-list pointer to the next entry, instead | 231 | * Advance a ->blkd_tasks-list pointer to the next entry, instead |
274 | * returning NULL if at the end of the list. | 232 | * returning NULL if at the end of the list. |
275 | */ | 233 | */ |
@@ -300,7 +258,6 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) | |||
300 | */ | 258 | */ |
301 | void rcu_read_unlock_special(struct task_struct *t) | 259 | void rcu_read_unlock_special(struct task_struct *t) |
302 | { | 260 | { |
303 | bool empty; | ||
304 | bool empty_exp; | 261 | bool empty_exp; |
305 | bool empty_norm; | 262 | bool empty_norm; |
306 | bool empty_exp_now; | 263 | bool empty_exp_now; |
@@ -334,7 +291,13 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
334 | } | 291 | } |
335 | 292 | ||
336 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | 293 | /* Hardware IRQ handlers cannot block, complain if they get here. */ |
337 | if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) { | 294 | if (in_irq() || in_serving_softirq()) { |
295 | lockdep_rcu_suspicious(__FILE__, __LINE__, | ||
296 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | ||
297 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n", | ||
298 | t->rcu_read_unlock_special.s, | ||
299 | t->rcu_read_unlock_special.b.blocked, | ||
300 | t->rcu_read_unlock_special.b.need_qs); | ||
338 | local_irq_restore(flags); | 301 | local_irq_restore(flags); |
339 | return; | 302 | return; |
340 | } | 303 | } |
@@ -356,7 +319,6 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
356 | break; | 319 | break; |
357 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 320 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
358 | } | 321 | } |
359 | empty = !rcu_preempt_has_tasks(rnp); | ||
360 | empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); | 322 | empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); |
361 | empty_exp = !rcu_preempted_readers_exp(rnp); | 323 | empty_exp = !rcu_preempted_readers_exp(rnp); |
362 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 324 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
@@ -377,14 +339,6 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
377 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 339 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
378 | 340 | ||
379 | /* | 341 | /* |
380 | * If this was the last task on the list, go see if we | ||
381 | * need to propagate ->qsmaskinit bit clearing up the | ||
382 | * rcu_node tree. | ||
383 | */ | ||
384 | if (!empty && !rcu_preempt_has_tasks(rnp)) | ||
385 | rcu_cleanup_dead_rnp(rnp); | ||
386 | |||
387 | /* | ||
388 | * If this was the last task on the current list, and if | 342 | * If this was the last task on the current list, and if |
389 | * we aren't waiting on any CPUs, report the quiescent state. | 343 | * we aren't waiting on any CPUs, report the quiescent state. |
390 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, | 344 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, |
@@ -399,7 +353,8 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
399 | rnp->grplo, | 353 | rnp->grplo, |
400 | rnp->grphi, | 354 | rnp->grphi, |
401 | !!rnp->gp_tasks); | 355 | !!rnp->gp_tasks); |
402 | rcu_report_unblock_qs_rnp(rnp, flags); | 356 | rcu_report_unblock_qs_rnp(&rcu_preempt_state, |
357 | rnp, flags); | ||
403 | } else { | 358 | } else { |
404 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 359 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
405 | } | 360 | } |
@@ -520,10 +475,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | |||
520 | WARN_ON_ONCE(rnp->qsmask); | 475 | WARN_ON_ONCE(rnp->qsmask); |
521 | } | 476 | } |
522 | 477 | ||
523 | #ifdef CONFIG_HOTPLUG_CPU | ||
524 | |||
525 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
526 | |||
527 | /* | 478 | /* |
528 | * Check for a quiescent state from the current CPU. When a task blocks, | 479 | * Check for a quiescent state from the current CPU. When a task blocks, |
529 | * the task is recorded in the corresponding CPU's rcu_node structure, | 480 | * the task is recorded in the corresponding CPU's rcu_node structure, |
@@ -585,7 +536,7 @@ void synchronize_rcu(void) | |||
585 | "Illegal synchronize_rcu() in RCU read-side critical section"); | 536 | "Illegal synchronize_rcu() in RCU read-side critical section"); |
586 | if (!rcu_scheduler_active) | 537 | if (!rcu_scheduler_active) |
587 | return; | 538 | return; |
588 | if (rcu_expedited) | 539 | if (rcu_gp_is_expedited()) |
589 | synchronize_rcu_expedited(); | 540 | synchronize_rcu_expedited(); |
590 | else | 541 | else |
591 | wait_rcu_gp(call_rcu); | 542 | wait_rcu_gp(call_rcu); |
@@ -630,9 +581,6 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | |||
630 | * recursively up the tree. (Calm down, calm down, we do the recursion | 581 | * recursively up the tree. (Calm down, calm down, we do the recursion |
631 | * iteratively!) | 582 | * iteratively!) |
632 | * | 583 | * |
633 | * Most callers will set the "wake" flag, but the task initiating the | ||
634 | * expedited grace period need not wake itself. | ||
635 | * | ||
636 | * Caller must hold sync_rcu_preempt_exp_mutex. | 584 | * Caller must hold sync_rcu_preempt_exp_mutex. |
637 | */ | 585 | */ |
638 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | 586 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
@@ -667,29 +615,85 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | |||
667 | 615 | ||
668 | /* | 616 | /* |
669 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited | 617 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited |
670 | * grace period for the specified rcu_node structure. If there are no such | 618 | * grace period for the specified rcu_node structure, phase 1. If there |
671 | * tasks, report it up the rcu_node hierarchy. | 619 | * are such tasks, set the ->expmask bits up the rcu_node tree and also |
620 | * set the ->expmask bits on the leaf rcu_node structures to tell phase 2 | ||
621 | * that work is needed here. | ||
672 | * | 622 | * |
673 | * Caller must hold sync_rcu_preempt_exp_mutex and must exclude | 623 | * Caller must hold sync_rcu_preempt_exp_mutex. |
674 | * CPU hotplug operations. | ||
675 | */ | 624 | */ |
676 | static void | 625 | static void |
677 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | 626 | sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp) |
678 | { | 627 | { |
679 | unsigned long flags; | 628 | unsigned long flags; |
680 | int must_wait = 0; | 629 | unsigned long mask; |
630 | struct rcu_node *rnp_up; | ||
681 | 631 | ||
682 | raw_spin_lock_irqsave(&rnp->lock, flags); | 632 | raw_spin_lock_irqsave(&rnp->lock, flags); |
683 | smp_mb__after_unlock_lock(); | 633 | smp_mb__after_unlock_lock(); |
634 | WARN_ON_ONCE(rnp->expmask); | ||
635 | WARN_ON_ONCE(rnp->exp_tasks); | ||
684 | if (!rcu_preempt_has_tasks(rnp)) { | 636 | if (!rcu_preempt_has_tasks(rnp)) { |
637 | /* No blocked tasks, nothing to do. */ | ||
685 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 638 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
686 | } else { | 639 | return; |
640 | } | ||
641 | /* Call for Phase 2 and propagate ->expmask bits up the tree. */ | ||
642 | rnp->expmask = 1; | ||
643 | rnp_up = rnp; | ||
644 | while (rnp_up->parent) { | ||
645 | mask = rnp_up->grpmask; | ||
646 | rnp_up = rnp_up->parent; | ||
647 | if (rnp_up->expmask & mask) | ||
648 | break; | ||
649 | raw_spin_lock(&rnp_up->lock); /* irqs already off */ | ||
650 | smp_mb__after_unlock_lock(); | ||
651 | rnp_up->expmask |= mask; | ||
652 | raw_spin_unlock(&rnp_up->lock); /* irqs still off */ | ||
653 | } | ||
654 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
655 | } | ||
656 | |||
657 | /* | ||
658 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited | ||
659 | * grace period for the specified rcu_node structure, phase 2. If the | ||
660 | * leaf rcu_node structure has its ->expmask field set, check for tasks. | ||
661 | * If there are some, clear ->expmask and set ->exp_tasks accordingly, | ||
662 | * then initiate RCU priority boosting. Otherwise, clear ->expmask and | ||
663 | * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits, | ||
664 | * enabling rcu_read_unlock_special() to do the bit-clearing. | ||
665 | * | ||
666 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
667 | */ | ||
668 | static void | ||
669 | sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) | ||
670 | { | ||
671 | unsigned long flags; | ||
672 | |||
673 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
674 | smp_mb__after_unlock_lock(); | ||
675 | if (!rnp->expmask) { | ||
676 | /* Phase 1 didn't do anything, so Phase 2 doesn't either. */ | ||
677 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
678 | return; | ||
679 | } | ||
680 | |||
681 | /* Phase 1 is over. */ | ||
682 | rnp->expmask = 0; | ||
683 | |||
684 | /* | ||
685 | * If there are still blocked tasks, set up ->exp_tasks so that | ||
686 | * rcu_read_unlock_special() will wake us and then boost them. | ||
687 | */ | ||
688 | if (rcu_preempt_has_tasks(rnp)) { | ||
687 | rnp->exp_tasks = rnp->blkd_tasks.next; | 689 | rnp->exp_tasks = rnp->blkd_tasks.next; |
688 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ | 690 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ |
689 | must_wait = 1; | 691 | return; |
690 | } | 692 | } |
691 | if (!must_wait) | 693 | |
692 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ | 694 | /* No longer any blocked tasks, so undo bit setting. */ |
695 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
696 | rcu_report_exp_rnp(rsp, rnp, false); | ||
693 | } | 697 | } |
694 | 698 | ||
695 | /** | 699 | /** |
@@ -706,7 +710,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
706 | */ | 710 | */ |
707 | void synchronize_rcu_expedited(void) | 711 | void synchronize_rcu_expedited(void) |
708 | { | 712 | { |
709 | unsigned long flags; | ||
710 | struct rcu_node *rnp; | 713 | struct rcu_node *rnp; |
711 | struct rcu_state *rsp = &rcu_preempt_state; | 714 | struct rcu_state *rsp = &rcu_preempt_state; |
712 | unsigned long snap; | 715 | unsigned long snap; |
@@ -757,19 +760,16 @@ void synchronize_rcu_expedited(void) | |||
757 | /* force all RCU readers onto ->blkd_tasks lists. */ | 760 | /* force all RCU readers onto ->blkd_tasks lists. */ |
758 | synchronize_sched_expedited(); | 761 | synchronize_sched_expedited(); |
759 | 762 | ||
760 | /* Initialize ->expmask for all non-leaf rcu_node structures. */ | 763 | /* |
761 | rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { | 764 | * Snapshot current state of ->blkd_tasks lists into ->expmask. |
762 | raw_spin_lock_irqsave(&rnp->lock, flags); | 765 | * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special() |
763 | smp_mb__after_unlock_lock(); | 766 | * to start clearing them. Doing this in one phase leads to |
764 | rnp->expmask = rnp->qsmaskinit; | 767 | * strange races between setting and clearing bits, so just say "no"! |
765 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 768 | */ |
766 | } | 769 | rcu_for_each_leaf_node(rsp, rnp) |
767 | 770 | sync_rcu_preempt_exp_init1(rsp, rnp); | |
768 | /* Snapshot current state of ->blkd_tasks lists. */ | ||
769 | rcu_for_each_leaf_node(rsp, rnp) | 771 | rcu_for_each_leaf_node(rsp, rnp) |
770 | sync_rcu_preempt_exp_init(rsp, rnp); | 772 | sync_rcu_preempt_exp_init2(rsp, rnp); |
771 | if (NUM_RCU_NODES > 1) | ||
772 | sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); | ||
773 | 773 | ||
774 | put_online_cpus(); | 774 | put_online_cpus(); |
775 | 775 | ||
@@ -859,8 +859,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) | |||
859 | return 0; | 859 | return 0; |
860 | } | 860 | } |
861 | 861 | ||
862 | #ifdef CONFIG_HOTPLUG_CPU | ||
863 | |||
864 | /* | 862 | /* |
865 | * Because there is no preemptible RCU, there can be no readers blocked. | 863 | * Because there is no preemptible RCU, there can be no readers blocked. |
866 | */ | 864 | */ |
@@ -869,8 +867,6 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) | |||
869 | return false; | 867 | return false; |
870 | } | 868 | } |
871 | 869 | ||
872 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
873 | |||
874 | /* | 870 | /* |
875 | * Because preemptible RCU does not exist, we never have to check for | 871 | * Because preemptible RCU does not exist, we never have to check for |
876 | * tasks blocked within RCU read-side critical sections. | 872 | * tasks blocked within RCU read-side critical sections. |
@@ -1170,7 +1166,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | |||
1170 | * Returns zero if all is well, a negated errno otherwise. | 1166 | * Returns zero if all is well, a negated errno otherwise. |
1171 | */ | 1167 | */ |
1172 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1168 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
1173 | struct rcu_node *rnp) | 1169 | struct rcu_node *rnp) |
1174 | { | 1170 | { |
1175 | int rnp_index = rnp - &rsp->node[0]; | 1171 | int rnp_index = rnp - &rsp->node[0]; |
1176 | unsigned long flags; | 1172 | unsigned long flags; |
@@ -1180,7 +1176,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1180 | if (&rcu_preempt_state != rsp) | 1176 | if (&rcu_preempt_state != rsp) |
1181 | return 0; | 1177 | return 0; |
1182 | 1178 | ||
1183 | if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) | 1179 | if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) |
1184 | return 0; | 1180 | return 0; |
1185 | 1181 | ||
1186 | rsp->boost = 1; | 1182 | rsp->boost = 1; |
@@ -1273,7 +1269,7 @@ static void rcu_cpu_kthread(unsigned int cpu) | |||
1273 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | 1269 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) |
1274 | { | 1270 | { |
1275 | struct task_struct *t = rnp->boost_kthread_task; | 1271 | struct task_struct *t = rnp->boost_kthread_task; |
1276 | unsigned long mask = rnp->qsmaskinit; | 1272 | unsigned long mask = rcu_rnp_online_cpus(rnp); |
1277 | cpumask_var_t cm; | 1273 | cpumask_var_t cm; |
1278 | int cpu; | 1274 | int cpu; |
1279 | 1275 | ||
@@ -1945,7 +1941,8 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | |||
1945 | rhp = ACCESS_ONCE(rdp->nocb_follower_head); | 1941 | rhp = ACCESS_ONCE(rdp->nocb_follower_head); |
1946 | 1942 | ||
1947 | /* Having no rcuo kthread but CBs after scheduler starts is bad! */ | 1943 | /* Having no rcuo kthread but CBs after scheduler starts is bad! */ |
1948 | if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) { | 1944 | if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp && |
1945 | rcu_scheduler_fully_active) { | ||
1949 | /* RCU callback enqueued before CPU first came online??? */ | 1946 | /* RCU callback enqueued before CPU first came online??? */ |
1950 | pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", | 1947 | pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", |
1951 | cpu, rhp->func); | 1948 | cpu, rhp->func); |
@@ -2392,18 +2389,8 @@ void __init rcu_init_nohz(void) | |||
2392 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); | 2389 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); |
2393 | 2390 | ||
2394 | for_each_rcu_flavor(rsp) { | 2391 | for_each_rcu_flavor(rsp) { |
2395 | for_each_cpu(cpu, rcu_nocb_mask) { | 2392 | for_each_cpu(cpu, rcu_nocb_mask) |
2396 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2393 | init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu)); |
2397 | |||
2398 | /* | ||
2399 | * If there are early callbacks, they will need | ||
2400 | * to be moved to the nocb lists. | ||
2401 | */ | ||
2402 | WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] != | ||
2403 | &rdp->nxtlist && | ||
2404 | rdp->nxttail[RCU_NEXT_TAIL] != NULL); | ||
2405 | init_nocb_callback_list(rdp); | ||
2406 | } | ||
2407 | rcu_organize_nocb_kthreads(rsp); | 2394 | rcu_organize_nocb_kthreads(rsp); |
2408 | } | 2395 | } |
2409 | } | 2396 | } |
@@ -2540,6 +2527,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) | |||
2540 | if (!rcu_is_nocb_cpu(rdp->cpu)) | 2527 | if (!rcu_is_nocb_cpu(rdp->cpu)) |
2541 | return false; | 2528 | return false; |
2542 | 2529 | ||
2530 | /* If there are early-boot callbacks, move them to nocb lists. */ | ||
2531 | if (rdp->nxtlist) { | ||
2532 | rdp->nocb_head = rdp->nxtlist; | ||
2533 | rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL]; | ||
2534 | atomic_long_set(&rdp->nocb_q_count, rdp->qlen); | ||
2535 | atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy); | ||
2536 | rdp->nxtlist = NULL; | ||
2537 | rdp->qlen = 0; | ||
2538 | rdp->qlen_lazy = 0; | ||
2539 | } | ||
2543 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | 2540 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; |
2544 | return true; | 2541 | return true; |
2545 | } | 2542 | } |
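The early-boot handoff added above is essentially a list splice plus a count transfer. A standalone sketch with toy types (the real code also moves the lazy count and NULLs nxttail[RCU_NEXT_TAIL], as the hunk shows):

/* Sketch (not kernel code): adopt early-boot callbacks into a no-CBs queue. */
#include <stddef.h>
#include <stdio.h>

struct cb { struct cb *next; };

struct cpu_data {
        struct cb *nxtlist;     /* early-boot callbacks, if any */
        long qlen;
        struct cb *nocb_head;   /* no-CBs kthread's queue */
        long nocb_q_count;
};

static void adopt_early_callbacks(struct cpu_data *rdp)
{
        if (!rdp->nxtlist)
                return;
        rdp->nocb_head = rdp->nxtlist;          /* splice the whole list */
        rdp->nocb_q_count = rdp->qlen;
        rdp->nxtlist = NULL;                    /* now empty ... */
        rdp->qlen = 0;                          /* ... and accounted for */
}

int main(void)
{
        struct cb a = { NULL };
        struct cpu_data rdp = { .nxtlist = &a, .qlen = 1 };

        adopt_early_callbacks(&rdp);
        printf("qlen=%ld nocb_q_count=%ld\n", rdp.qlen, rdp.nocb_q_count);
        return 0;
}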
@@ -2763,7 +2760,8 @@ static void rcu_sysidle_exit(int irq) | |||
2763 | 2760 | ||
2764 | /* | 2761 | /* |
2765 | * Check to see if the current CPU is idle. Note that usermode execution | 2762 | * Check to see if the current CPU is idle. Note that usermode execution |
2766 | * does not count as idle. The caller must have disabled interrupts. | 2763 | * does not count as idle. The caller must have disabled interrupts, |
2764 | * and must be running on tick_do_timer_cpu. | ||
2767 | */ | 2765 | */ |
2768 | static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, | 2766 | static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, |
2769 | unsigned long *maxj) | 2767 | unsigned long *maxj) |
@@ -2784,8 +2782,8 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, | |||
2784 | if (!*isidle || rdp->rsp != rcu_state_p || | 2782 | if (!*isidle || rdp->rsp != rcu_state_p || |
2785 | cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) | 2783 | cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) |
2786 | return; | 2784 | return; |
2787 | if (rcu_gp_in_progress(rdp->rsp)) | 2785 | /* Verify affinity of current kthread. */ |
2788 | WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); | 2786 | WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); |
2789 | 2787 | ||
2790 | /* Pick up current idle and NMI-nesting counter and check. */ | 2788 | /* Pick up current idle and NMI-nesting counter and check. */ |
2791 | cur = atomic_read(&rdtp->dynticks_idle); | 2789 | cur = atomic_read(&rdtp->dynticks_idle); |
@@ -3068,11 +3066,10 @@ static void rcu_bind_gp_kthread(void) | |||
3068 | return; | 3066 | return; |
3069 | #ifdef CONFIG_NO_HZ_FULL_SYSIDLE | 3067 | #ifdef CONFIG_NO_HZ_FULL_SYSIDLE |
3070 | cpu = tick_do_timer_cpu; | 3068 | cpu = tick_do_timer_cpu; |
3071 | if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu) | 3069 | if (cpu >= 0 && cpu < nr_cpu_ids) |
3072 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | 3070 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); |
3073 | #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ | 3071 | #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ |
3074 | if (!is_housekeeping_cpu(raw_smp_processor_id())) | 3072 | housekeeping_affine(current); |
3075 | housekeeping_affine(current); | ||
3076 | #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ | 3073 | #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ |
3077 | } | 3074 | } |
3078 | 3075 | ||
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index fbb6240509ea..f92361efd0f5 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c | |||
@@ -283,8 +283,8 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
283 | seq_puts(m, "\n"); | 283 | seq_puts(m, "\n"); |
284 | level = rnp->level; | 284 | level = rnp->level; |
285 | } | 285 | } |
286 | seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ", | 286 | seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d ", |
287 | rnp->qsmask, rnp->qsmaskinit, | 287 | rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext, |
288 | ".G"[rnp->gp_tasks != NULL], | 288 | ".G"[rnp->gp_tasks != NULL], |
289 | ".E"[rnp->exp_tasks != NULL], | 289 | ".E"[rnp->exp_tasks != NULL], |
290 | ".T"[!list_empty(&rnp->blkd_tasks)], | 290 | ".T"[!list_empty(&rnp->blkd_tasks)], |
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e0d31a345ee6..1f133350da01 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
@@ -62,6 +62,63 @@ MODULE_ALIAS("rcupdate"); | |||
62 | 62 | ||
63 | module_param(rcu_expedited, int, 0); | 63 | module_param(rcu_expedited, int, 0); |
64 | 64 | ||
65 | #ifndef CONFIG_TINY_RCU | ||
66 | |||
67 | static atomic_t rcu_expedited_nesting = | ||
68 | ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); | ||
69 | |||
70 | /* | ||
71 | * Should normal grace-period primitives be expedited? Intended for | ||
72 | * use within RCU. Note that this function takes the rcu_expedited | ||
73 | * sysfs/boot variable into account as well as the rcu_expedite_gp() | ||
74 | * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() | ||
75 | * returns false is a -really- bad idea. | ||
76 | */ | ||
77 | bool rcu_gp_is_expedited(void) | ||
78 | { | ||
79 | return rcu_expedited || atomic_read(&rcu_expedited_nesting); | ||
80 | } | ||
81 | EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); | ||
82 | |||
83 | /** | ||
84 | * rcu_expedite_gp - Expedite future RCU grace periods | ||
85 | * | ||
86 | * After a call to this function, future calls to synchronize_rcu() and | ||
87 | * friends act as if the corresponding synchronize_rcu_expedited() function | ||
88 | * had instead been called. | ||
89 | */ | ||
90 | void rcu_expedite_gp(void) | ||
91 | { | ||
92 | atomic_inc(&rcu_expedited_nesting); | ||
93 | } | ||
94 | EXPORT_SYMBOL_GPL(rcu_expedite_gp); | ||
95 | |||
96 | /** | ||
97 | * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation | ||
98 | * | ||
99 | * Undo a prior call to rcu_expedite_gp(). If all prior calls to | ||
100 | * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(), | ||
101 | * and if the rcu_expedited sysfs/boot parameter is not set, then all | ||
102 | * subsequent calls to synchronize_rcu() and friends will return to | ||
103 | * their normal non-expedited behavior. | ||
104 | */ | ||
105 | void rcu_unexpedite_gp(void) | ||
106 | { | ||
107 | atomic_dec(&rcu_expedited_nesting); | ||
108 | } | ||
109 | EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); | ||
110 | |||
111 | #endif /* #ifndef CONFIG_TINY_RCU */ | ||
112 | |||
113 | /* | ||
114 | * Inform RCU of the end of the in-kernel boot sequence. | ||
115 | */ | ||
116 | void rcu_end_inkernel_boot(void) | ||
117 | { | ||
118 | if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) | ||
119 | rcu_unexpedite_gp(); | ||
120 | } | ||
121 | |||
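The counter semantics are small enough to model directly. The sketch below mirrors rcu_gp_is_expedited(), rcu_expedite_gp(), rcu_unexpedite_gp(), and rcu_end_inkernel_boot() in userspace, with C11 atomics standing in for atomic_t and the RCU_EXPEDITE_BOOT=y case assumed for the initial value:

/* Sketch (not kernel code): expedited-GP nesting counter. */
#include <stdatomic.h>
#include <stdio.h>

static int rcu_expedited;                       /* sysfs/boot knob */
static atomic_int rcu_expedited_nesting = 1;    /* as if RCU_EXPEDITE_BOOT=y */

static int rcu_gp_is_expedited(void)
{
        return rcu_expedited || atomic_load(&rcu_expedited_nesting);
}

static void rcu_expedite_gp(void)   { atomic_fetch_add(&rcu_expedited_nesting, 1); }
static void rcu_unexpedite_gp(void) { atomic_fetch_sub(&rcu_expedited_nesting, 1); }

static void rcu_end_inkernel_boot(void) { rcu_unexpedite_gp(); }

int main(void)
{
        printf("boot: %d\n", rcu_gp_is_expedited());            /* 1 */
        rcu_end_inkernel_boot();
        printf("after boot: %d\n", rcu_gp_is_expedited());      /* 0 */
        rcu_expedite_gp();              /* e.g. the PM_SUSPEND_PREPARE notifier */
        printf("suspend: %d\n", rcu_gp_is_expedited());         /* 1 */
        rcu_unexpedite_gp();            /* e.g. PM_POST_SUSPEND */
        printf("resume: %d\n", rcu_gp_is_expedited());          /* 0 */
        return 0;
}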
65 | #ifdef CONFIG_PREEMPT_RCU | 122 | #ifdef CONFIG_PREEMPT_RCU |
66 | 123 | ||
67 | /* | 124 | /* |
@@ -199,16 +256,13 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |||
199 | 256 | ||
200 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 257 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
201 | 258 | ||
202 | struct rcu_synchronize { | 259 | /** |
203 | struct rcu_head head; | 260 | * wakeme_after_rcu() - Callback function to awaken a task after grace period |
204 | struct completion completion; | 261 | * @head: Pointer to rcu_head member within rcu_synchronize structure |
205 | }; | 262 | * |
206 | 263 | * Awaken the corresponding task now that a grace period has elapsed. | |
207 | /* | ||
208 | * Awaken the corresponding synchronize_rcu() instance now that a | ||
209 | * grace period has elapsed. | ||
210 | */ | 264 | */ |
211 | static void wakeme_after_rcu(struct rcu_head *head) | 265 | void wakeme_after_rcu(struct rcu_head *head) |
212 | { | 266 | { |
213 | struct rcu_synchronize *rcu; | 267 | struct rcu_synchronize *rcu; |
214 | 268 | ||
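With wakeme_after_rcu() now documented and non-static, the classic wait-for-a-grace-period pattern pairs it with an on-stack rcu_synchronize, roughly as wait_rcu_gp() does. The sketch below is kernel-style illustration only; it assumes struct rcu_synchronize and wakeme_after_rcu() are made visible through <linux/rcupdate.h>, which the header changes in this series suggest but this excerpt does not show:

/* Sketch: waiting for one grace period using wakeme_after_rcu(). */
#include <linux/completion.h>
#include <linux/rcupdate.h>

static void example_wait_for_gp(void)
{
        struct rcu_synchronize rcu;

        init_rcu_head_on_stack(&rcu.head);
        init_completion(&rcu.completion);
        call_rcu(&rcu.head, wakeme_after_rcu);  /* queue the wakeup callback */
        wait_for_completion(&rcu.completion);   /* sleep until the GP ends */
        destroy_rcu_head_on_stack(&rcu.head);
}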
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 94b2d7b88a27..b0090accfb5b 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c | |||
@@ -198,6 +198,8 @@ exit_idle: | |||
198 | start_critical_timings(); | 198 | start_critical_timings(); |
199 | } | 199 | } |
200 | 200 | ||
201 | DEFINE_PER_CPU(bool, cpu_dead_idle); | ||
202 | |||
201 | /* | 203 | /* |
202 | * Generic idle loop implementation | 204 | * Generic idle loop implementation |
203 | * | 205 | * |
@@ -222,8 +224,13 @@ static void cpu_idle_loop(void) | |||
222 | check_pgt_cache(); | 224 | check_pgt_cache(); |
223 | rmb(); | 225 | rmb(); |
224 | 226 | ||
225 | if (cpu_is_offline(smp_processor_id())) | 227 | if (cpu_is_offline(smp_processor_id())) { |
228 | rcu_cpu_notify(NULL, CPU_DYING_IDLE, | ||
229 | (void *)(long)smp_processor_id()); | ||
230 | smp_mb(); /* all activity before dead. */ | ||
231 | this_cpu_write(cpu_dead_idle, true); | ||
226 | arch_cpu_idle_dead(); | 232 | arch_cpu_idle_dead(); |
233 | } | ||
227 | 234 | ||
228 | local_irq_disable(); | 235 | local_irq_disable(); |
229 | arch_cpu_idle_enter(); | 236 | arch_cpu_idle_enter(); |
diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 40190f28db35..c697f73d82d6 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/smp.h> | 6 | #include <linux/smp.h> |
7 | #include <linux/delay.h> | ||
7 | #include <linux/init.h> | 8 | #include <linux/init.h> |
8 | #include <linux/list.h> | 9 | #include <linux/list.h> |
9 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
@@ -314,3 +315,158 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) | |||
314 | put_online_cpus(); | 315 | put_online_cpus(); |
315 | } | 316 | } |
316 | EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); | 317 | EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); |
318 | |||
319 | static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); | ||
320 | |||
321 | /* | ||
322 | * Called to poll specified CPU's state, for example, when waiting for | ||
323 | * a CPU to come online. | ||
324 | */ | ||
325 | int cpu_report_state(int cpu) | ||
326 | { | ||
327 | return atomic_read(&per_cpu(cpu_hotplug_state, cpu)); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * If the CPU has died properly, set its state to CPU_UP_PREPARE and | ||
332 | * return success. Otherwise, return -EBUSY if the CPU died after | ||
333 | * cpu_wait_death() timed out, or -EAGAIN if cpu_wait_death() timed out | ||
334 | * and the CPU still hasn't gotten around to dying. In the latter two | ||
335 | * cases, the CPU might not be set up properly, but it is up to the | ||
336 | * arch-specific code to decide. Finally, -EIO indicates an | ||
337 | * unanticipated problem. | ||
338 | * | ||
339 | * Note that it is permissible to omit this call entirely, as is | ||
340 | * done in architectures that do no CPU-hotplug error checking. | ||
341 | */ | ||
342 | int cpu_check_up_prepare(int cpu) | ||
343 | { | ||
344 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) { | ||
345 | atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); | ||
346 | return 0; | ||
347 | } | ||
348 | |||
349 | switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) { | ||
350 | |||
351 | case CPU_POST_DEAD: | ||
352 | |||
353 | /* The CPU died properly, so just start it up again. */ | ||
354 | atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); | ||
355 | return 0; | ||
356 | |||
357 | case CPU_DEAD_FROZEN: | ||
358 | |||
359 | /* | ||
360 | * Timeout during CPU death, so let the caller know. | ||
361 | * The outgoing CPU completed its processing, but only after | ||
362 | * cpu_wait_death() timed out and reported the error. The | ||
363 | * caller is free to proceed, in which case the state | ||
364 | * will be reset properly by cpu_set_state_online(). | ||
365 | * Proceeding despite this -EBUSY return makes sense | ||
366 | * for systems where the outgoing CPUs take themselves | ||
367 | * offline, with no post-death manipulation required from | ||
368 | * a surviving CPU. | ||
369 | */ | ||
370 | return -EBUSY; | ||
371 | |||
372 | case CPU_BROKEN: | ||
373 | |||
374 | /* | ||
375 | * The most likely reason we got here is that there was | ||
376 | * a timeout during CPU death, and the outgoing CPU never | ||
377 | * did complete its processing. This could happen on | ||
378 | * a virtualized system if the outgoing VCPU gets preempted | ||
379 | * for more than five seconds, and the user attempts to | ||
380 | * immediately online that same CPU. Trying again later | ||
381 | * might return -EBUSY above, hence -EAGAIN. | ||
382 | */ | ||
383 | return -EAGAIN; | ||
384 | |||
385 | default: | ||
386 | |||
387 | /* Should not happen. Famous last words. */ | ||
388 | return -EIO; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * Mark the specified CPU online. | ||
394 | * | ||
395 | * Note that it is permissible to omit this call entirely, as is | ||
396 | * done in architectures that do no CPU-hotplug error checking. | ||
397 | */ | ||
398 | void cpu_set_state_online(int cpu) | ||
399 | { | ||
400 | (void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE); | ||
401 | } | ||
402 | |||
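
Taken together, cpu_check_up_prepare() and cpu_set_state_online() form the incoming-CPU half of the handshake. A hedged sketch of how an architecture's __cpu_up() path might use them (arch_boot_secondary() is a made-up stand-in for the arch-specific kick):

	int __cpu_up(unsigned int cpu, struct task_struct *tidle)
	{
		int ret = cpu_check_up_prepare(cpu);

		/* -EBUSY means the CPU died late but cleanly; proceeding is allowed. */
		if (ret && ret != -EBUSY)
			return ret;
		ret = arch_boot_secondary(cpu, tidle);	/* hypothetical */
		if (ret)
			return ret;
		cpu_set_state_online(cpu);	/* reset handshake state to CPU_ONLINE */
		return 0;
	}
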
403 | #ifdef CONFIG_HOTPLUG_CPU | ||
404 | |||
405 | /* | ||
406 | * Wait for the specified CPU to exit the idle loop and die. | ||
407 | */ | ||
408 | bool cpu_wait_death(unsigned int cpu, int seconds) | ||
409 | { | ||
410 | int jf_left = seconds * HZ; | ||
411 | int oldstate; | ||
412 | bool ret = true; | ||
413 | int sleep_jf = 1; | ||
414 | |||
415 | might_sleep(); | ||
416 | |||
417 | /* The outgoing CPU will normally get done quite quickly. */ | ||
418 | if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD) | ||
419 | goto update_state; | ||
420 | udelay(5); | ||
421 | |||
422 | /* But if the outgoing CPU dawdles, wait increasingly long times. */ | ||
423 | while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) { | ||
424 | schedule_timeout_uninterruptible(sleep_jf); | ||
425 | jf_left -= sleep_jf; | ||
426 | if (jf_left <= 0) | ||
427 | break; | ||
428 | sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10); | ||
429 | } | ||
430 | update_state: | ||
431 | oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); | ||
432 | if (oldstate == CPU_DEAD) { | ||
433 | /* Outgoing CPU died normally, update state. */ | ||
434 | smp_mb(); /* atomic_read() before update. */ | ||
435 | atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD); | ||
436 | } else { | ||
437 | /* Outgoing CPU still hasn't died, set state accordingly. */ | ||
438 | if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), | ||
439 | oldstate, CPU_BROKEN) != oldstate) | ||
440 | goto update_state; | ||
441 | ret = false; | ||
442 | } | ||
443 | return ret; | ||
444 | } | ||
445 | |||
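
On the surviving-CPU side, an architecture's __cpu_die() can reduce to little more than a call to cpu_wait_death(); a sketch follows, in which the 5-second timeout and the error value are illustrative policy, not mandated by this patch:

	int __cpu_die(unsigned int cpu)
	{
		if (!cpu_wait_death(cpu, 5))	/* wait up to 5 seconds for CPU_DEAD */
			return -ETIMEDOUT;
		return 0;
	}
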
446 | /* | ||
447 | * Called by the outgoing CPU to report its successful death. Return | ||
448 | * false if this report follows the surviving CPU's timing out. | ||
449 | * | ||
450 | * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU | ||
451 | * timed out. This approach allows architectures to omit calls to | ||
452 | * cpu_check_up_prepare() and cpu_set_state_online() without defeating | ||
453 | * the next cpu_wait_death()'s polling loop. | ||
454 | */ | ||
455 | bool cpu_report_death(void) | ||
456 | { | ||
457 | int oldstate; | ||
458 | int newstate; | ||
459 | int cpu = smp_processor_id(); | ||
460 | |||
461 | do { | ||
462 | oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); | ||
463 | if (oldstate != CPU_BROKEN) | ||
464 | newstate = CPU_DEAD; | ||
465 | else | ||
466 | newstate = CPU_DEAD_FROZEN; | ||
467 | } while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), | ||
468 | oldstate, newstate) != oldstate); | ||
469 | return newstate == CPU_DEAD; | ||
470 | } | ||
471 | |||
472 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
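
The outgoing CPU pairs with the above by reporting its own death from its offline path; a sketch of an arch_cpu_idle_dead() implementation, where arch_halt_self() is a made-up placeholder for the arch-specific halt:

	void arch_cpu_idle_dead(void)
	{
		/*
		 * Tell the surviving CPU that this CPU finished its offline
		 * work; the return value indicates whether the survivor had
		 * already timed out, which most architectures can ignore.
		 */
		(void)cpu_report_death();
		arch_halt_self();	/* hypothetical: power down, never returns */
	}
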
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c5cefb3c009c..1ad74c0df01f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1180,16 +1180,7 @@ config DEBUG_CREDENTIALS | |||
1180 | menu "RCU Debugging" | 1180 | menu "RCU Debugging" |
1181 | 1181 | ||
1182 | config PROVE_RCU | 1182 | config PROVE_RCU |
1183 | bool "RCU debugging: prove RCU correctness" | 1183 | def_bool PROVE_LOCKING |
1184 | depends on PROVE_LOCKING | ||
1185 | default n | ||
1186 | help | ||
1187 | This feature enables lockdep extensions that check for correct | ||
1188 | use of RCU APIs. This is currently under development. Say Y | ||
1189 | if you want to debug RCU usage or help work on the PROVE_RCU | ||
1190 | feature. | ||
1191 | |||
1192 | Say N if you are unsure. | ||
1193 | 1184 | ||
1194 | config PROVE_RCU_REPEATEDLY | 1185 | config PROVE_RCU_REPEATEDLY |
1195 | bool "RCU debugging: don't disable PROVE_RCU on first splat" | 1186 | bool "RCU debugging: don't disable PROVE_RCU on first splat" |
@@ -1257,6 +1248,30 @@ config RCU_TORTURE_TEST_RUNNABLE | |||
1257 | Say N here if you want the RCU torture tests to start only | 1248 | Say N here if you want the RCU torture tests to start only |
1258 | after being manually enabled via /proc. | 1249 | after being manually enabled via /proc. |
1259 | 1250 | ||
1251 | config RCU_TORTURE_TEST_SLOW_INIT | ||
1252 | bool "Slow down RCU grace-period initialization to expose races" | ||
1253 | depends on RCU_TORTURE_TEST | ||
1254 | help | ||
1255 | This option makes grace-period initialization block for a | ||
1256 | few jiffies between initializing each pair of consecutive | ||
1257 | rcu_node structures. This helps to expose races involving | ||
1258 | grace-period initialization; in other words, it makes your | ||
1259 | kernel less stable. It can also greatly increase grace-period | ||
1260 | latency, especially on systems with large numbers of CPUs. | ||
1261 | This is useful when torture-testing RCU, but in almost no | ||
1262 | other circumstance. | ||
1263 | |||
1264 | Say Y here if you want your system to crash and hang more often. | ||
1265 | Say N if you want a sane system. | ||
1266 | |||
1267 | config RCU_TORTURE_TEST_SLOW_INIT_DELAY | ||
1268 | int "How much to slow down RCU grace-period initialization" | ||
1269 | range 0 5 | ||
1270 | default 3 | ||
1271 | help | ||
1272 | This option specifies the number of jiffies to wait between | ||
1273 | each rcu_node structure initialization. | ||
1274 | |||
1260 | config RCU_CPU_STALL_TIMEOUT | 1275 | config RCU_CPU_STALL_TIMEOUT |
1261 | int "RCU CPU stall timeout in seconds" | 1276 | int "RCU CPU stall timeout in seconds" |
1262 | depends on RCU_STALL_COMMON | 1277 | depends on RCU_STALL_COMMON |
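
For reference, the effect of these two Kconfig options on grace-period initialization in kernel/rcu/tree.c is roughly as sketched below; the exact code is in the tree.c hunk of this patch (not reproduced here), and gp_init_delay stands for the jiffies value selected via RCU_TORTURE_TEST_SLOW_INIT_DELAY:

	/* Inside the rcu_node loop of grace-period initialization. */
	rcu_for_each_node_breadth_first(rsp, rnp) {
		/* ... set up this rcu_node structure ... */
		if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) &&
		    gp_init_delay > 0)
			schedule_timeout_uninterruptible(gp_init_delay);
	}
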
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 368d64ac779e..dd2812ceb0ba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh | |||
@@ -310,7 +310,7 @@ function dump(first, pastlast) | |||
310 | cfr[jn] = cf[j] "." cfrep[cf[j]]; | 310 | cfr[jn] = cf[j] "." cfrep[cf[j]]; |
311 | } | 311 | } |
312 | if (cpusr[jn] > ncpus && ncpus != 0) | 312 | if (cpusr[jn] > ncpus && ncpus != 0) |
313 | ovf = "(!)"; | 313 | ovf = "-ovf"; |
314 | else | 314 | else |
315 | ovf = ""; | 315 | ovf = ""; |
316 | print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; | 316 | print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; |
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index d2d2a86139db..49701218dc62 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon | |||
@@ -1,2 +1,3 @@ | |||
1 | CONFIG_RCU_TORTURE_TEST=y | 1 | CONFIG_RCU_TORTURE_TEST=y |
2 | CONFIG_PRINTK_TIME=y | 2 | CONFIG_PRINTK_TIME=y |
3 | CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y | ||