diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-10-07 11:36:12 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-10-08 12:06:38 -0400 |
commit | a4fbe35a124526e6759be07bd9c7ea796ba1e00d (patch) | |
tree | cb5c5a1608fcff588ed9a204ea67d5891adb18fb /kernel/rcutree.h | |
parent | cb349ca95407cbc11424d5e9fc7c8e700709041b (diff) |
rcu: Grace-period initialization excludes only RCU notifier
Kirill noted the following deadlock cycle on shutdown involving padata:
> With commit 755609a9087fa983f567dc5452b2fa7b089b591f I've got deadlock on
> poweroff.
>
> I guess it happens because of race for cpu_hotplug.lock:
>
> CPU A                                  CPU B
> disable_nonboot_cpus()
> _cpu_down()
> cpu_hotplug_begin()
> mutex_lock(&cpu_hotplug.lock);
> __cpu_notify()
> padata_cpu_callback()
> __padata_remove_cpu()
> padata_replace()
> synchronize_rcu()
>                                        rcu_gp_kthread()
>                                        get_online_cpus();
>                                        mutex_lock(&cpu_hotplug.lock);
It would of course be good to eliminate grace-period delays from
CPU-hotplug notifiers, but that is a separate issue. Deadlock is
not an appropriate diagnostic for excessive CPU-hotplug latency.
Fortunately, grace-period initialization does not actually need to
exclude all of the CPU-hotplug operation, but rather only RCU's own
CPU_UP_PREPARE and CPU_DEAD CPU-hotplug notifiers. This commit therefore
introduces a new per-rcu_state onoff_mutex that provides the required
concurrency control in place of the get_online_cpus() that was previously
in rcu_gp_init().
Reported-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Kirill A. Shutemov <kirill@shutemov.name>
Diffstat (limited to 'kernel/rcutree.h')
-rw-r--r-- | kernel/rcutree.h | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5faf05d68326..a240f032848e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -394,11 +394,17 @@ struct rcu_state { | |||
394 | struct rcu_head **orphan_donetail; /* Tail of above. */ | 394 | struct rcu_head **orphan_donetail; /* Tail of above. */ |
395 | long qlen_lazy; /* Number of lazy callbacks. */ | 395 | long qlen_lazy; /* Number of lazy callbacks. */ |
396 | long qlen; /* Total number of callbacks. */ | 396 | long qlen; /* Total number of callbacks. */ |
397 | /* End of fields guarded by onofflock. */ | ||
398 | |||
399 | struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */ | ||
400 | |||
397 | struct mutex barrier_mutex; /* Guards barrier fields. */ | 401 | struct mutex barrier_mutex; /* Guards barrier fields. */ |
398 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | 402 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ |
399 | struct completion barrier_completion; /* Wake at barrier end. */ | 403 | struct completion barrier_completion; /* Wake at barrier end. */ |
400 | unsigned long n_barrier_done; /* ++ at start and end of */ | 404 | unsigned long n_barrier_done; /* ++ at start and end of */ |
401 | /* _rcu_barrier(). */ | 405 | /* _rcu_barrier(). */ |
406 | /* End of fields guarded by barrier_mutex. */ | ||
407 | |||
402 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 408 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
403 | /* force_quiescent_state(). */ | 409 | /* force_quiescent_state(). */ |
404 | unsigned long n_force_qs; /* Number of calls to */ | 410 | unsigned long n_force_qs; /* Number of calls to */ |