aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSrivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>2011-12-21 16:15:29 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2011-12-22 02:02:20 -0500
commite30e2fdfe56288576ee9e04dbb06b4bd5f282203 (patch)
tree15c207b3c124d1fa5ef2ee400b3f27ca233d47a5
parentecefc36b41ac0fe92d76273a23faf27b2da13411 (diff)
VFS: Fix race between CPU hotplug and lglocks
Currently, the *_global_[un]lock_online() routines are not at all synchronized with CPU hotplug. Soft-lockups detected as a consequence of this race was reported earlier at https://lkml.org/lkml/2011/8/24/185. (Thanks to Cong Meng for finding out that the root-cause of this issue is the race condition between br_write_[un]lock() and CPU hotplug, which results in the lock states getting messed up). Fixing this race by just adding {get,put}_online_cpus() at appropriate places in *_global_[un]lock_online() is not a good option, because, then suddenly br_write_[un]lock() would become blocking, whereas they have been kept as non-blocking all this time, and we would want to keep them that way. So, overall, we want to ensure 3 things: 1. br_write_lock() and br_write_unlock() must remain as non-blocking. 2. The corresponding lock and unlock of the per-cpu spinlocks must not happen for different sets of CPUs. 3. Either prevent any new CPU online operation in between this lock-unlock, or ensure that the newly onlined CPU does not proceed with its corresponding per-cpu spinlock unlocked. To achieve all this: (a) We introduce a new spinlock that is taken by the *_global_lock_online() routine and released by the *_global_unlock_online() routine. (b) We register a callback for CPU hotplug notifications, and this callback takes the same spinlock as above. (c) We maintain a bitmap which is close to the cpu_online_mask, and once it is initialized in the lock_init() code, all future updates to it are done in the callback, under the above spinlock. (d) The above bitmap is used (instead of cpu_online_mask) while locking and unlocking the per-cpu locks. The callback takes the spinlock upon the CPU_UP_PREPARE event. So, if the br_write_lock-unlock sequence is in progress, the callback keeps spinning, thus preventing the CPU online operation till the lock-unlock sequence is complete. This takes care of requirement (3). The bitmap that we maintain remains unmodified throughout the lock-unlock sequence, since all updates to it are managed by the callback, which takes the same spinlock as the one taken by the lock code and released only by the unlock routine. Combining this with (d) above, satisfies requirement (2). Overall, since we use a spinlock (mentioned in (a)) to prevent CPU hotplug operations from racing with br_write_lock-unlock, requirement (1) is also taken care of. By the way, it is to be noted that a CPU offline operation can actually run in parallel with our lock-unlock sequence, because our callback doesn't react to notifications earlier than CPU_DEAD (in order to maintain our bitmap properly). And this means, since we use our own bitmap (which is stale, on purpose) during the lock-unlock sequence, we could end up unlocking the per-cpu lock of an offline CPU (because we had locked it earlier, when the CPU was online), in order to satisfy requirement (2). But this is harmless, though it looks a bit awkward. Debugged-by: Cong Meng <mc@linux.vnet.ibm.com> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Cc: stable@vger.kernel.org
-rw-r--r--include/linux/lglock.h36
1 files changed, 32 insertions, 4 deletions
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index f549056fb20..87f402ccec5 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -22,6 +22,7 @@
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/lockdep.h> 23#include <linux/lockdep.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/cpu.h>
25 26
26/* can make br locks by using local lock for read side, global lock for write */ 27/* can make br locks by using local lock for read side, global lock for write */
27#define br_lock_init(name) name##_lock_init() 28#define br_lock_init(name) name##_lock_init()
@@ -72,9 +73,31 @@
72 73
73#define DEFINE_LGLOCK(name) \ 74#define DEFINE_LGLOCK(name) \
74 \ 75 \
76 DEFINE_SPINLOCK(name##_cpu_lock); \
77 cpumask_t name##_cpus __read_mostly; \
75 DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ 78 DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
76 DEFINE_LGLOCK_LOCKDEP(name); \ 79 DEFINE_LGLOCK_LOCKDEP(name); \
77 \ 80 \
81 static int \
82 name##_lg_cpu_callback(struct notifier_block *nb, \
83 unsigned long action, void *hcpu) \
84 { \
85 switch (action & ~CPU_TASKS_FROZEN) { \
86 case CPU_UP_PREPARE: \
87 spin_lock(&name##_cpu_lock); \
88 cpu_set((unsigned long)hcpu, name##_cpus); \
89 spin_unlock(&name##_cpu_lock); \
90 break; \
91 case CPU_UP_CANCELED: case CPU_DEAD: \
92 spin_lock(&name##_cpu_lock); \
93 cpu_clear((unsigned long)hcpu, name##_cpus); \
94 spin_unlock(&name##_cpu_lock); \
95 } \
96 return NOTIFY_OK; \
97 } \
98 static struct notifier_block name##_lg_cpu_notifier = { \
99 .notifier_call = name##_lg_cpu_callback, \
100 }; \
78 void name##_lock_init(void) { \ 101 void name##_lock_init(void) { \
79 int i; \ 102 int i; \
80 LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ 103 LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
@@ -83,6 +106,11 @@
83 lock = &per_cpu(name##_lock, i); \ 106 lock = &per_cpu(name##_lock, i); \
84 *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ 107 *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \
85 } \ 108 } \
109 register_hotcpu_notifier(&name##_lg_cpu_notifier); \
110 get_online_cpus(); \
111 for_each_online_cpu(i) \
112 cpu_set(i, name##_cpus); \
113 put_online_cpus(); \
86 } \ 114 } \
87 EXPORT_SYMBOL(name##_lock_init); \ 115 EXPORT_SYMBOL(name##_lock_init); \
88 \ 116 \
@@ -124,9 +152,9 @@
124 \ 152 \
125 void name##_global_lock_online(void) { \ 153 void name##_global_lock_online(void) { \
126 int i; \ 154 int i; \
127 preempt_disable(); \ 155 spin_lock(&name##_cpu_lock); \
128 rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ 156 rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
129 for_each_online_cpu(i) { \ 157 for_each_cpu(i, &name##_cpus) { \
130 arch_spinlock_t *lock; \ 158 arch_spinlock_t *lock; \
131 lock = &per_cpu(name##_lock, i); \ 159 lock = &per_cpu(name##_lock, i); \
132 arch_spin_lock(lock); \ 160 arch_spin_lock(lock); \
@@ -137,12 +165,12 @@
137 void name##_global_unlock_online(void) { \ 165 void name##_global_unlock_online(void) { \
138 int i; \ 166 int i; \
139 rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ 167 rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
140 for_each_online_cpu(i) { \ 168 for_each_cpu(i, &name##_cpus) { \
141 arch_spinlock_t *lock; \ 169 arch_spinlock_t *lock; \
142 lock = &per_cpu(name##_lock, i); \ 170 lock = &per_cpu(name##_lock, i); \
143 arch_spin_unlock(lock); \ 171 arch_spin_unlock(lock); \
144 } \ 172 } \
145 preempt_enable(); \ 173 spin_unlock(&name##_cpu_lock); \
146 } \ 174 } \
147 EXPORT_SYMBOL(name##_global_unlock_online); \ 175 EXPORT_SYMBOL(name##_global_unlock_online); \
148 \ 176 \