author     Gautham R Shenoy <ego@in.ibm.com>    2008-01-25 15:08:01 -0500
committer  Ingo Molnar <mingo@elte.hu>          2008-01-25 15:08:01 -0500
commit     d221938c049f4845da13c8593132595a6b9222a8
tree       8eae6c7095a3d7d31d7435befc30019540a4d13e
parent     6b2d7700266b9402e12824e11e0099ae6a4a6a79
cpu-hotplug: refcount based cpu hotplug
This patch implements a refcount + waitqueue based model for
cpu-hotplug.

A thread that wants to prevent cpu-hotplug bumps up a global refcount;
a thread that wants to perform a cpu-hotplug operation blocks until
that refcount drops to zero.

Any readers that arrive while a cpu-hotplug operation is in progress
are blocked until the operation is over.
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Paul Jackson <pj@sgi.com> [For !CONFIG_HOTPLUG_CPU]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  include/linux/cpu.h |   3
-rw-r--r--  init/main.c         |   1
-rw-r--r--  kernel/cpu.c        | 152
3 files changed, 115 insertions(+), 41 deletions(-)
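Stripped of kernel plumbing, the patch implements a classic reader-refcount / single-writer scheme. The following is a minimal userspace sketch of that scheme, assuming POSIX threads: a pthread mutex and condition variable stand in for cpu_hotplug.lock and writer_queue, and every name here (hp_read_lock(), hp_write_lock(), ...) is a hypothetical illustration, not the kernel API. The recursive-writer shortcut of the real lock_cpu_hotplug() (the active writer may call the reader API while the operation is in flight) is omitted to keep the sketch short.

/* Illustrative sketch only -- not the kernel implementation. */
#include <pthread.h>
#include <stdio.h>

static struct {
	pthread_mutex_t lock;      /* stands in for cpu_hotplug.lock */
	pthread_cond_t  writer_cv; /* stands in for writer_queue */
	int             have_writer;
	int             refcount;  /* number of active readers */
} hp = {
	.lock      = PTHREAD_MUTEX_INITIALIZER,
	.writer_cv = PTHREAD_COND_INITIALIZER,
};

/* Reader side: analogous to lock_cpu_hotplug()/unlock_cpu_hotplug(). */
static void hp_read_lock(void)
{
	pthread_mutex_lock(&hp.lock);   /* blocks while a writer holds the lock */
	hp.refcount++;
	pthread_mutex_unlock(&hp.lock);
}

static void hp_read_unlock(void)
{
	pthread_mutex_lock(&hp.lock);
	if (--hp.refcount == 0 && hp.have_writer)
		pthread_cond_signal(&hp.writer_cv); /* wake the waiting writer */
	pthread_mutex_unlock(&hp.lock);
}

/* Writer side: analogous to cpu_hotplug_begin()/cpu_hotplug_done(). */
static void hp_write_lock(void)
{
	pthread_mutex_lock(&hp.lock);   /* new readers now block in hp_read_lock() */
	hp.have_writer = 1;
	while (hp.refcount)             /* wait for existing readers to drain */
		pthread_cond_wait(&hp.writer_cv, &hp.lock);
}

static void hp_write_unlock(void)
{
	hp.have_writer = 0;
	pthread_mutex_unlock(&hp.lock); /* readers may enter again */
}

int main(void)
{
	hp_read_lock();                 /* a "reader" pins the CPU maps */
	printf("reader in, refcount=%d\n", hp.refcount);
	hp_read_unlock();

	hp_write_lock();                /* the "hotplug path" runs alone */
	printf("writer in, refcount=%d\n", hp.refcount);
	hp_write_unlock();
	return 0;
}

Compile with e.g. cc -pthread sketch.c. The property being mirrored is the one the patch relies on: the writer keeps the mutex held for the entire hotplug operation, so new readers block on the mutex while the condition wait drains the readers that were already inside.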
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 92f2029a34f3..a40247e4d462 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -83,6 +83,9 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 
 #endif /* CONFIG_SMP */
 extern struct sysdev_class cpu_sysdev_class;
+extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
diff --git a/init/main.c b/init/main.c
index 80b04b6c5157..f287ca5862b9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -607,6 +607,7 @@ asmlinkage void __init start_kernel(void)
 	vfs_caches_init_early();
 	cpuset_init_early();
 	mem_init();
+	cpu_hotplug_init();
 	kmem_cache_init();
 	setup_per_cpu_pageset();
 	numa_policy_init();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6b3a0c15144f..656dc3fcbbae 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,9 +15,8 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 
-/* This protects CPUs going up and down... */
+/* Serializes the updates to cpu_online_map, cpu_present_map */
 static DEFINE_MUTEX(cpu_add_remove_lock);
-static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
 
@@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
  */
 static int cpu_hotplug_disabled;
 
-#ifdef CONFIG_HOTPLUG_CPU
+static struct {
+	struct task_struct *active_writer;
+	struct mutex lock; /* Synchronizes accesses to refcount, */
+	/*
+	 * Also blocks the new readers during
+	 * an ongoing cpu hotplug operation.
+	 */
+	int refcount;
+	wait_queue_head_t writer_queue;
+} cpu_hotplug;
 
-/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
-static struct task_struct *recursive;
-static int recursive_depth;
+#define writer_exists() (cpu_hotplug.active_writer != NULL)
+
+void __init cpu_hotplug_init(void)
+{
+	cpu_hotplug.active_writer = NULL;
+	mutex_init(&cpu_hotplug.lock);
+	cpu_hotplug.refcount = 0;
+	init_waitqueue_head(&cpu_hotplug.writer_queue);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
 
 void lock_cpu_hotplug(void)
 {
-	struct task_struct *tsk = current;
-
-	if (tsk == recursive) {
-		static int warnings = 10;
-		if (warnings) {
-			printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
-			WARN_ON(1);
-			warnings--;
-		}
-		recursive_depth++;
+	might_sleep();
+	if (cpu_hotplug.active_writer == current)
 		return;
-	}
-	mutex_lock(&cpu_bitmask_lock);
-	recursive = tsk;
+	mutex_lock(&cpu_hotplug.lock);
+	cpu_hotplug.refcount++;
+	mutex_unlock(&cpu_hotplug.lock);
+
 }
 EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
 
 void unlock_cpu_hotplug(void)
 {
-	WARN_ON(recursive != current);
-	if (recursive_depth) {
-		recursive_depth--;
+	if (cpu_hotplug.active_writer == current)
 		return;
-	}
-	recursive = NULL;
-	mutex_unlock(&cpu_bitmask_lock);
+	mutex_lock(&cpu_hotplug.lock);
+	cpu_hotplug.refcount--;
+
+	if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
+		wake_up(&cpu_hotplug.writer_queue);
+
+	mutex_unlock(&cpu_hotplug.lock);
+
 }
 EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+/*
+ * The following two API's must be used when attempting
+ * to serialize the updates to cpu_online_map, cpu_present_map.
+ */
+void cpu_maps_update_begin(void)
+{
+	mutex_lock(&cpu_add_remove_lock);
+}
+
+void cpu_maps_update_done(void)
+{
+	mutex_unlock(&cpu_add_remove_lock);
+}
+
+/*
+ * This ensures that the hotplug operation can begin only when the
+ * refcount goes to zero.
+ *
+ * Note that during a cpu-hotplug operation, the new readers, if any,
+ * will be blocked by the cpu_hotplug.lock
+ *
+ * Since cpu_hotplug_begin() is always called after invoking
+ * cpu_maps_update_begin(), we can be sure that only one writer is active.
+ *
+ * Note that theoretically, there is a possibility of a livelock:
+ * - Refcount goes to zero, last reader wakes up the sleeping
+ *   writer.
+ * - Last reader unlocks the cpu_hotplug.lock.
+ * - A new reader arrives at this moment, bumps up the refcount.
+ * - The writer acquires the cpu_hotplug.lock, finds the refcount
+ *   non-zero and goes to sleep again.
+ *
+ * However, this is very difficult to achieve in practice since
+ * lock_cpu_hotplug() is not an API which is called all that often.
+ *
+ */
+static void cpu_hotplug_begin(void)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	mutex_lock(&cpu_hotplug.lock);
+
+	cpu_hotplug.active_writer = current;
+	add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
+	while (cpu_hotplug.refcount) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		mutex_unlock(&cpu_hotplug.lock);
+		schedule();
+		mutex_lock(&cpu_hotplug.lock);
+	}
+	remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
+}
+
+static void cpu_hotplug_done(void)
+{
+	cpu_hotplug.active_writer = NULL;
+	mutex_unlock(&cpu_hotplug.lock);
+}
 /* Need to know about CPUs going up/down? */
 int __cpuinit register_cpu_notifier(struct notifier_block *nb)
 {
 	int ret;
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	ret = raw_notifier_chain_register(&cpu_chain, nb);
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return ret;
 }
 
@@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
 
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	raw_notifier_chain_unregister(&cpu_chain, nb);
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
@@ -147,6 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
+	cpu_hotplug_begin();
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
@@ -166,9 +237,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed(current, tmp);
 
-	mutex_lock(&cpu_bitmask_lock);
 	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
-	mutex_unlock(&cpu_bitmask_lock);
 
 	if (IS_ERR(p) || cpu_online(cpu)) {
 		/* CPU didn't die: tell everyone. Can't complain. */
@@ -203,6 +272,7 @@ out_allowed:
 	set_cpus_allowed(current, old_allowed);
 out_release:
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
+	cpu_hotplug_done();
 	return err;
 }
 
@@ -210,13 +280,13 @@ int cpu_down(unsigned int cpu)
 {
 	int err = 0;
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	if (cpu_hotplug_disabled)
 		err = -EBUSY;
 	else
 		err = _cpu_down(cpu, 0);
 
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
@@ -231,6 +301,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	if (cpu_online(cpu) || !cpu_present(cpu))
 		return -EINVAL;
 
+	cpu_hotplug_begin();
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
 	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
 					-1, &nr_calls);
@@ -243,9 +314,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Arch-specific enabling code. */
-	mutex_lock(&cpu_bitmask_lock);
 	ret = __cpu_up(cpu);
-	mutex_unlock(&cpu_bitmask_lock);
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
@@ -258,6 +327,7 @@ out_notify:
 		__raw_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
+	cpu_hotplug_done();
 
 	return ret;
 }
@@ -275,13 +345,13 @@ int __cpuinit cpu_up(unsigned int cpu)
 		return -EINVAL;
 	}
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	if (cpu_hotplug_disabled)
 		err = -EBUSY;
 	else
 		err = _cpu_up(cpu, 0);
 
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return err;
 }
 
@@ -292,7 +362,7 @@ int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error = 0;
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	first_cpu = first_cpu(cpu_online_map);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
@@ -319,7 +389,7 @@ int disable_nonboot_cpus(void)
 	} else {
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return error;
 }
 
@@ -328,7 +398,7 @@ void enable_nonboot_cpus(void)
 	int cpu, error;
 
 	/* Allow everyone to use the CPU hotplug again */
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	cpu_hotplug_disabled = 0;
 	if (cpus_empty(frozen_cpus))
 		goto out;
@@ -344,6 +414,6 @@ void enable_nonboot_cpus(void)
 	}
 	cpus_clear(frozen_cpus);
 out:
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 }
 #endif /* CONFIG_PM_SLEEP_SMP */