author     Gautham R Shenoy <ego@in.ibm.com>    2008-01-25 15:08:01 -0500
committer  Ingo Molnar <mingo@elte.hu>          2008-01-25 15:08:01 -0500
commit     d221938c049f4845da13c8593132595a6b9222a8 (patch)
tree       8eae6c7095a3d7d31d7435befc30019540a4d13e
parent     6b2d7700266b9402e12824e11e0099ae6a4a6a79 (diff)
cpu-hotplug: refcount based cpu hotplug
This patch implements a refcount + waitqueue based model for cpu-hotplug.

A thread which wants to prevent cpu-hotplug bumps up a global refcount, and a
thread which wants to perform a cpu-hotplug operation blocks till the global
refcount goes to zero.

Any readers that arrive during an ongoing cpu-hotplug operation are blocked
until the operation is over.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Paul Jackson <pj@sgi.com> [For !CONFIG_HOTPLUG_CPU ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
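As a rough illustration of the model (a minimal sketch, not part of the patch; walk_online_cpus() and do_per_cpu_work() are hypothetical names, while lock_cpu_hotplug()/unlock_cpu_hotplug() and cpu_hotplug_begin()/cpu_hotplug_done() are the reader and writer entry points this patch provides):

	/* Reader side: anything that must see a stable cpu_online_map. */
	static void walk_online_cpus(void)	/* hypothetical caller */
	{
		int cpu;

		lock_cpu_hotplug();		/* bumps cpu_hotplug.refcount */
		for_each_online_cpu(cpu)
			do_per_cpu_work(cpu);	/* hypothetical per-cpu work */
		unlock_cpu_hotplug();		/* last reader wakes a waiting writer */
	}

	/* Writer side: _cpu_up()/_cpu_down() bracket the operation like this. */
	cpu_hotplug_begin();	/* sleeps until refcount drops to zero */
	/* ... bring the CPU up or take it down ... */
	cpu_hotplug_done();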
-rw-r--r--  include/linux/cpu.h |   3
-rw-r--r--  init/main.c         |   1
-rw-r--r--  kernel/cpu.c        | 152
3 files changed, 115 insertions(+), 41 deletions(-)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 92f2029a34f3..a40247e4d462 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -83,6 +83,9 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 
 #endif /* CONFIG_SMP */
 extern struct sysdev_class cpu_sysdev_class;
+extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
diff --git a/init/main.c b/init/main.c
index 80b04b6c5157..f287ca5862b9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -607,6 +607,7 @@ asmlinkage void __init start_kernel(void)
 	vfs_caches_init_early();
 	cpuset_init_early();
 	mem_init();
+	cpu_hotplug_init();
 	kmem_cache_init();
 	setup_per_cpu_pageset();
 	numa_policy_init();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6b3a0c15144f..656dc3fcbbae 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,9 +15,8 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 
-/* This protects CPUs going up and down... */
+/* Serializes the updates to cpu_online_map, cpu_present_map */
 static DEFINE_MUTEX(cpu_add_remove_lock);
-static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
 
@@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
  */
 static int cpu_hotplug_disabled;
 
-#ifdef CONFIG_HOTPLUG_CPU
+static struct {
+	struct task_struct *active_writer;
+	struct mutex lock; /* Synchronizes accesses to refcount, */
+	/*
+	 * Also blocks the new readers during
+	 * an ongoing cpu hotplug operation.
+	 */
+	int refcount;
+	wait_queue_head_t writer_queue;
+} cpu_hotplug;
 
-/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
-static struct task_struct *recursive;
-static int recursive_depth;
+#define writer_exists() (cpu_hotplug.active_writer != NULL)
+
+void __init cpu_hotplug_init(void)
+{
+	cpu_hotplug.active_writer = NULL;
+	mutex_init(&cpu_hotplug.lock);
+	cpu_hotplug.refcount = 0;
+	init_waitqueue_head(&cpu_hotplug.writer_queue);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
 
 void lock_cpu_hotplug(void)
 {
-	struct task_struct *tsk = current;
-
-	if (tsk == recursive) {
-		static int warnings = 10;
-		if (warnings) {
-			printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
-			WARN_ON(1);
-			warnings--;
-		}
-		recursive_depth++;
+	might_sleep();
+	if (cpu_hotplug.active_writer == current)
 		return;
-	}
-	mutex_lock(&cpu_bitmask_lock);
-	recursive = tsk;
+	mutex_lock(&cpu_hotplug.lock);
+	cpu_hotplug.refcount++;
+	mutex_unlock(&cpu_hotplug.lock);
+
 }
 EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
 
 void unlock_cpu_hotplug(void)
 {
-	WARN_ON(recursive != current);
-	if (recursive_depth) {
-		recursive_depth--;
+	if (cpu_hotplug.active_writer == current)
 		return;
-	}
-	recursive = NULL;
-	mutex_unlock(&cpu_bitmask_lock);
+	mutex_lock(&cpu_hotplug.lock);
+	cpu_hotplug.refcount--;
+
+	if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
+		wake_up(&cpu_hotplug.writer_queue);
+
+	mutex_unlock(&cpu_hotplug.lock);
+
 }
 EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+/*
+ * The following two API's must be used when attempting
+ * to serialize the updates to cpu_online_map, cpu_present_map.
+ */
+void cpu_maps_update_begin(void)
+{
+	mutex_lock(&cpu_add_remove_lock);
+}
+
+void cpu_maps_update_done(void)
+{
+	mutex_unlock(&cpu_add_remove_lock);
+}
+
+/*
+ * This ensures that the hotplug operation can begin only when the
+ * refcount goes to zero.
+ *
+ * Note that during a cpu-hotplug operation, the new readers, if any,
+ * will be blocked by the cpu_hotplug.lock
+ *
+ * Since cpu_maps_update_begin is always called after invoking
+ * cpu_maps_update_begin, we can be sure that only one writer is active.
+ *
+ * Note that theoretically, there is a possibility of a livelock:
+ * - Refcount goes to zero, last reader wakes up the sleeping
+ *   writer.
+ * - Last reader unlocks the cpu_hotplug.lock.
+ * - A new reader arrives at this moment, bumps up the refcount.
+ * - The writer acquires the cpu_hotplug.lock finds the refcount
+ *   non zero and goes to sleep again.
+ *
+ * However, this is very difficult to achieve in practice since
+ * lock_cpu_hotplug() not an api which is called all that often.
+ *
+ */
+static void cpu_hotplug_begin(void)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	mutex_lock(&cpu_hotplug.lock);
+
+	cpu_hotplug.active_writer = current;
+	add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
+	while (cpu_hotplug.refcount) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		mutex_unlock(&cpu_hotplug.lock);
+		schedule();
+		mutex_lock(&cpu_hotplug.lock);
+	}
+	remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
+}
+
+static void cpu_hotplug_done(void)
+{
+	cpu_hotplug.active_writer = NULL;
+	mutex_unlock(&cpu_hotplug.lock);
+}
 /* Need to know about CPUs going up/down? */
 int __cpuinit register_cpu_notifier(struct notifier_block *nb)
 {
 	int ret;
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	ret = raw_notifier_chain_register(&cpu_chain, nb);
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return ret;
 }
 
@@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
 
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	raw_notifier_chain_unregister(&cpu_chain, nb);
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
@@ -147,6 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
+	cpu_hotplug_begin();
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
@@ -166,9 +237,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed(current, tmp);
 
-	mutex_lock(&cpu_bitmask_lock);
 	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
-	mutex_unlock(&cpu_bitmask_lock);
 
 	if (IS_ERR(p) || cpu_online(cpu)) {
 		/* CPU didn't die: tell everyone. Can't complain. */
@@ -203,6 +272,7 @@ out_allowed:
 	set_cpus_allowed(current, old_allowed);
 out_release:
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
+	cpu_hotplug_done();
 	return err;
 }
 
@@ -210,13 +280,13 @@ int cpu_down(unsigned int cpu)
 {
 	int err = 0;
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	if (cpu_hotplug_disabled)
 		err = -EBUSY;
 	else
 		err = _cpu_down(cpu, 0);
 
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
@@ -231,6 +301,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	if (cpu_online(cpu) || !cpu_present(cpu))
 		return -EINVAL;
 
+	cpu_hotplug_begin();
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
 	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
 					-1, &nr_calls);
@@ -243,9 +314,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Arch-specific enabling code. */
-	mutex_lock(&cpu_bitmask_lock);
 	ret = __cpu_up(cpu);
-	mutex_unlock(&cpu_bitmask_lock);
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
@@ -258,6 +327,7 @@ out_notify:
 		__raw_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
 	raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
+	cpu_hotplug_done();
 
 	return ret;
 }
@@ -275,13 +345,13 @@ int __cpuinit cpu_up(unsigned int cpu)
 		return -EINVAL;
 	}
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	if (cpu_hotplug_disabled)
 		err = -EBUSY;
 	else
 		err = _cpu_up(cpu, 0);
 
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return err;
 }
 
@@ -292,7 +362,7 @@ int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error = 0;
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	first_cpu = first_cpu(cpu_online_map);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
@@ -319,7 +389,7 @@ int disable_nonboot_cpus(void)
 	} else {
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 	return error;
 }
 
@@ -328,7 +398,7 @@ void enable_nonboot_cpus(void)
 	int cpu, error;
 
 	/* Allow everyone to use the CPU hotplug again */
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_maps_update_begin();
 	cpu_hotplug_disabled = 0;
 	if (cpus_empty(frozen_cpus))
 		goto out;
@@ -344,6 +414,6 @@ void enable_nonboot_cpus(void)
 	}
 	cpus_clear(frozen_cpus);
 out:
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_maps_update_done();
 }
 #endif /* CONFIG_PM_SLEEP_SMP */