 kernel/smp.c | 264 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 166 insertions(+), 98 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 6ecf4b9895d4..7a0ce25829dc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -10,23 +10,28 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 
 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
-static LIST_HEAD(call_function_queue);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+
+static struct {
+	struct list_head	queue;
+	spinlock_t		lock;
+} call_function __cacheline_aligned_in_smp = {
+	.queue = LIST_HEAD_INIT(call_function.queue),
+	.lock  = __SPIN_LOCK_UNLOCKED(call_function.lock),
+};
 
 enum {
 	CSD_FLAG_WAIT		= 0x01,
-	CSD_FLAG_ALLOC		= 0x02,
-	CSD_FLAG_LOCK		= 0x04,
+	CSD_FLAG_LOCK		= 0x02,
 };
 
 struct call_function_data {
 	struct call_single_data	csd;
 	spinlock_t		lock;
 	unsigned int		refs;
-	struct rcu_head		rcu_head;
-	unsigned long		cpumask_bits[];
+	cpumask_var_t		cpumask;
 };
 
 struct call_single_queue {
@@ -34,8 +39,45 @@ struct call_single_queue {
 	spinlock_t		lock;
 };
 
+static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
+	.lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
+};
+
+static int
+hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+				cpu_to_node(cpu)))
+			return NOTIFY_BAD;
+		break;
+
+#ifdef CONFIG_CPU_HOTPLUG
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		free_cpumask_var(cfd->cpumask);
+		break;
+#endif
+	};
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
+	.notifier_call = hotplug_cfd,
+};
+
 static int __cpuinit init_call_single_data(void)
 {
+	void *cpu = (void *)(long)smp_processor_id();
 	int i;
 
 	for_each_possible_cpu(i) {
@@ -44,18 +86,69 @@ static int __cpuinit init_call_single_data(void)
 		spin_lock_init(&q->lock);
 		INIT_LIST_HEAD(&q->list);
 	}
+
+	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
+	register_cpu_notifier(&hotplug_cfd_notifier);
+
 	return 0;
 }
 early_initcall(init_call_single_data);
 
-static void csd_flag_wait(struct call_single_data *data)
+/*
+ * csd_wait/csd_complete are used for synchronous ipi calls
+ */
+static void csd_wait_prepare(struct call_single_data *data)
 {
-	/* Wait for response */
-	do {
-		if (!(data->flags & CSD_FLAG_WAIT))
-			break;
+	data->flags |= CSD_FLAG_WAIT;
+}
+
+static void csd_complete(struct call_single_data *data)
+{
+	if (data->flags & CSD_FLAG_WAIT) {
+		/*
+		 * ensure we're all done before saying we are
+		 */
+		smp_mb();
+		data->flags &= ~CSD_FLAG_WAIT;
+	}
+}
+
+static void csd_wait(struct call_single_data *data)
+{
+	while (data->flags & CSD_FLAG_WAIT)
 		cpu_relax();
-	} while (1);
+}
+
+/*
+ * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
+ *
+ * For non-synchronous ipi calls the csd can still be in use by the previous
+ * function call. For multi-cpu calls its even more interesting as we'll have
+ * to ensure no other cpu is observing our csd.
+ */
+static void csd_lock(struct call_single_data *data)
+{
+	while (data->flags & CSD_FLAG_LOCK)
+		cpu_relax();
+	data->flags = CSD_FLAG_LOCK;
+
+	/*
+	 * prevent CPU from reordering the above assignment to ->flags
+	 * with any subsequent assignments to other fields of the
+	 * specified call_single_data structure.
+	 */
+
+	smp_mb();
+}
+
+static void csd_unlock(struct call_single_data *data)
+{
+	WARN_ON(!(data->flags & CSD_FLAG_LOCK));
+	/*
+	 * ensure we're all done before releasing data
+	 */
+	smp_mb();
+	data->flags &= ~CSD_FLAG_LOCK;
 }
 
 /*
@@ -89,16 +182,7 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
 		arch_send_call_function_single_ipi(cpu);
 
 	if (wait)
-		csd_flag_wait(data);
-}
-
-static void rcu_free_call_data(struct rcu_head *head)
-{
-	struct call_function_data *data;
-
-	data = container_of(head, struct call_function_data, rcu_head);
-
-	kfree(data);
+		csd_wait(data);
 }
 
 /*
@@ -122,41 +206,35 @@ void generic_smp_call_function_interrupt(void)
 	 * It's ok to use list_for_each_rcu() here even though we may delete
 	 * 'pos', since list_del_rcu() doesn't clear ->next
 	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
 
-		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
+		spin_lock(&data->lock);
+		if (!cpumask_test_cpu(cpu, data->cpumask)) {
+			spin_unlock(&data->lock);
 			continue;
+		}
+		cpumask_clear_cpu(cpu, data->cpumask);
+		spin_unlock(&data->lock);
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
 		WARN_ON(data->refs == 0);
-		data->refs--;
-		refs = data->refs;
+		refs = --data->refs;
+		if (!refs) {
+			spin_lock(&call_function.lock);
+			list_del_rcu(&data->csd.list);
+			spin_unlock(&call_function.lock);
+		}
 		spin_unlock(&data->lock);
 
 		if (refs)
 			continue;
 
-		spin_lock(&call_function_lock);
-		list_del_rcu(&data->csd.list);
-		spin_unlock(&call_function_lock);
-
-		if (data->csd.flags & CSD_FLAG_WAIT) {
-			/*
-			 * serialize stores to data with the flag clear
-			 * and wakeup
-			 */
-			smp_wmb();
-			data->csd.flags &= ~CSD_FLAG_WAIT;
-		}
-		if (data->csd.flags & CSD_FLAG_ALLOC)
-			call_rcu(&data->rcu_head, rcu_free_call_data);
+		csd_complete(&data->csd);
+		csd_unlock(&data->csd);
 	}
-	rcu_read_unlock();
 
 	put_cpu();
 }
@@ -192,14 +270,14 @@ void generic_smp_call_function_single_interrupt(void)
 
 		data->func(data->info);
 
-		if (data_flags & CSD_FLAG_WAIT) {
-			smp_wmb();
-			data->flags &= ~CSD_FLAG_WAIT;
-		} else if (data_flags & CSD_FLAG_LOCK) {
-			smp_wmb();
-			data->flags &= ~CSD_FLAG_LOCK;
-		} else if (data_flags & CSD_FLAG_ALLOC)
-			kfree(data);
+		if (data_flags & CSD_FLAG_WAIT)
+			csd_complete(data);
+
+		/*
+		 * Unlocked CSDs are valid through generic_exec_single()
+		 */
+		if (data_flags & CSD_FLAG_LOCK)
+			csd_unlock(data);
 	}
 }
 
@@ -218,7 +296,9 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data);
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 			     int wait)
 {
-	struct call_single_data d;
+	struct call_single_data d = {
+		.flags = 0,
+	};
 	unsigned long flags;
 	/* prevent preemption and reschedule on another processor,
 	   as well as CPU removal */
@@ -239,13 +319,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 		/*
 		 * We are calling a function on a single CPU
 		 * and we are not going to wait for it to finish.
-		 * We first try to allocate the data, but if we
-		 * fail, we fall back to use a per cpu data to pass
-		 * the information to that CPU. Since all callers
-		 * of this code will use the same data, we must
-		 * synchronize the callers to prevent a new caller
-		 * from corrupting the data before the callee
-		 * can access it.
+		 * We use a per cpu data to pass the information to
+		 * that CPU. Since all callers of this code will
+		 * use the same data, we must synchronize the
+		 * callers to prevent a new caller from corrupting
+		 * the data before the callee can access it.
 		 *
 		 * The CSD_FLAG_LOCK is used to let us know when
 		 * the IPI handler is done with the data.
@@ -255,18 +333,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 		 * will make sure the callee is done with the
 		 * data before a new caller will use it.
 		 */
-		data = kmalloc(sizeof(*data), GFP_ATOMIC);
-		if (data)
-			data->flags = CSD_FLAG_ALLOC;
-		else {
-			data = &per_cpu(csd_data, me);
-			while (data->flags & CSD_FLAG_LOCK)
-				cpu_relax();
-			data->flags = CSD_FLAG_LOCK;
-		}
+		data = &__get_cpu_var(csd_data);
+		csd_lock(data);
 	} else {
 		data = &d;
-		data->flags = CSD_FLAG_WAIT;
+		csd_wait_prepare(data);
 	}
 
 	data->func = func;
@@ -326,14 +397,14 @@ void smp_call_function_many(const struct cpumask *mask,
 {
 	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, next_cpu;
+	int cpu, next_cpu, me = smp_processor_id();
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
 	/* So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
-	if (cpu == smp_processor_id())
+	if (cpu == me)
 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
 	/* No online cpus? We're done. */
 	if (cpu >= nr_cpu_ids)
@@ -341,7 +412,7 @@ void smp_call_function_many(const struct cpumask *mask,
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-	if (next_cpu == smp_processor_id())
+	if (next_cpu == me)
 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
 
 	/* Fastpath: do that cpu by itself. */
@@ -350,31 +421,28 @@ void smp_call_function_many(const struct cpumask *mask,
 		return;
 	}
 
-	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
-	if (unlikely(!data)) {
-		/* Slow path. */
-		for_each_online_cpu(cpu) {
-			if (cpu == smp_processor_id())
-				continue;
-			if (cpumask_test_cpu(cpu, mask))
-				smp_call_function_single(cpu, func, info, wait);
-		}
-		return;
-	}
+	data = &__get_cpu_var(cfd_data);
+	csd_lock(&data->csd);
 
-	spin_lock_init(&data->lock);
-	data->csd.flags = CSD_FLAG_ALLOC;
+	spin_lock_irqsave(&data->lock, flags);
 	if (wait)
-		data->csd.flags |= CSD_FLAG_WAIT;
+		csd_wait_prepare(&data->csd);
+
 	data->csd.func = func;
 	data->csd.info = info;
-	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
-	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
+	cpumask_and(data->cpumask, mask, cpu_online_mask);
+	cpumask_clear_cpu(me, data->cpumask);
+	data->refs = cpumask_weight(data->cpumask);
 
-	spin_lock_irqsave(&call_function_lock, flags);
-	list_add_tail_rcu(&data->csd.list, &call_function_queue);
-	spin_unlock_irqrestore(&call_function_lock, flags);
+	spin_lock(&call_function.lock);
+	/*
+	 * Place entry at the _HEAD_ of the list, so that any cpu still
+	 * observing the entry in generic_smp_call_function_interrupt() will
+	 * not miss any other list entries.
+	 */
+	list_add_rcu(&data->csd.list, &call_function.queue);
+	spin_unlock(&call_function.lock);
+	spin_unlock_irqrestore(&data->lock, flags);
 
 	/*
 	 * Make the list addition visible before sending the ipi.
@@ -384,11 +452,11 @@ void smp_call_function_many(const struct cpumask *mask,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
+	arch_send_call_function_ipi_mask(data->cpumask);
 
 	/* optionally wait for the CPUs to complete */
 	if (wait)
-		csd_flag_wait(&data->csd);
+		csd_wait(&data->csd);
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -418,20 +486,20 @@ EXPORT_SYMBOL(smp_call_function);
 
 void ipi_call_lock(void)
 {
-	spin_lock(&call_function_lock);
+	spin_lock(&call_function.lock);
 }
 
 void ipi_call_unlock(void)
 {
-	spin_unlock(&call_function_lock);
+	spin_unlock(&call_function.lock);
 }
 
 void ipi_call_lock_irq(void)
 {
-	spin_lock_irq(&call_function_lock);
+	spin_lock_irq(&call_function.lock);
 }
 
 void ipi_call_unlock_irq(void)
 {
-	spin_unlock_irq(&call_function_lock);
+	spin_unlock_irq(&call_function.lock);
 }