-rw-r--r--  include/linux/smp.h |   3
-rw-r--r--  kernel/smp.c        | 183
2 files changed, 32 insertions, 154 deletions
diff --git a/include/linux/smp.h b/include/linux/smp.h
index dd6f06be3c9f..3e07a7df6478 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -89,7 +89,8 @@ void kick_all_cpus_sync(void);
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 void __init call_function_init(void);
 void generic_smp_call_function_single_interrupt(void);
-void generic_smp_call_function_interrupt(void);
+#define generic_smp_call_function_interrupt \
+        generic_smp_call_function_single_interrupt
 #else
 static inline void call_function_init(void) { }
 #endif
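With this change generic_smp_call_function_interrupt is no longer a separate handler: the macro aliases it to generic_smp_call_function_single_interrupt, so architectures keep their existing IPI wiring and simply land in the single-queue handler. A minimal sketch, assuming a hypothetical architecture entry point (irq_enter()/irq_exit() are real kernel calls, the handler name is illustrative only):

/*
 * Hedged sketch, not from this patch: how an arch "call function" IPI
 * entry point resolves after the rename.
 */
void hypothetical_arch_call_function_ipi(void)
{
        irq_enter();
        /*
         * After this patch the macro expands to
         * generic_smp_call_function_single_interrupt(), so the "single"
         * and "many" IPIs drain the same per-CPU call_single_queue.
         */
        generic_smp_call_function_interrupt();
        irq_exit();
}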
diff --git a/kernel/smp.c b/kernel/smp.c
index 69f38bd98b42..8e451f3ff51b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,22 +16,12 @@
 #include "smpboot.h"
 
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
-static struct {
-        struct list_head        queue;
-        raw_spinlock_t          lock;
-} call_function __cacheline_aligned_in_smp =
-        {
-                .queue          = LIST_HEAD_INIT(call_function.queue),
-                .lock           = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
-        };
-
 enum {
         CSD_FLAG_LOCK           = 0x01,
 };
 
 struct call_function_data {
-        struct call_single_data csd;
-        atomic_t                refs;
+        struct call_single_data __percpu *csd;
         cpumask_var_t           cpumask;
         cpumask_var_t           cpumask_ipi;
 };
@@ -60,6 +50,11 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
                                 cpu_to_node(cpu)))
                         return notifier_from_errno(-ENOMEM);
+                cfd->csd = alloc_percpu(struct call_single_data);
+                if (!cfd->csd) {
+                        free_cpumask_var(cfd->cpumask);
+                        return notifier_from_errno(-ENOMEM);
+                }
                 break;
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -70,6 +65,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
         case CPU_DEAD_FROZEN:
                 free_cpumask_var(cfd->cpumask);
                 free_cpumask_var(cfd->cpumask_ipi);
+                free_percpu(cfd->csd);
                 break;
 #endif
         };
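Each source CPU's call_function_data now carries one call_single_data per possible destination CPU, allocated with alloc_percpu() when the CPU comes up and released with free_percpu() when it goes down. A minimal sketch of how the element for a given target is looked up; only per_cpu_ptr() and the struct layout come from the patch, the helper name is hypothetical:

/*
 * Hedged sketch: pick the per-destination csd out of the sender's
 * per-CPU array allocated in the hotplug notifier above.
 */
static struct call_single_data *cfd_csd_for(struct call_function_data *cfd,
                                            int target_cpu)
{
        return per_cpu_ptr(cfd->csd, target_cpu);
}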
@@ -171,85 +167,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
 }
 
 /*
- * Invoked by arch to handle an IPI for call function. Must be called with
- * interrupts disabled.
- */
-void generic_smp_call_function_interrupt(void)
-{
-        struct call_function_data *data;
-        int cpu = smp_processor_id();
-
-        /*
-         * Shouldn't receive this interrupt on a cpu that is not yet online.
-         */
-        WARN_ON_ONCE(!cpu_online(cpu));
-
-        /*
-         * Ensure entry is visible on call_function_queue after we have
-         * entered the IPI. See comment in smp_call_function_many.
-         * If we don't have this, then we may miss an entry on the list
-         * and never get another IPI to process it.
-         */
-        smp_mb();
-
-        /*
-         * It's ok to use list_for_each_rcu() here even though we may
-         * delete 'pos', since list_del_rcu() doesn't clear ->next
-         */
-        list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
-                int refs;
-                smp_call_func_t func;
-
-                /*
-                 * Since we walk the list without any locks, we might
-                 * see an entry that was completed, removed from the
-                 * list and is in the process of being reused.
-                 *
-                 * We must check that the cpu is in the cpumask before
-                 * checking the refs, and both must be set before
-                 * executing the callback on this cpu.
-                 */
-
-                if (!cpumask_test_cpu(cpu, data->cpumask))
-                        continue;
-
-                smp_rmb();
-
-                if (atomic_read(&data->refs) == 0)
-                        continue;
-
-                func = data->csd.func;          /* save for later warn */
-                func(data->csd.info);
-
-                /*
-                 * If the cpu mask is not still set then func enabled
-                 * interrupts (BUG), and this cpu took another smp call
-                 * function interrupt and executed func(info) twice
-                 * on this cpu.  That nested execution decremented refs.
-                 */
-                if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
-                        WARN(1, "%pf enabled interrupts and double executed\n", func);
-                        continue;
-                }
-
-                refs = atomic_dec_return(&data->refs);
-                WARN_ON(refs < 0);
-
-                if (refs)
-                        continue;
-
-                WARN_ON(!cpumask_empty(data->cpumask));
-
-                raw_spin_lock(&call_function.lock);
-                list_del_rcu(&data->csd.list);
-                raw_spin_unlock(&call_function.lock);
-
-                csd_unlock(&data->csd);
-        }
-
-}
-
-/*
  * Invoked by arch to handle an IPI for call function single. Must be
  * called from the arch with interrupts disabled.
  */
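With the global call_function.queue walker deleted, cross-CPU function-call requests only ever arrive on a CPU's own call_single_queue, and generic_smp_call_function_single_interrupt() now drains them for both the single and the many case. A simplified, hedged sketch of that drain path (the real handler lives elsewhere in kernel/smp.c and is not part of this patch; flag handling is elided, only the queue fields and csd_unlock() appear in the diff itself):

/*
 * Hedged sketch of the consumer side: splice the per-CPU list under its
 * lock, then run each callback and release its csd so a waiting sender
 * can proceed.
 */
void sketch_call_single_interrupt(void)
{
        struct call_single_queue *q = &__get_cpu_var(call_single_queue);
        struct call_single_data *csd, *next;
        LIST_HEAD(list);

        raw_spin_lock(&q->lock);
        list_replace_init(&q->list, &list);     /* take everything queued so far */
        raw_spin_unlock(&q->lock);

        list_for_each_entry_safe(csd, next, &list, list) {
                list_del(&csd->list);
                csd->func(csd->info);           /* run the requested function */
                csd_unlock(csd);                /* unblocks csd_lock_wait() */
        }
}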
@@ -453,8 +370,7 @@ void smp_call_function_many(const struct cpumask *mask,
                             smp_call_func_t func, void *info, bool wait)
 {
         struct call_function_data *data;
-        unsigned long flags;
-        int refs, cpu, next_cpu, this_cpu = smp_processor_id();
+        int cpu, next_cpu, this_cpu = smp_processor_id();
 
         /*
          * Can deadlock when called with interrupts disabled.
@@ -486,50 +402,13 @@ void smp_call_function_many(const struct cpumask *mask,
         }
 
         data = &__get_cpu_var(cfd_data);
-        csd_lock(&data->csd);
-
-        /* This BUG_ON verifies our reuse assertions and can be removed */
-        BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
-
-        /*
-         * The global call function queue list add and delete are protected
-         * by a lock, but the list is traversed without any lock, relying
-         * on the rcu list add and delete to allow safe concurrent traversal.
-         * We reuse the call function data without waiting for any grace
-         * period after some other cpu removes it from the global queue.
-         * This means a cpu might find our data block as it is being
-         * filled out.
-         *
-         * We hold off the interrupt handler on the other cpu by
-         * ordering our writes to the cpu mask vs our setting of the
-         * refs counter.  We assert only the cpu owning the data block
-         * will set a bit in cpumask, and each bit will only be cleared
-         * by the subject cpu.  Each cpu must first find its bit is
-         * set and then check that refs is set indicating the element is
-         * ready to be processed, otherwise it must skip the entry.
-         *
-         * On the previous iteration refs was set to 0 by another cpu.
-         * To avoid the use of transitivity, set the counter to 0 here
-         * so the wmb will pair with the rmb in the interrupt handler.
-         */
-        atomic_set(&data->refs, 0);     /* convert 3rd to 1st party write */
-
-        data->csd.func = func;
-        data->csd.info = info;
 
-        /* Ensure 0 refs is visible before mask.  Also orders func and info */
-        smp_wmb();
-
-        /* We rely on the "and" being processed before the store */
         cpumask_and(data->cpumask, mask, cpu_online_mask);
         cpumask_clear_cpu(this_cpu, data->cpumask);
-        refs = cpumask_weight(data->cpumask);
 
         /* Some callers race with other cpus changing the passed mask */
-        if (unlikely(!refs)) {
-                csd_unlock(&data->csd);
+        if (unlikely(!cpumask_weight(data->cpumask)))
                 return;
-        }
 
         /*
          * After we put an entry into the list, data->cpumask
@@ -537,34 +416,32 @@ void smp_call_function_many(const struct cpumask *mask,
          * a SMP function call, so data->cpumask will be zero.
          */
         cpumask_copy(data->cpumask_ipi, data->cpumask);
-        raw_spin_lock_irqsave(&call_function.lock, flags);
-        /*
-         * Place entry at the _HEAD_ of the list, so that any cpu still
-         * observing the entry in generic_smp_call_function_interrupt()
-         * will not miss any other list entries:
-         */
-        list_add_rcu(&data->csd.list, &call_function.queue);
-        /*
-         * We rely on the wmb() in list_add_rcu to complete our writes
-         * to the cpumask before this write to refs, which indicates
-         * data is on the list and is ready to be processed.
-         */
-        atomic_set(&data->refs, refs);
-        raw_spin_unlock_irqrestore(&call_function.lock, flags);
 
-        /*
-         * Make the list addition visible before sending the ipi.
-         * (IPIs must obey or appear to obey normal Linux cache
-         * coherency rules -- see comment in generic_exec_single).
-         */
-        smp_mb();
+        for_each_cpu(cpu, data->cpumask) {
+                struct call_single_data *csd = per_cpu_ptr(data->csd, cpu);
+                struct call_single_queue *dst =
+                                        &per_cpu(call_single_queue, cpu);
+                unsigned long flags;
+
+                csd_lock(csd);
+                csd->func = func;
+                csd->info = info;
+
+                raw_spin_lock_irqsave(&dst->lock, flags);
+                list_add_tail(&csd->list, &dst->list);
+                raw_spin_unlock_irqrestore(&dst->lock, flags);
+        }
 
         /* Send a message to all CPUs in the map */
         arch_send_call_function_ipi_mask(data->cpumask_ipi);
 
-        /* Optionally wait for the CPUs to complete */
-        if (wait)
-                csd_lock_wait(&data->csd);
+        if (wait) {
+                for_each_cpu(cpu, data->cpumask) {
+                        struct call_single_data *csd =
+                                        per_cpu_ptr(data->csd, cpu);
+                        csd_lock_wait(csd);
+                }
+        }
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
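The caller-visible API is unchanged: smp_call_function_many() still takes a mask, a callback, an info pointer, and a wait flag, but each request is now queued on the target CPU's own call_single_queue instead of a shared RCU list. A hedged usage sketch; the callback and function names are hypothetical, only the smp_call_function_many() signature comes from the code above:

#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/preempt.h>

static void flush_local_state(void *info)
{
        /* Runs on every CPU in the mask, in IPI context with IRQs off. */
}

static void flush_other_cpus(const struct cpumask *mask)
{
        preempt_disable();      /* smp_call_function_many() needs a stable current CPU */
        /* wait=true: return only after every target CPU has run the callback */
        smp_call_function_many(mask, flush_local_state, NULL, true);
        preempt_enable();
}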