author    Peter Zijlstra <a.p.zijlstra@chello.nl>    2009-02-25 07:59:47 -0500
committer Ingo Molnar <mingo@elte.hu>                2009-02-25 08:13:43 -0500
commit    8969a5ede0f9e17da4b943712429aef2c9bcd82b (patch)
tree      73fa99a1f5596429051b65d355694c90b0e15539 /kernel/smp.c
parent    15d0d3b3371227f846b9f644547fde081c7e1c0c (diff)
generic-ipi: remove kmalloc()
Remove the use of kmalloc() from the smp_call_function_*() calls.

Steven's generic-ipi patch (d7240b98: generic-ipi: use per cpu data for single cpu ipi calls) started the discussion on the use of kmalloc() in this code and fixed the smp_call_function_single(.wait=0) fallback case. In this patch we complete this by also providing means for the _many() call, which fully removes the need for kmalloc() in this code.

The problem with the _many() call is that other cpus might still be observing our entry when we're done with it. The old code solved this by dynamically allocating data elements and RCU-freeing them. We solve it by using a single per-cpu entry, which provides static storage and solves one half of the problem (avoiding references to freed data).

The other half, ensuring that queue iteration is still possible, is done by placing re-used entries at the head of the list. This means that if someone was still iterating over that entry when it got moved, he will now re-visit the entries on the list he had already seen, but it avoids skipping over entries, as would have happened had we placed the new entry at the end.

Furthermore, visiting entries twice is not a problem, since we remove our cpu from the entry's cpumask once it is called.

Many thanks to Oleg for his suggestions and for poking holes in my earlier attempts.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
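To see why re-queueing a still-observed entry at the head is safe for a concurrent iterator, consider the following stand-alone user-space sketch. It is purely illustrative: the list type and helpers are simplified stand-ins, not the kernel's list API, and the names are made up. It removes the entry an iterator is currently sitting on and re-queues it, once at the head and once at the tail:

/*
 * Minimal user-space sketch (not the kernel code itself) of the re-use
 * scheme described above: an iterator that is still sitting on the moved
 * entry re-visits entries it has already seen when the entry goes to the
 * head, but would skip not-yet-seen entries if it went to the tail.
 */
#include <stdio.h>

struct node {
	const char  *name;
	struct node *next;
	struct node *prev;
};

/* circular doubly-linked list with a sentinel head */
static void list_init(struct node *head)
{
	head->next = head->prev = head;
}

static void list_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	/* like list_del_rcu(): n->next is deliberately left intact */
}

static void list_add_head(struct node *head, struct node *n)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_add_tail(struct node *head, struct node *n)
{
	n->next = head;
	n->prev = head->prev;
	head->prev->next = n;
	head->prev = n;
}

int main(void)
{
	struct node head, a = { "A" }, b = { "B" }, c = { "C" }, d = { "D" };
	struct node *pos;
	int at_head;

	for (at_head = 1; at_head >= 0; at_head--) {
		list_init(&head);
		list_add_tail(&head, &a);
		list_add_tail(&head, &b);
		list_add_tail(&head, &c);
		list_add_tail(&head, &d);

		printf("requeue B at %s: ", at_head ? "head" : "tail");

		for (pos = head.next; pos != &head; pos = pos->next) {
			printf("%s ", pos->name);
			if (pos == &b) {
				/* another cpu re-uses B while we sit on it */
				list_del(&b);
				if (at_head)
					list_add_head(&head, &b);
				else
					list_add_tail(&head, &b);
			}
		}
		printf("\n");
	}
	return 0;
}

Compiled with gcc, the head case prints "A B A C D" (A is re-visited, nothing is skipped), while the tail case prints "A B" (C and D are skipped), which is exactly the situation the patch avoids.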
Diffstat (limited to 'kernel/smp.c')
-rw-r--r--    kernel/smp.c    264
1 file changed, 166 insertions(+), 98 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 6ecf4b9895d4..7a0ce25829dc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -10,23 +10,28 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 
 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
-static LIST_HEAD(call_function_queue);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+
+static struct {
+	struct list_head	queue;
+	spinlock_t		lock;
+} call_function __cacheline_aligned_in_smp = {
+	.queue = LIST_HEAD_INIT(call_function.queue),
+	.lock  = __SPIN_LOCK_UNLOCKED(call_function.lock),
+};
 
 enum {
 	CSD_FLAG_WAIT		= 0x01,
-	CSD_FLAG_ALLOC		= 0x02,
-	CSD_FLAG_LOCK		= 0x04,
+	CSD_FLAG_LOCK		= 0x02,
 };
 
 struct call_function_data {
 	struct call_single_data	csd;
 	spinlock_t		lock;
 	unsigned int		refs;
-	struct rcu_head		rcu_head;
-	unsigned long		cpumask_bits[];
+	cpumask_var_t		cpumask;
 };
 
 struct call_single_queue {
@@ -34,8 +39,45 @@ struct call_single_queue {
 	spinlock_t		lock;
 };
 
+static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
+	.lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
+};
+
+static int
+hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+				cpu_to_node(cpu)))
+			return NOTIFY_BAD;
+		break;
+
+#ifdef CONFIG_CPU_HOTPLUG
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		free_cpumask_var(cfd->cpumask);
+		break;
+#endif
+	};
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
+	.notifier_call = hotplug_cfd,
+};
+
 static int __cpuinit init_call_single_data(void)
 {
+	void *cpu = (void *)(long)smp_processor_id();
 	int i;
 
 	for_each_possible_cpu(i) {
@@ -44,18 +86,69 @@ static int __cpuinit init_call_single_data(void)
 		spin_lock_init(&q->lock);
 		INIT_LIST_HEAD(&q->list);
 	}
+
+	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
+	register_cpu_notifier(&hotplug_cfd_notifier);
+
 	return 0;
 }
 early_initcall(init_call_single_data);
 
-static void csd_flag_wait(struct call_single_data *data)
+/*
+ * csd_wait/csd_complete are used for synchronous ipi calls
+ */
+static void csd_wait_prepare(struct call_single_data *data)
 {
-	/* Wait for response */
-	do {
-		if (!(data->flags & CSD_FLAG_WAIT))
-			break;
+	data->flags |= CSD_FLAG_WAIT;
+}
+
+static void csd_complete(struct call_single_data *data)
+{
+	if (data->flags & CSD_FLAG_WAIT) {
+		/*
+		 * ensure we're all done before saying we are
+		 */
+		smp_mb();
+		data->flags &= ~CSD_FLAG_WAIT;
+	}
+}
+
+static void csd_wait(struct call_single_data *data)
+{
+	while (data->flags & CSD_FLAG_WAIT)
 		cpu_relax();
-	} while (1);
+}
+
+/*
+ * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
+ *
+ * For non-synchronous ipi calls the csd can still be in use by the previous
+ * function call. For multi-cpu calls its even more interesting as we'll have
+ * to ensure no other cpu is observing our csd.
+ */
+static void csd_lock(struct call_single_data *data)
+{
+	while (data->flags & CSD_FLAG_LOCK)
+		cpu_relax();
+	data->flags = CSD_FLAG_LOCK;
+
+	/*
+	 * prevent CPU from reordering the above assignment to ->flags
+	 * with any subsequent assignments to other fields of the
+	 * specified call_single_data structure.
+	 */
+
+	smp_mb();
+}
+
+static void csd_unlock(struct call_single_data *data)
+{
+	WARN_ON(!(data->flags & CSD_FLAG_LOCK));
+	/*
+	 * ensure we're all done before releasing data
+	 */
+	smp_mb();
+	data->flags &= ~CSD_FLAG_LOCK;
 }
 
 /*
@@ -89,16 +182,7 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
 	arch_send_call_function_single_ipi(cpu);
 
 	if (wait)
-		csd_flag_wait(data);
-}
-
-static void rcu_free_call_data(struct rcu_head *head)
-{
-	struct call_function_data *data;
-
-	data = container_of(head, struct call_function_data, rcu_head);
-
-	kfree(data);
+		csd_wait(data);
 }
 
 /*
@@ -122,41 +206,35 @@ void generic_smp_call_function_interrupt(void)
 	 * It's ok to use list_for_each_rcu() here even though we may delete
 	 * 'pos', since list_del_rcu() doesn't clear ->next
 	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
 
-		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
+		spin_lock(&data->lock);
+		if (!cpumask_test_cpu(cpu, data->cpumask)) {
+			spin_unlock(&data->lock);
 			continue;
+		}
+		cpumask_clear_cpu(cpu, data->cpumask);
+		spin_unlock(&data->lock);
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
 		WARN_ON(data->refs == 0);
-		data->refs--;
-		refs = data->refs;
+		refs = --data->refs;
+		if (!refs) {
+			spin_lock(&call_function.lock);
+			list_del_rcu(&data->csd.list);
+			spin_unlock(&call_function.lock);
+		}
 		spin_unlock(&data->lock);
 
 		if (refs)
 			continue;
 
-		spin_lock(&call_function_lock);
-		list_del_rcu(&data->csd.list);
-		spin_unlock(&call_function_lock);
-
-		if (data->csd.flags & CSD_FLAG_WAIT) {
-			/*
-			 * serialize stores to data with the flag clear
-			 * and wakeup
-			 */
-			smp_wmb();
-			data->csd.flags &= ~CSD_FLAG_WAIT;
-		}
-		if (data->csd.flags & CSD_FLAG_ALLOC)
-			call_rcu(&data->rcu_head, rcu_free_call_data);
+		csd_complete(&data->csd);
+		csd_unlock(&data->csd);
 	}
-	rcu_read_unlock();
 
 	put_cpu();
 }
@@ -192,14 +270,14 @@ void generic_smp_call_function_single_interrupt(void)
 
 		data->func(data->info);
 
-		if (data_flags & CSD_FLAG_WAIT) {
-			smp_wmb();
-			data->flags &= ~CSD_FLAG_WAIT;
-		} else if (data_flags & CSD_FLAG_LOCK) {
-			smp_wmb();
-			data->flags &= ~CSD_FLAG_LOCK;
-		} else if (data_flags & CSD_FLAG_ALLOC)
-			kfree(data);
+		if (data_flags & CSD_FLAG_WAIT)
+			csd_complete(data);
+
+		/*
+		 * Unlocked CSDs are valid through generic_exec_single()
+		 */
+		if (data_flags & CSD_FLAG_LOCK)
+			csd_unlock(data);
 	}
 }
 
@@ -218,7 +296,9 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data);
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 			     int wait)
 {
-	struct call_single_data d;
+	struct call_single_data d = {
+		.flags = 0,
+	};
 	unsigned long flags;
 	/* prevent preemption and reschedule on another processor,
 	   as well as CPU removal */
@@ -239,13 +319,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 			/*
 			 * We are calling a function on a single CPU
 			 * and we are not going to wait for it to finish.
-			 * We first try to allocate the data, but if we
-			 * fail, we fall back to use a per cpu data to pass
-			 * the information to that CPU. Since all callers
-			 * of this code will use the same data, we must
-			 * synchronize the callers to prevent a new caller
-			 * from corrupting the data before the callee
-			 * can access it.
+			 * We use a per cpu data to pass the information to
+			 * that CPU. Since all callers of this code will
+			 * use the same data, we must synchronize the
+			 * callers to prevent a new caller from corrupting
+			 * the data before the callee can access it.
 			 *
 			 * The CSD_FLAG_LOCK is used to let us know when
 			 * the IPI handler is done with the data.
@@ -255,18 +333,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 			 * will make sure the callee is done with the
 			 * data before a new caller will use it.
 			 */
-			data = kmalloc(sizeof(*data), GFP_ATOMIC);
-			if (data)
-				data->flags = CSD_FLAG_ALLOC;
-			else {
-				data = &per_cpu(csd_data, me);
-				while (data->flags & CSD_FLAG_LOCK)
-					cpu_relax();
-				data->flags = CSD_FLAG_LOCK;
-			}
+			data = &__get_cpu_var(csd_data);
+			csd_lock(data);
 		} else {
 			data = &d;
-			data->flags = CSD_FLAG_WAIT;
+			csd_wait_prepare(data);
 		}
 
 		data->func = func;
@@ -326,14 +397,14 @@ void smp_call_function_many(const struct cpumask *mask,
 {
 	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, next_cpu;
+	int cpu, next_cpu, me = smp_processor_id();
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
 	/* So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
-	if (cpu == smp_processor_id())
+	if (cpu == me)
 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
 	/* No online cpus? We're done. */
 	if (cpu >= nr_cpu_ids)
@@ -341,7 +412,7 @@ void smp_call_function_many(const struct cpumask *mask,
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-	if (next_cpu == smp_processor_id())
+	if (next_cpu == me)
 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
 
 	/* Fastpath: do that cpu by itself. */
@@ -350,31 +421,28 @@ void smp_call_function_many(const struct cpumask *mask,
 		return;
 	}
 
-	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
-	if (unlikely(!data)) {
-		/* Slow path. */
-		for_each_online_cpu(cpu) {
-			if (cpu == smp_processor_id())
-				continue;
-			if (cpumask_test_cpu(cpu, mask))
-				smp_call_function_single(cpu, func, info, wait);
-		}
-		return;
-	}
+	data = &__get_cpu_var(cfd_data);
+	csd_lock(&data->csd);
 
-	spin_lock_init(&data->lock);
-	data->csd.flags = CSD_FLAG_ALLOC;
+	spin_lock_irqsave(&data->lock, flags);
 	if (wait)
-		data->csd.flags |= CSD_FLAG_WAIT;
+		csd_wait_prepare(&data->csd);
+
 	data->csd.func = func;
 	data->csd.info = info;
-	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
-	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
+	cpumask_and(data->cpumask, mask, cpu_online_mask);
+	cpumask_clear_cpu(me, data->cpumask);
+	data->refs = cpumask_weight(data->cpumask);
 
-	spin_lock_irqsave(&call_function_lock, flags);
-	list_add_tail_rcu(&data->csd.list, &call_function_queue);
-	spin_unlock_irqrestore(&call_function_lock, flags);
+	spin_lock(&call_function.lock);
+	/*
+	 * Place entry at the _HEAD_ of the list, so that any cpu still
+	 * observing the entry in generic_smp_call_function_interrupt() will
+	 * not miss any other list entries.
+	 */
+	list_add_rcu(&data->csd.list, &call_function.queue);
+	spin_unlock(&call_function.lock);
+	spin_unlock_irqrestore(&data->lock, flags);
 
 	/*
 	 * Make the list addition visible before sending the ipi.
@@ -384,11 +452,11 @@ void smp_call_function_many(const struct cpumask *mask,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
+	arch_send_call_function_ipi_mask(data->cpumask);
 
 	/* optionally wait for the CPUs to complete */
 	if (wait)
-		csd_flag_wait(&data->csd);
+		csd_wait(&data->csd);
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -418,20 +486,20 @@ EXPORT_SYMBOL(smp_call_function);
 
 void ipi_call_lock(void)
 {
-	spin_lock(&call_function_lock);
+	spin_lock(&call_function.lock);
 }
 
 void ipi_call_unlock(void)
 {
-	spin_unlock(&call_function_lock);
+	spin_unlock(&call_function.lock);
 }
 
 void ipi_call_lock_irq(void)
 {
-	spin_lock_irq(&call_function_lock);
+	spin_lock_irq(&call_function.lock);
 }
 
 void ipi_call_unlock_irq(void)
 {
-	spin_unlock_irq(&call_function_lock);
+	spin_unlock_irq(&call_function.lock);
 }