-rw-r--r--  block/blk-softirq.c  |   2
-rw-r--r--  include/linux/smp.h  |   3
-rw-r--r--  kernel/sched.c       |   2
-rw-r--r--  kernel/smp.c         | 427
-rw-r--r--  kernel/softirq.c     |   2
5 files changed, 238 insertions, 198 deletions
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ce0efc6b26dc..ee9c21602228 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct request *rq)
64 data->info = rq; 64 data->info = rq;
65 data->flags = 0; 65 data->flags = 0;
66 66
67 __smp_call_function_single(cpu, data); 67 __smp_call_function_single(cpu, data, 0);
68 return 0; 68 return 0;
69 } 69 }
70 70
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 715196b09d67..00866d7fdf34 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -82,7 +82,8 @@ smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
82 return 0; 82 return 0;
83} 83}
84 84
85void __smp_call_function_single(int cpuid, struct call_single_data *data); 85void __smp_call_function_single(int cpuid, struct call_single_data *data,
86 int wait);
86 87
87/* 88/*
88 * Generic and arch helpers 89 * Generic and arch helpers
diff --git a/kernel/sched.c b/kernel/sched.c
index 8e2558c2ba67..f1cd71918240 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1093,7 +1093,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
1093 if (rq == this_rq()) { 1093 if (rq == this_rq()) {
1094 hrtimer_restart(timer); 1094 hrtimer_restart(timer);
1095 } else if (!rq->hrtick_csd_pending) { 1095 } else if (!rq->hrtick_csd_pending) {
1096 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); 1096 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
1097 rq->hrtick_csd_pending = 1; 1097 rq->hrtick_csd_pending = 1;
1098 } 1098 }
1099} 1099}
diff --git a/kernel/smp.c b/kernel/smp.c
index bbedbb7efe32..7ad2262d2eca 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -2,40 +2,81 @@
2 * Generic helpers for smp ipi calls 2 * Generic helpers for smp ipi calls
3 * 3 *
4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008 4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5 *
6 */ 5 */
7#include <linux/init.h>
8#include <linux/module.h>
9#include <linux/percpu.h>
10#include <linux/rcupdate.h> 6#include <linux/rcupdate.h>
11#include <linux/rculist.h> 7#include <linux/rculist.h>
8#include <linux/module.h>
9#include <linux/percpu.h>
10#include <linux/init.h>
12#include <linux/smp.h> 11#include <linux/smp.h>
12#include <linux/cpu.h>
13 13
14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); 14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
15static LIST_HEAD(call_function_queue); 15
16__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock); 16static struct {
17 struct list_head queue;
18 spinlock_t lock;
19} call_function __cacheline_aligned_in_smp =
20 {
21 .queue = LIST_HEAD_INIT(call_function.queue),
22 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock),
23 };
17 24
18enum { 25enum {
19 CSD_FLAG_WAIT = 0x01, 26 CSD_FLAG_LOCK = 0x01,
20 CSD_FLAG_ALLOC = 0x02,
21 CSD_FLAG_LOCK = 0x04,
22}; 27};
23 28
24struct call_function_data { 29struct call_function_data {
25 struct call_single_data csd; 30 struct call_single_data csd;
26 spinlock_t lock; 31 spinlock_t lock;
27 unsigned int refs; 32 unsigned int refs;
28 struct rcu_head rcu_head; 33 cpumask_var_t cpumask;
29 unsigned long cpumask_bits[];
30}; 34};
31 35
32struct call_single_queue { 36struct call_single_queue {
33 struct list_head list; 37 struct list_head list;
34 spinlock_t lock; 38 spinlock_t lock;
39};
40
41static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
42 .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
43};
44
45static int
46hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
47{
48 long cpu = (long)hcpu;
49 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
50
51 switch (action) {
52 case CPU_UP_PREPARE:
53 case CPU_UP_PREPARE_FROZEN:
54 if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
55 cpu_to_node(cpu)))
56 return NOTIFY_BAD;
57 break;
58
59#ifdef CONFIG_CPU_HOTPLUG
60 case CPU_UP_CANCELED:
61 case CPU_UP_CANCELED_FROZEN:
62
63 case CPU_DEAD:
64 case CPU_DEAD_FROZEN:
65 free_cpumask_var(cfd->cpumask);
66 break;
67#endif
68 };
69
70 return NOTIFY_OK;
71}
72
73static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
74 .notifier_call = hotplug_cfd,
35}; 75};
36 76
37static int __cpuinit init_call_single_data(void) 77static int __cpuinit init_call_single_data(void)
38{ 78{
79 void *cpu = (void *)(long)smp_processor_id();
39 int i; 80 int i;
40 81
41 for_each_possible_cpu(i) { 82 for_each_possible_cpu(i) {
@@ -44,29 +85,63 @@ static int __cpuinit init_call_single_data(void)
44 spin_lock_init(&q->lock); 85 spin_lock_init(&q->lock);
45 INIT_LIST_HEAD(&q->list); 86 INIT_LIST_HEAD(&q->list);
46 } 87 }
88
89 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
90 register_cpu_notifier(&hotplug_cfd_notifier);
91
47 return 0; 92 return 0;
48} 93}
49early_initcall(init_call_single_data); 94early_initcall(init_call_single_data);
50 95
51static void csd_flag_wait(struct call_single_data *data) 96/*
97 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
98 *
99 * For non-synchronous ipi calls the csd can still be in use by the
100 * previous function call. For multi-cpu calls its even more interesting
101 * as we'll have to ensure no other cpu is observing our csd.
102 */
103static void csd_lock_wait(struct call_single_data *data)
52{ 104{
53 /* Wait for response */ 105 while (data->flags & CSD_FLAG_LOCK)
54 do {
55 if (!(data->flags & CSD_FLAG_WAIT))
56 break;
57 cpu_relax(); 106 cpu_relax();
58 } while (1); 107}
108
109static void csd_lock(struct call_single_data *data)
110{
111 csd_lock_wait(data);
112 data->flags = CSD_FLAG_LOCK;
113
114 /*
115 * prevent CPU from reordering the above assignment
116 * to ->flags with any subsequent assignments to other
117 * fields of the specified call_single_data structure:
118 */
119 smp_mb();
120}
121
122static void csd_unlock(struct call_single_data *data)
123{
124 WARN_ON(!(data->flags & CSD_FLAG_LOCK));
125
126 /*
127 * ensure we're all done before releasing data:
128 */
129 smp_mb();
130
131 data->flags &= ~CSD_FLAG_LOCK;
59} 132}
60 133
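[The csd_lock()/csd_unlock() pair above is what lets a single per-cpu call_single_data be reused for fire-and-forget calls: the next caller spins until the previous IPI handler has dropped CSD_FLAG_LOCK. A minimal caller-side sketch, assuming it lives in kernel/smp.c (csd_lock() and generic_exec_single() are static there) and that preemption is already disabled; example_csd and fire_and_forget() are hypothetical names:]

    static DEFINE_PER_CPU(struct call_single_data, example_csd);

    static void fire_and_forget(int cpu, void (*func)(void *), void *info)
    {
            struct call_single_data *data = &__get_cpu_var(example_csd);

            csd_lock(data);         /* spins while a previous call still owns it */
            data->func = func;
            data->info = info;
            generic_exec_single(cpu, data, 0);  /* target cpu csd_unlock()s it */
    }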
61/* 134/*
62 * Insert a previously allocated call_single_data element for execution 135 * Insert a previously allocated call_single_data element
63 * on the given CPU. data must already have ->func, ->info, and ->flags set. 136 * for execution on the given CPU. data must already have
137 * ->func, ->info, and ->flags set.
64 */ 138 */
65static void generic_exec_single(int cpu, struct call_single_data *data) 139static
140void generic_exec_single(int cpu, struct call_single_data *data, int wait)
66{ 141{
67 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); 142 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
68 int wait = data->flags & CSD_FLAG_WAIT, ipi;
69 unsigned long flags; 143 unsigned long flags;
144 int ipi;
70 145
71 spin_lock_irqsave(&dst->lock, flags); 146 spin_lock_irqsave(&dst->lock, flags);
72 ipi = list_empty(&dst->list); 147 ipi = list_empty(&dst->list);
@@ -74,24 +149,21 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
74 spin_unlock_irqrestore(&dst->lock, flags); 149 spin_unlock_irqrestore(&dst->lock, flags);
75 150
76 /* 151 /*
77 * Make the list addition visible before sending the ipi. 152 * The list addition should be visible before sending the IPI
153 * handler locks the list to pull the entry off it because of
154 * normal cache coherency rules implied by spinlocks.
155 *
156 * If IPIs can go out of order to the cache coherency protocol
157 * in an architecture, sufficient synchronisation should be added
158 * to arch code to make it appear to obey cache coherency WRT
159 * locking and barrier primitives. Generic code isn't really
160 * equipped to do the right thing...
78 */ 161 */
79 smp_mb();
80
81 if (ipi) 162 if (ipi)
82 arch_send_call_function_single_ipi(cpu); 163 arch_send_call_function_single_ipi(cpu);
83 164
84 if (wait) 165 if (wait)
85 csd_flag_wait(data); 166 csd_lock_wait(data);
86}
87
88static void rcu_free_call_data(struct rcu_head *head)
89{
90 struct call_function_data *data;
91
92 data = container_of(head, struct call_function_data, rcu_head);
93
94 kfree(data);
95} 167}
96 168
97/* 169/*
@@ -104,99 +176,83 @@ void generic_smp_call_function_interrupt(void)
104 int cpu = get_cpu(); 176 int cpu = get_cpu();
105 177
106 /* 178 /*
107 * It's ok to use list_for_each_rcu() here even though we may delete 179 * Ensure entry is visible on call_function_queue after we have
108 * 'pos', since list_del_rcu() doesn't clear ->next 180 * entered the IPI. See comment in smp_call_function_many.
181 * If we don't have this, then we may miss an entry on the list
182 * and never get another IPI to process it.
183 */
184 smp_mb();
185
186 /*
187 * It's ok to use list_for_each_rcu() here even though we may
188 * delete 'pos', since list_del_rcu() doesn't clear ->next
109 */ 189 */
110 rcu_read_lock(); 190 list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
111 list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
112 int refs; 191 int refs;
113 192
114 if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits))) 193 spin_lock(&data->lock);
194 if (!cpumask_test_cpu(cpu, data->cpumask)) {
195 spin_unlock(&data->lock);
115 continue; 196 continue;
197 }
198 cpumask_clear_cpu(cpu, data->cpumask);
199 spin_unlock(&data->lock);
116 200
117 data->csd.func(data->csd.info); 201 data->csd.func(data->csd.info);
118 202
119 spin_lock(&data->lock); 203 spin_lock(&data->lock);
120 cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
121 WARN_ON(data->refs == 0); 204 WARN_ON(data->refs == 0);
122 data->refs--; 205 refs = --data->refs;
123 refs = data->refs; 206 if (!refs) {
207 spin_lock(&call_function.lock);
208 list_del_rcu(&data->csd.list);
209 spin_unlock(&call_function.lock);
210 }
124 spin_unlock(&data->lock); 211 spin_unlock(&data->lock);
125 212
126 if (refs) 213 if (refs)
127 continue; 214 continue;
128 215
129 spin_lock(&call_function_lock); 216 csd_unlock(&data->csd);
130 list_del_rcu(&data->csd.list);
131 spin_unlock(&call_function_lock);
132
133 if (data->csd.flags & CSD_FLAG_WAIT) {
134 /*
135 * serialize stores to data with the flag clear
136 * and wakeup
137 */
138 smp_wmb();
139 data->csd.flags &= ~CSD_FLAG_WAIT;
140 }
141 if (data->csd.flags & CSD_FLAG_ALLOC)
142 call_rcu(&data->rcu_head, rcu_free_call_data);
143 } 217 }
144 rcu_read_unlock();
145 218
146 put_cpu(); 219 put_cpu();
147} 220}
148 221
149/* 222/*
150 * Invoked by arch to handle an IPI for call function single. Must be called 223 * Invoked by arch to handle an IPI for call function single. Must be
151 * from the arch with interrupts disabled. 224 * called from the arch with interrupts disabled.
152 */ 225 */
153void generic_smp_call_function_single_interrupt(void) 226void generic_smp_call_function_single_interrupt(void)
154{ 227{
155 struct call_single_queue *q = &__get_cpu_var(call_single_queue); 228 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
229 unsigned int data_flags;
156 LIST_HEAD(list); 230 LIST_HEAD(list);
157 231
158 /* 232 spin_lock(&q->lock);
159 * Need to see other stores to list head for checking whether 233 list_replace_init(&q->list, &list);
160 * list is empty without holding q->lock 234 spin_unlock(&q->lock);
161 */ 235
162 smp_read_barrier_depends(); 236 while (!list_empty(&list)) {
163 while (!list_empty(&q->list)) { 237 struct call_single_data *data;
164 unsigned int data_flags; 238
165 239 data = list_entry(list.next, struct call_single_data, list);
166 spin_lock(&q->lock); 240 list_del(&data->list);
167 list_replace_init(&q->list, &list); 241
168 spin_unlock(&q->lock); 242 /*
169 243 * 'data' can be invalid after this call if flags == 0
170 while (!list_empty(&list)) { 244 * (when called through generic_exec_single()),
171 struct call_single_data *data; 245 * so save them away before making the call:
172 246 */
173 data = list_entry(list.next, struct call_single_data, 247 data_flags = data->flags;
174 list); 248
175 list_del(&data->list); 249 data->func(data->info);
176 250
177 /*
178 * 'data' can be invalid after this call if
179 * flags == 0 (when called through
180 * generic_exec_single(), so save them away before
181 * making the call.
182 */
183 data_flags = data->flags;
184
185 data->func(data->info);
186
187 if (data_flags & CSD_FLAG_WAIT) {
188 smp_wmb();
189 data->flags &= ~CSD_FLAG_WAIT;
190 } else if (data_flags & CSD_FLAG_LOCK) {
191 smp_wmb();
192 data->flags &= ~CSD_FLAG_LOCK;
193 } else if (data_flags & CSD_FLAG_ALLOC)
194 kfree(data);
195 }
196 /* 251 /*
197 * See comment on outer loop 252 * Unlocked CSDs are valid through generic_exec_single():
198 */ 253 */
199 smp_read_barrier_depends(); 254 if (data_flags & CSD_FLAG_LOCK)
255 csd_unlock(data);
200 } 256 }
201} 257}
202 258
@@ -215,65 +271,45 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data);
215int smp_call_function_single(int cpu, void (*func) (void *info), void *info, 271int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
216 int wait) 272 int wait)
217{ 273{
218 struct call_single_data d; 274 struct call_single_data d = {
275 .flags = 0,
276 };
219 unsigned long flags; 277 unsigned long flags;
220 /* prevent preemption and reschedule on another processor, 278 int this_cpu;
221 as well as CPU removal */
222 int me = get_cpu();
223 int err = 0; 279 int err = 0;
224 280
281 /*
282 * prevent preemption and reschedule on another processor,
283 * as well as CPU removal
284 */
285 this_cpu = get_cpu();
286
225 /* Can deadlock when called with interrupts disabled */ 287 /* Can deadlock when called with interrupts disabled */
226 WARN_ON(irqs_disabled()); 288 WARN_ON(irqs_disabled());
227 289
228 if (cpu == me) { 290 if (cpu == this_cpu) {
229 local_irq_save(flags); 291 local_irq_save(flags);
230 func(info); 292 func(info);
231 local_irq_restore(flags); 293 local_irq_restore(flags);
232 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 294 } else {
233 struct call_single_data *data; 295 if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
296 struct call_single_data *data = &d;
297
298 if (!wait)
299 data = &__get_cpu_var(csd_data);
234 300
235 if (!wait) { 301 csd_lock(data);
236 /* 302
237 * We are calling a function on a single CPU 303 data->func = func;
238 * and we are not going to wait for it to finish. 304 data->info = info;
239 * We first try to allocate the data, but if we 305 generic_exec_single(cpu, data, wait);
240 * fail, we fall back to use a per cpu data to pass
241 * the information to that CPU. Since all callers
242 * of this code will use the same data, we must
243 * synchronize the callers to prevent a new caller
244 * from corrupting the data before the callee
245 * can access it.
246 *
247 * The CSD_FLAG_LOCK is used to let us know when
248 * the IPI handler is done with the data.
249 * The first caller will set it, and the callee
250 * will clear it. The next caller must wait for
251 * it to clear before we set it again. This
252 * will make sure the callee is done with the
253 * data before a new caller will use it.
254 */
255 data = kmalloc(sizeof(*data), GFP_ATOMIC);
256 if (data)
257 data->flags = CSD_FLAG_ALLOC;
258 else {
259 data = &per_cpu(csd_data, me);
260 while (data->flags & CSD_FLAG_LOCK)
261 cpu_relax();
262 data->flags = CSD_FLAG_LOCK;
263 }
264 } else { 306 } else {
265 data = &d; 307 err = -ENXIO; /* CPU not online */
266 data->flags = CSD_FLAG_WAIT;
267 } 308 }
268
269 data->func = func;
270 data->info = info;
271 generic_exec_single(cpu, data);
272 } else {
273 err = -ENXIO; /* CPU not online */
274 } 309 }
275 310
276 put_cpu(); 311 put_cpu();
312
277 return err; 313 return err;
278} 314}
279EXPORT_SYMBOL(smp_call_function_single); 315EXPORT_SYMBOL(smp_call_function_single);
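[With CSD_FLAG_WAIT gone, the @wait argument alone decides whether smp_call_function_single() blocks. A minimal usage sketch, assuming process context with interrupts enabled; bump_counter() and poke_cpu() are hypothetical names:]

    #include <linux/smp.h>
    #include <asm/atomic.h>

    static void bump_counter(void *info)
    {
            atomic_inc(info);       /* runs on the target cpu, IPI context */
    }

    /* increment *counter on 'cpu' and wait until it has happened */
    static int poke_cpu(int cpu, atomic_t *counter)
    {
            return smp_call_function_single(cpu, bump_counter, counter, 1);
    }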
@@ -283,23 +319,26 @@ EXPORT_SYMBOL(smp_call_function_single);
283 * @cpu: The CPU to run on. 319 * @cpu: The CPU to run on.
284 * @data: Pre-allocated and setup data structure 320 * @data: Pre-allocated and setup data structure
285 * 321 *
286 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated 322 * Like smp_call_function_single(), but allow caller to pass in a
287 * data structure. Useful for embedding @data inside other structures, for 323 * pre-allocated data structure. Useful for embedding @data inside
288 * instance. 324 * other structures, for instance.
289 *
290 */ 325 */
291void __smp_call_function_single(int cpu, struct call_single_data *data) 326void __smp_call_function_single(int cpu, struct call_single_data *data,
327 int wait)
292{ 328{
329 csd_lock(data);
330
293 /* Can deadlock when called with interrupts disabled */ 331 /* Can deadlock when called with interrupts disabled */
294 WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled()); 332 WARN_ON(wait && irqs_disabled());
295 333
296 generic_exec_single(cpu, data); 334 generic_exec_single(cpu, data, wait);
297} 335}
298 336
299/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */ 337/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
338
300#ifndef arch_send_call_function_ipi_mask 339#ifndef arch_send_call_function_ipi_mask
301#define arch_send_call_function_ipi_mask(maskp) \ 340# define arch_send_call_function_ipi_mask(maskp) \
302 arch_send_call_function_ipi(*(maskp)) 341 arch_send_call_function_ipi(*(maskp))
303#endif 342#endif
304 343
305/** 344/**
@@ -307,7 +346,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
307 * @mask: The set of cpus to run on (only runs on online subset). 346 * @mask: The set of cpus to run on (only runs on online subset).
308 * @func: The function to run. This must be fast and non-blocking. 347 * @func: The function to run. This must be fast and non-blocking.
309 * @info: An arbitrary pointer to pass to the function. 348 * @info: An arbitrary pointer to pass to the function.
310 * @wait: If true, wait (atomically) until function has completed on other CPUs. 349 * @wait: If true, wait (atomically) until function has completed
350 * on other CPUs.
311 * 351 *
312 * If @wait is true, then returns once @func has returned. Note that @wait 352 * If @wait is true, then returns once @func has returned. Note that @wait
313 * will be implicitly turned on in case of allocation failures, since 353 * will be implicitly turned on in case of allocation failures, since
@@ -318,27 +358,27 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
318 * must be disabled when calling this function. 358 * must be disabled when calling this function.
319 */ 359 */
320void smp_call_function_many(const struct cpumask *mask, 360void smp_call_function_many(const struct cpumask *mask,
321 void (*func)(void *), void *info, 361 void (*func)(void *), void *info, bool wait)
322 bool wait)
323{ 362{
324 struct call_function_data *data; 363 struct call_function_data *data;
325 unsigned long flags; 364 unsigned long flags;
326 int cpu, next_cpu; 365 int cpu, next_cpu, this_cpu = smp_processor_id();
327 366
328 /* Can deadlock when called with interrupts disabled */ 367 /* Can deadlock when called with interrupts disabled */
329 WARN_ON(irqs_disabled()); 368 WARN_ON(irqs_disabled());
330 369
331 /* So, what's a CPU they want? Ignoring this one. */ 370 /* So, what's a CPU they want? Ignoring this one. */
332 cpu = cpumask_first_and(mask, cpu_online_mask); 371 cpu = cpumask_first_and(mask, cpu_online_mask);
333 if (cpu == smp_processor_id()) 372 if (cpu == this_cpu)
334 cpu = cpumask_next_and(cpu, mask, cpu_online_mask); 373 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
374
335 /* No online cpus? We're done. */ 375 /* No online cpus? We're done. */
336 if (cpu >= nr_cpu_ids) 376 if (cpu >= nr_cpu_ids)
337 return; 377 return;
338 378
339 /* Do we have another CPU which isn't us? */ 379 /* Do we have another CPU which isn't us? */
340 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); 380 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
341 if (next_cpu == smp_processor_id()) 381 if (next_cpu == this_cpu)
342 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); 382 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
343 383
344 /* Fastpath: do that cpu by itself. */ 384 /* Fastpath: do that cpu by itself. */
@@ -347,43 +387,40 @@ void smp_call_function_many(const struct cpumask *mask,
347 return; 387 return;
348 } 388 }
349 389
350 data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC); 390 data = &__get_cpu_var(cfd_data);
351 if (unlikely(!data)) { 391 csd_lock(&data->csd);
352 /* Slow path. */
353 for_each_online_cpu(cpu) {
354 if (cpu == smp_processor_id())
355 continue;
356 if (cpumask_test_cpu(cpu, mask))
357 smp_call_function_single(cpu, func, info, wait);
358 }
359 return;
360 }
361 392
362 spin_lock_init(&data->lock); 393 spin_lock_irqsave(&data->lock, flags);
363 data->csd.flags = CSD_FLAG_ALLOC;
364 if (wait)
365 data->csd.flags |= CSD_FLAG_WAIT;
366 data->csd.func = func; 394 data->csd.func = func;
367 data->csd.info = info; 395 data->csd.info = info;
368 cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask); 396 cpumask_and(data->cpumask, mask, cpu_online_mask);
369 cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits)); 397 cpumask_clear_cpu(this_cpu, data->cpumask);
370 data->refs = cpumask_weight(to_cpumask(data->cpumask_bits)); 398 data->refs = cpumask_weight(data->cpumask);
371 399
372 spin_lock_irqsave(&call_function_lock, flags); 400 spin_lock(&call_function.lock);
373 list_add_tail_rcu(&data->csd.list, &call_function_queue); 401 /*
374 spin_unlock_irqrestore(&call_function_lock, flags); 402 * Place entry at the _HEAD_ of the list, so that any cpu still
403 * observing the entry in generic_smp_call_function_interrupt()
404 * will not miss any other list entries:
405 */
406 list_add_rcu(&data->csd.list, &call_function.queue);
407 spin_unlock(&call_function.lock);
408
409 spin_unlock_irqrestore(&data->lock, flags);
375 410
376 /* 411 /*
377 * Make the list addition visible before sending the ipi. 412 * Make the list addition visible before sending the ipi.
413 * (IPIs must obey or appear to obey normal Linux cache
414 * coherency rules -- see comment in generic_exec_single).
378 */ 415 */
379 smp_mb(); 416 smp_mb();
380 417
381 /* Send a message to all CPUs in the map */ 418 /* Send a message to all CPUs in the map */
382 arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits)); 419 arch_send_call_function_ipi_mask(data->cpumask);
383 420
384 /* optionally wait for the CPUs to complete */ 421 /* Optionally wait for the CPUs to complete */
385 if (wait) 422 if (wait)
386 csd_flag_wait(&data->csd); 423 csd_lock_wait(&data->csd);
387} 424}
388EXPORT_SYMBOL(smp_call_function_many); 425EXPORT_SYMBOL(smp_call_function_many);
389 426
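[A minimal usage sketch of smp_call_function_many(), honouring the kerneldoc above (preemption disabled by the caller, callback fast and non-blocking); flush_local_state() and flush_remote_state() are hypothetical names:]

    #include <linux/cpumask.h>
    #include <linux/smp.h>

    static void flush_local_state(void *unused)
    {
            /* fast, non-blocking per-cpu work; runs in IPI context */
    }

    /* run flush_local_state() on every online cpu in 'mask' except us, and wait */
    static void flush_remote_state(const struct cpumask *mask)
    {
            preempt_disable();
            smp_call_function_many(mask, flush_local_state, NULL, true);
            preempt_enable();
    }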
@@ -391,7 +428,8 @@ EXPORT_SYMBOL(smp_call_function_many);
391 * smp_call_function(): Run a function on all other CPUs. 428 * smp_call_function(): Run a function on all other CPUs.
392 * @func: The function to run. This must be fast and non-blocking. 429 * @func: The function to run. This must be fast and non-blocking.
393 * @info: An arbitrary pointer to pass to the function. 430 * @info: An arbitrary pointer to pass to the function.
394 * @wait: If true, wait (atomically) until function has completed on other CPUs. 431 * @wait: If true, wait (atomically) until function has completed
432 * on other CPUs.
395 * 433 *
396 * Returns 0. 434 * Returns 0.
397 * 435 *
@@ -407,26 +445,27 @@ int smp_call_function(void (*func)(void *), void *info, int wait)
407 preempt_disable(); 445 preempt_disable();
408 smp_call_function_many(cpu_online_mask, func, info, wait); 446 smp_call_function_many(cpu_online_mask, func, info, wait);
409 preempt_enable(); 447 preempt_enable();
448
410 return 0; 449 return 0;
411} 450}
412EXPORT_SYMBOL(smp_call_function); 451EXPORT_SYMBOL(smp_call_function);
413 452
414void ipi_call_lock(void) 453void ipi_call_lock(void)
415{ 454{
416 spin_lock(&call_function_lock); 455 spin_lock(&call_function.lock);
417} 456}
418 457
419void ipi_call_unlock(void) 458void ipi_call_unlock(void)
420{ 459{
421 spin_unlock(&call_function_lock); 460 spin_unlock(&call_function.lock);
422} 461}
423 462
424void ipi_call_lock_irq(void) 463void ipi_call_lock_irq(void)
425{ 464{
426 spin_lock_irq(&call_function_lock); 465 spin_lock_irq(&call_function.lock);
427} 466}
428 467
429void ipi_call_unlock_irq(void) 468void ipi_call_unlock_irq(void)
430{ 469{
431 spin_unlock_irq(&call_function_lock); 470 spin_unlock_irq(&call_function.lock);
432} 471}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 9041ea7948fe..ebe2a4d59f2c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -496,7 +496,7 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir
496 cp->flags = 0; 496 cp->flags = 0;
497 cp->priv = softirq; 497 cp->priv = softirq;
498 498
499 __smp_call_function_single(cpu, cp); 499 __smp_call_function_single(cpu, cp, 0);
500 return 0; 500 return 0;
501 } 501 }
502 return 1; 502 return 1;