 block/blk-softirq.c |   2 +-
 include/linux/smp.h |   3 ++-
 kernel/sched.c      |   2 +-
 kernel/smp.c        | 427 ++++++++++++++++++++++++++++-------------------------
 kernel/softirq.c    |   2 +-
 5 files changed, 238 insertions(+), 198 deletions(-)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ce0efc6b26dc..ee9c21602228 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct request *rq)
 	data->info = rq;
 	data->flags = 0;
 
-	__smp_call_function_single(cpu, data);
+	__smp_call_function_single(cpu, data, 0);
 	return 0;
 }
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 715196b09d67..00866d7fdf34 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -82,7 +82,8 @@ smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
 	return 0;
 }
 
-void __smp_call_function_single(int cpuid, struct call_single_data *data);
+void __smp_call_function_single(int cpuid, struct call_single_data *data,
+				int wait);
 
 /*
  * Generic and arch helpers
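A usage sketch of the new signature (illustrative names only, not part of this patch): a caller embeds a call_single_data in its own object, fills in ->func/->info, and now passes wait explicitly instead of encoding it in ->flags, just as the blk-softirq and sched hunks do with wait == 0.

/* Hypothetical caller of the post-patch API -- a sketch, not kernel source. */
#include <linux/smp.h>

struct my_dev {
	struct call_single_data csd;	/* embedded, reused for every kick */
	void *payload;
};

static void my_remote_fn(void *info)
{
	struct my_dev *dev = info;
	/* runs on the target CPU in hardirq context: must not sleep */
}

static void my_kick(struct my_dev *dev, int cpu)
{
	dev->csd.flags = 0;	/* CSD_FLAG_LOCK is managed inside kernel/smp.c */
	dev->csd.func = my_remote_fn;
	dev->csd.info = dev;

	/* wait == 0: fire and forget; wait == 1: spin until my_remote_fn returns */
	__smp_call_function_single(cpu, &dev->csd, 0);
}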
diff --git a/kernel/sched.c b/kernel/sched.c
index 8e2558c2ba67..f1cd71918240 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1093,7 +1093,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
 	if (rq == this_rq()) {
 		hrtimer_restart(timer);
 	} else if (!rq->hrtick_csd_pending) {
-		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
 		rq->hrtick_csd_pending = 1;
 	}
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index bbedbb7efe32..7ad2262d2eca 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -2,40 +2,81 @@
  * Generic helpers for smp ipi calls
  *
  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
- *
  */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/percpu.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 
 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
-static LIST_HEAD(call_function_queue);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+
+static struct {
+	struct list_head	queue;
+	spinlock_t		lock;
+} call_function __cacheline_aligned_in_smp =
+	{
+		.queue		= LIST_HEAD_INIT(call_function.queue),
+		.lock		= __SPIN_LOCK_UNLOCKED(call_function.lock),
+	};
 
 enum {
-	CSD_FLAG_WAIT		= 0x01,
-	CSD_FLAG_ALLOC		= 0x02,
-	CSD_FLAG_LOCK		= 0x04,
+	CSD_FLAG_LOCK		= 0x01,
 };
 
 struct call_function_data {
 	struct call_single_data	csd;
 	spinlock_t		lock;
 	unsigned int		refs;
-	struct rcu_head		rcu_head;
-	unsigned long		cpumask_bits[];
+	cpumask_var_t		cpumask;
 };
 
 struct call_single_queue {
 	struct list_head	list;
 	spinlock_t		lock;
+};
+
+static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
+	.lock			= __SPIN_LOCK_UNLOCKED(cfd_data.lock),
+};
+
+static int
+hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+				cpu_to_node(cpu)))
+			return NOTIFY_BAD;
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		free_cpumask_var(cfd->cpumask);
+		break;
+#endif
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
+	.notifier_call		= hotplug_cfd,
 };
 
 static int __cpuinit init_call_single_data(void)
 {
+	void *cpu = (void *)(long)smp_processor_id();
 	int i;
 
 	for_each_possible_cpu(i) {
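The kmalloc()/call_rcu() scheme is gone: each CPU now owns a single cfd_data slot for its outgoing multi-call, and only the embedded cpumask may need a real allocation. With CONFIG_CPUMASK_OFFSTACK, cpumask_var_t is a pointer that alloc_cpumask_var_node() fills in; without it, it is a plain bitmap and the allocation is a no-op that always succeeds. A sketch of that hotplug-managed allocation pattern in isolation (the resource name is hypothetical):

/* Sketch only: a per-cpu cpumask allocated at CPU_UP_PREPARE, freed at CPU_DEAD. */
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/cpu.h>

struct my_res {
	cpumask_var_t mask;	/* pointer with CONFIG_CPUMASK_OFFSTACK, array otherwise */
};

static DEFINE_PER_CPU(struct my_res, my_res);

static int my_cpu_up_prepare(long cpu)
{
	/* allocate on the memory node the incoming CPU belongs to */
	if (!alloc_cpumask_var_node(&per_cpu(my_res, cpu).mask, GFP_KERNEL,
				    cpu_to_node(cpu)))
		return -ENOMEM;	/* the notifier above maps this to NOTIFY_BAD */
	return 0;
}

static void my_cpu_dead(long cpu)
{
	free_cpumask_var(per_cpu(my_res, cpu).mask);	/* no-op if on-stack */
}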
@@ -44,29 +85,63 @@ static int __cpuinit init_call_single_data(void)
 		spin_lock_init(&q->lock);
 		INIT_LIST_HEAD(&q->list);
 	}
+
+	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
+	register_cpu_notifier(&hotplug_cfd_notifier);
+
 	return 0;
 }
 early_initcall(init_call_single_data);
 
-static void csd_flag_wait(struct call_single_data *data)
+/*
+ * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
+ *
+ * For non-synchronous ipi calls the csd can still be in use by the
+ * previous function call. For multi-cpu calls it's even more interesting,
+ * as we'll have to ensure no other cpu is observing our csd.
+ */
+static void csd_lock_wait(struct call_single_data *data)
 {
-	/* Wait for response */
-	do {
-		if (!(data->flags & CSD_FLAG_WAIT))
-			break;
+	while (data->flags & CSD_FLAG_LOCK)
 		cpu_relax();
-	} while (1);
+}
+
+static void csd_lock(struct call_single_data *data)
+{
+	csd_lock_wait(data);
+	data->flags = CSD_FLAG_LOCK;
+
+	/*
+	 * prevent CPU from reordering the above assignment
+	 * to ->flags with any subsequent assignments to other
+	 * fields of the specified call_single_data structure:
+	 */
+	smp_mb();
+}
+
+static void csd_unlock(struct call_single_data *data)
+{
+	WARN_ON(!(data->flags & CSD_FLAG_LOCK));
+
+	/*
+	 * ensure we're all done before releasing data:
+	 */
+	smp_mb();
+
+	data->flags &= ~CSD_FLAG_LOCK;
 }
 
 /*
- * Insert a previously allocated call_single_data element for execution
- * on the given CPU. data must already have ->func, ->info, and ->flags set.
+ * Insert a previously allocated call_single_data element
+ * for execution on the given CPU. data must already have
+ * ->func, ->info, and ->flags set.
  */
-static void generic_exec_single(int cpu, struct call_single_data *data)
+static
+void generic_exec_single(int cpu, struct call_single_data *data, int wait)
 {
 	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
-	int wait = data->flags & CSD_FLAG_WAIT, ipi;
 	unsigned long flags;
+	int ipi;
 
 	spin_lock_irqsave(&dst->lock, flags);
 	ipi = list_empty(&dst->list);
@@ -74,24 +149,21 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
 	spin_unlock_irqrestore(&dst->lock, flags);
 
 	/*
-	 * Make the list addition visible before sending the ipi.
+	 * The list addition should be visible before the IPI handler
+	 * locks the list to pull the entry off it, because of the
+	 * normal cache coherency rules implied by spinlocks.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
 	 */
-	smp_mb();
-
 	if (ipi)
 		arch_send_call_function_single_ipi(cpu);
 
 	if (wait)
-		csd_flag_wait(data);
-}
-
-static void rcu_free_call_data(struct rcu_head *head)
-{
-	struct call_function_data *data;
-
-	data = container_of(head, struct call_function_data, rcu_head);
-
-	kfree(data);
+		csd_lock_wait(data);
 }
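csd_lock()/csd_unlock() above implement a simple ownership handoff: a sender spins until CSD_FLAG_LOCK clears, claims the csd, and a full barrier keeps its payload stores from leaking ahead of the claim; the remote side releases with a matching barrier once it is done reading. A user-space miniature of the same protocol, with C11 atomics standing in for smp_mb() (an illustration under those assumptions, not kernel code):

/* Miniature of the CSD_FLAG_LOCK handoff, modelled with C11 atomics. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

struct csd {
	atomic_uint flags;			/* 0 = free, 1 = CSD_FLAG_LOCK */
	void (*func)(void *);
	void *info;
};

static struct csd csd;				/* like one per-cpu csd slot */
static atomic_int ipi;				/* stands in for the real IPI */

static void csd_lock(struct csd *c)
{
	unsigned int expected = 0;
	/* csd_lock_wait() + claim: acquire ordering keeps our payload
	 * stores from being reordered before the claim (the smp_mb()
	 * in the kernel's csd_lock()). */
	while (!atomic_compare_exchange_weak_explicit(&c->flags, &expected, 1,
			memory_order_acquire, memory_order_relaxed))
		expected = 0;
}

static void csd_unlock(struct csd *c)
{
	/* release ordering: "ensure we're all done before releasing data" */
	atomic_store_explicit(&c->flags, 0, memory_order_release);
}

static void hello(void *info)
{
	printf("%s\n", (const char *)info);
}

static void *handler(void *unused)		/* plays the IPI handler */
{
	while (!atomic_load_explicit(&ipi, memory_order_acquire))
		;				/* "waiting for the IPI" */
	csd.func(csd.info);
	csd_unlock(&csd);			/* csd may now be reused */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, handler, NULL);

	csd_lock(&csd);				/* claim before touching ->func/->info */
	csd.func = hello;
	csd.info = "remote call";
	atomic_store_explicit(&ipi, 1, memory_order_release);	/* "send the IPI" */

	csd_lock(&csd);				/* wait == 1: spin until the handler unlocks */
	csd_unlock(&csd);
	pthread_join(t, NULL);
	return 0;
}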
 
 /*
@@ -104,99 +176,83 @@ void generic_smp_call_function_interrupt(void)
 	int cpu = get_cpu();
 
 	/*
-	 * It's ok to use list_for_each_rcu() here even though we may delete
-	 * 'pos', since list_del_rcu() doesn't clear ->next
+	 * Ensure entry is visible on call_function_queue after we have
+	 * entered the IPI. See comment in smp_call_function_many.
+	 * If we don't have this, then we may miss an entry on the list
+	 * and never get another IPI to process it.
+	 */
+	smp_mb();
+
+	/*
+	 * It's ok to use list_for_each_rcu() here even though we may
+	 * delete 'pos', since list_del_rcu() doesn't clear ->next
 	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
 
-		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
+		spin_lock(&data->lock);
+		if (!cpumask_test_cpu(cpu, data->cpumask)) {
+			spin_unlock(&data->lock);
 			continue;
+		}
+		cpumask_clear_cpu(cpu, data->cpumask);
+		spin_unlock(&data->lock);
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
 		WARN_ON(data->refs == 0);
-		data->refs--;
-		refs = data->refs;
+		refs = --data->refs;
+		if (!refs) {
+			spin_lock(&call_function.lock);
+			list_del_rcu(&data->csd.list);
+			spin_unlock(&call_function.lock);
+		}
 		spin_unlock(&data->lock);
 
 		if (refs)
 			continue;
 
-		spin_lock(&call_function_lock);
-		list_del_rcu(&data->csd.list);
-		spin_unlock(&call_function_lock);
-
-		if (data->csd.flags & CSD_FLAG_WAIT) {
-			/*
-			 * serialize stores to data with the flag clear
-			 * and wakeup
-			 */
-			smp_wmb();
-			data->csd.flags &= ~CSD_FLAG_WAIT;
-		}
-		if (data->csd.flags & CSD_FLAG_ALLOC)
-			call_rcu(&data->rcu_head, rcu_free_call_data);
+		csd_unlock(&data->csd);
 	}
-	rcu_read_unlock();
 
 	put_cpu();
 }
 
 /*
- * Invoked by arch to handle an IPI for call function single. Must be called
- * from the arch with interrupts disabled.
+ * Invoked by arch to handle an IPI for call function single. Must be
+ * called from the arch with interrupts disabled.
  */
 void generic_smp_call_function_single_interrupt(void)
 {
 	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
+	unsigned int data_flags;
 	LIST_HEAD(list);
 
-	/*
-	 * Need to see other stores to list head for checking whether
-	 * list is empty without holding q->lock
-	 */
-	smp_read_barrier_depends();
-	while (!list_empty(&q->list)) {
-		unsigned int data_flags;
-
-		spin_lock(&q->lock);
-		list_replace_init(&q->list, &list);
-		spin_unlock(&q->lock);
-
-		while (!list_empty(&list)) {
-			struct call_single_data *data;
-
-			data = list_entry(list.next, struct call_single_data,
-					list);
-			list_del(&data->list);
-
-			/*
-			 * 'data' can be invalid after this call if
-			 * flags == 0 (when called through
-			 * generic_exec_single(), so save them away before
-			 * making the call.
-			 */
-			data_flags = data->flags;
-
-			data->func(data->info);
-
-			if (data_flags & CSD_FLAG_WAIT) {
-				smp_wmb();
-				data->flags &= ~CSD_FLAG_WAIT;
-			} else if (data_flags & CSD_FLAG_LOCK) {
-				smp_wmb();
-				data->flags &= ~CSD_FLAG_LOCK;
-			} else if (data_flags & CSD_FLAG_ALLOC)
-				kfree(data);
-		}
+	spin_lock(&q->lock);
+	list_replace_init(&q->list, &list);
+	spin_unlock(&q->lock);
+
+	while (!list_empty(&list)) {
+		struct call_single_data *data;
+
+		data = list_entry(list.next, struct call_single_data, list);
+		list_del(&data->list);
+
+		/*
+		 * 'data' can be invalid after this call if flags == 0
+		 * (when called through generic_exec_single()),
+		 * so save them away before making the call:
+		 */
+		data_flags = data->flags;
+
+		data->func(data->info);
+
 		/*
-		 * See comment on outer loop
+		 * Unlocked CSDs are valid through generic_exec_single():
 		 */
-		smp_read_barrier_depends();
+		if (data_flags & CSD_FLAG_LOCK)
+			csd_unlock(data);
 	}
 }
 
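The rewritten single-call handler also switches to a common drain idiom: splice the whole pending queue to a private list under the lock with list_replace_init(), then run the callbacks with the lock dropped, so new work can be queued concurrently. A self-contained user-space sketch of the same idiom (names illustrative):

/* Sketch: drain a shared list by splicing it to a private head first. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int val;
};

static struct node *queue;			/* shared, lock-protected */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void enqueue(int val)
{
	struct node *n = malloc(sizeof(*n));

	n->val = val;
	pthread_mutex_lock(&queue_lock);
	n->next = queue;			/* LIFO for brevity */
	queue = n;
	pthread_mutex_unlock(&queue_lock);
}

static void drain(void)
{
	/* like list_replace_init(): take everything, leave the queue empty */
	pthread_mutex_lock(&queue_lock);
	struct node *list = queue;
	queue = NULL;
	pthread_mutex_unlock(&queue_lock);

	/* process without holding the lock: callbacks may take a while,
	 * and producers can keep queueing in the meantime */
	while (list) {
		struct node *n = list;
		list = list->next;
		printf("%d\n", n->val);
		free(n);
	}
}

int main(void)
{
	enqueue(1);
	enqueue(2);
	drain();
	return 0;
}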
@@ -215,65 +271,45 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data);
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 			     int wait)
 {
-	struct call_single_data d;
+	struct call_single_data d = {
+		.flags = 0,
+	};
 	unsigned long flags;
-	/* prevent preemption and reschedule on another processor,
-	   as well as CPU removal */
-	int me = get_cpu();
+	int this_cpu;
 	int err = 0;
 
+	/*
+	 * prevent preemption and reschedule on another processor,
+	 * as well as CPU removal
+	 */
+	this_cpu = get_cpu();
+
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
-	if (cpu == me) {
+	if (cpu == this_cpu) {
 		local_irq_save(flags);
 		func(info);
 		local_irq_restore(flags);
-	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
-		struct call_single_data *data;
+	} else {
+		if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
+			struct call_single_data *data = &d;
+
+			if (!wait)
+				data = &__get_cpu_var(csd_data);
 
-		if (!wait) {
-			/*
-			 * We are calling a function on a single CPU
-			 * and we are not going to wait for it to finish.
-			 * We first try to allocate the data, but if we
-			 * fail, we fall back to use a per cpu data to pass
-			 * the information to that CPU. Since all callers
-			 * of this code will use the same data, we must
-			 * synchronize the callers to prevent a new caller
-			 * from corrupting the data before the callee
-			 * can access it.
-			 *
-			 * The CSD_FLAG_LOCK is used to let us know when
-			 * the IPI handler is done with the data.
-			 * The first caller will set it, and the callee
-			 * will clear it. The next caller must wait for
-			 * it to clear before we set it again. This
-			 * will make sure the callee is done with the
-			 * data before a new caller will use it.
-			 */
-			data = kmalloc(sizeof(*data), GFP_ATOMIC);
-			if (data)
-				data->flags = CSD_FLAG_ALLOC;
-			else {
-				data = &per_cpu(csd_data, me);
-				while (data->flags & CSD_FLAG_LOCK)
-					cpu_relax();
-				data->flags = CSD_FLAG_LOCK;
-			}
-		} else {
-			data = &d;
-			data->flags = CSD_FLAG_WAIT;
+			csd_lock(data);
+
+			data->func = func;
+			data->info = info;
+			generic_exec_single(cpu, data, wait);
+		} else {
+			err = -ENXIO;	/* CPU not online */
 		}
-
-		data->func = func;
-		data->info = info;
-		generic_exec_single(cpu, data);
-	} else {
-		err = -ENXIO;	/* CPU not online */
 	}
 
 	put_cpu();
+
 	return err;
 }
 EXPORT_SYMBOL(smp_call_function_single);
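For reference, a typical caller sketch (the function and struct names are illustrative): run a fast, non-sleeping callback on one CPU and wait for its result. With wait == 1 the csd may live on the caller's stack (d above); with wait == 0 the per-cpu csd_data slot is reused, serialized by CSD_FLAG_LOCK.

/* Hypothetical caller -- a sketch of the API, not part of this patch. */
#include <linux/smp.h>
#include <linux/kernel.h>

struct cpu_answer {
	int cpu;
	unsigned long value;
};

static void read_local_state(void *info)
{
	struct cpu_answer *a = info;	/* hardirq context: no sleeping */

	a->cpu = smp_processor_id();
	a->value = 42;			/* stand-in for a per-CPU register read */
}

static int query_cpu(int cpu)
{
	struct cpu_answer a;
	int err;

	/* wait == 1: 'a' can stay on the stack, we spin until done */
	err = smp_call_function_single(cpu, read_local_state, &a, 1);
	if (err)
		return err;		/* -ENXIO if the CPU is not online */

	pr_info("cpu%d answered %lu\n", a.cpu, a.value);
	return 0;
}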
@@ -283,23 +319,26 @@ EXPORT_SYMBOL(smp_call_function_single);
  * @cpu: The CPU to run on.
  * @data: Pre-allocated and setup data structure
  *
- * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
- * data structure. Useful for embedding @data inside other structures, for
- * instance.
- *
+ * Like smp_call_function_single(), but allow caller to pass in a
+ * pre-allocated data structure. Useful for embedding @data inside
+ * other structures, for instance.
  */
-void __smp_call_function_single(int cpu, struct call_single_data *data)
+void __smp_call_function_single(int cpu, struct call_single_data *data,
+				int wait)
 {
+	csd_lock(data);
+
 	/* Can deadlock when called with interrupts disabled */
-	WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
+	WARN_ON(wait && irqs_disabled());
 
-	generic_exec_single(cpu, data);
+	generic_exec_single(cpu, data, wait);
 }
 
-/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
+/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
+
 #ifndef arch_send_call_function_ipi_mask
-#define arch_send_call_function_ipi_mask(maskp) \
+# define arch_send_call_function_ipi_mask(maskp) \
 	arch_send_call_function_ipi(*(maskp))
 #endif
 
 /**
@@ -307,7 +346,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
  * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ * @wait: If true, wait (atomically) until function has completed
+ *        on other CPUs.
  *
  * If @wait is true, then returns once @func has returned. Note that @wait
  * will be implicitly turned on in case of allocation failures, since
@@ -318,27 +358,27 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
  * must be disabled when calling this function.
  */
 void smp_call_function_many(const struct cpumask *mask,
-			    void (*func)(void *), void *info,
-			    bool wait)
+			    void (*func)(void *), void *info, bool wait)
 {
 	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, next_cpu;
+	int cpu, next_cpu, this_cpu = smp_processor_id();
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
 	/* So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
-	if (cpu == smp_processor_id())
+	if (cpu == this_cpu)
 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+
 	/* No online cpus? We're done. */
 	if (cpu >= nr_cpu_ids)
 		return;
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-	if (next_cpu == smp_processor_id())
+	if (next_cpu == this_cpu)
 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
 
 	/* Fastpath: do that cpu by itself. */
@@ -347,43 +387,40 @@ void smp_call_function_many(const struct cpumask *mask,
 		return;
 	}
 
-	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
-	if (unlikely(!data)) {
-		/* Slow path. */
-		for_each_online_cpu(cpu) {
-			if (cpu == smp_processor_id())
-				continue;
-			if (cpumask_test_cpu(cpu, mask))
-				smp_call_function_single(cpu, func, info, wait);
-		}
-		return;
-	}
+	data = &__get_cpu_var(cfd_data);
+	csd_lock(&data->csd);
 
-	spin_lock_init(&data->lock);
-	data->csd.flags = CSD_FLAG_ALLOC;
-	if (wait)
-		data->csd.flags |= CSD_FLAG_WAIT;
+	spin_lock_irqsave(&data->lock, flags);
 	data->csd.func = func;
 	data->csd.info = info;
-	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
-	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
+	cpumask_and(data->cpumask, mask, cpu_online_mask);
+	cpumask_clear_cpu(this_cpu, data->cpumask);
+	data->refs = cpumask_weight(data->cpumask);
 
-	spin_lock_irqsave(&call_function_lock, flags);
-	list_add_tail_rcu(&data->csd.list, &call_function_queue);
-	spin_unlock_irqrestore(&call_function_lock, flags);
+	spin_lock(&call_function.lock);
+	/*
+	 * Place entry at the _HEAD_ of the list, so that any cpu still
+	 * observing the entry in generic_smp_call_function_interrupt()
+	 * will not miss any other list entries:
+	 */
+	list_add_rcu(&data->csd.list, &call_function.queue);
+	spin_unlock(&call_function.lock);
+
+	spin_unlock_irqrestore(&data->lock, flags);
 
 	/*
 	 * Make the list addition visible before sending the ipi.
+	 * (IPIs must obey or appear to obey normal Linux cache
+	 * coherency rules -- see comment in generic_exec_single).
 	 */
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
+	arch_send_call_function_ipi_mask(data->cpumask);
 
-	/* optionally wait for the CPUs to complete */
+	/* Optionally wait for the CPUs to complete */
 	if (wait)
-		csd_flag_wait(&data->csd);
+		csd_lock_wait(&data->csd);
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
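A typical caller sketch (the mask construction and callback are illustrative): preemption must be disabled across the call and interrupts enabled. With wait == 0, the per-cpu cfd_data and its cpumask stay in flight until every targeted CPU has run func; CSD_FLAG_LOCK serializes that against the next multi-call issued from this CPU.

/* Hypothetical caller of smp_call_function_many() -- a sketch. */
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <asm/atomic.h>

static void bump_counter(void *info)
{
	atomic_inc((atomic_t *)info);	/* runs on each targeted CPU */
}

static void kick_mask(const struct cpumask *targets, atomic_t *counter)
{
	preempt_disable();		/* pin this_cpu; interrupts stay enabled */
	smp_call_function_many(targets, bump_counter, counter, 1);
	preempt_enable();
	/* wait == 1: every targeted online CPU has run bump_counter by now */
}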
@@ -391,7 +428,8 @@ EXPORT_SYMBOL(smp_call_function_many);
  * smp_call_function(): Run a function on all other CPUs.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ * @wait: If true, wait (atomically) until function has completed
+ *        on other CPUs.
  *
  * Returns 0.
  *
@@ -407,26 +445,27 @@ int smp_call_function(void (*func)(void *), void *info, int wait)
 	preempt_disable();
 	smp_call_function_many(cpu_online_mask, func, info, wait);
 	preempt_enable();
+
 	return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
 
 void ipi_call_lock(void)
 {
-	spin_lock(&call_function_lock);
+	spin_lock(&call_function.lock);
 }
 
 void ipi_call_unlock(void)
 {
-	spin_unlock(&call_function_lock);
+	spin_unlock(&call_function.lock);
 }
 
 void ipi_call_lock_irq(void)
 {
-	spin_lock_irq(&call_function_lock);
+	spin_lock_irq(&call_function.lock);
 }
 
 void ipi_call_unlock_irq(void)
 {
-	spin_unlock_irq(&call_function_lock);
+	spin_unlock_irq(&call_function.lock);
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 9041ea7948fe..ebe2a4d59f2c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -496,7 +496,7 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
 		cp->flags = 0;
 		cp->priv = softirq;
 
-		__smp_call_function_single(cpu, cp);
+		__smp_call_function_single(cpu, cp, 0);
 		return 0;
 	}
 	return 1;