diff options
author | Yi Li <yi.li@analog.com> | 2009-12-17 03:20:32 -0500 |
---|---|---|
committer | Mike Frysinger <vapier@gentoo.org> | 2011-01-10 07:18:15 -0500 |
commit | 73a400646b8e26615f3ef1a0a4bc0cd0d5bd284c (patch) | |
tree | 331002731d5aac594bb99a5a9cc61f5c0933ca69 /arch/blackfin/mach-common/smp.c | |
parent | 2c1657c29f810d0ba32cde54cba1e916815493e5 (diff) |
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically, which means memory allocation. But oftentimes, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before, such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Diffstat (limited to 'arch/blackfin/mach-common/smp.c')
-rw-r--r-- | arch/blackfin/mach-common/smp.c | 197 |
1 files changed, 78 insertions, 119 deletions
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 930608dd358d..9f251406a76a 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c | |||
@@ -60,8 +60,7 @@ struct smp_call_struct { | |||
60 | void (*func)(void *info); | 60 | void (*func)(void *info); |
61 | void *info; | 61 | void *info; |
62 | int wait; | 62 | int wait; |
63 | cpumask_t pending; | 63 | cpumask_t *waitmask; |
64 | cpumask_t waitmask; | ||
65 | }; | 64 | }; |
66 | 65 | ||
67 | static struct blackfin_flush_data smp_flush_data; | 66 | static struct blackfin_flush_data smp_flush_data; |
@@ -69,15 +68,19 @@ static struct blackfin_flush_data smp_flush_data; | |||
69 | static DEFINE_SPINLOCK(stop_lock); | 68 | static DEFINE_SPINLOCK(stop_lock); |
70 | 69 | ||
71 | struct ipi_message { | 70 | struct ipi_message { |
72 | struct list_head list; | ||
73 | unsigned long type; | 71 | unsigned long type; |
74 | struct smp_call_struct call_struct; | 72 | struct smp_call_struct call_struct; |
75 | }; | 73 | }; |
76 | 74 | ||
75 | /* A magic number - stress test shows this is safe for common cases */ | ||
76 | #define BFIN_IPI_MSGQ_LEN 5 | ||
77 | |||
78 | /* Simple FIFO buffer, overflow leads to panic */ | ||
77 | struct ipi_message_queue { | 79 | struct ipi_message_queue { |
78 | struct list_head head; | ||
79 | spinlock_t lock; | 80 | spinlock_t lock; |
80 | unsigned long count; | 81 | unsigned long count; |
82 | unsigned long head; /* head of the queue */ | ||
83 | struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN]; | ||
81 | }; | 84 | }; |
82 | 85 | ||
83 | static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue); | 86 | static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue); |
@@ -116,7 +119,6 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg) | |||
116 | func = msg->call_struct.func; | 119 | func = msg->call_struct.func; |
117 | info = msg->call_struct.info; | 120 | info = msg->call_struct.info; |
118 | wait = msg->call_struct.wait; | 121 | wait = msg->call_struct.wait; |
119 | cpu_clear(cpu, msg->call_struct.pending); | ||
120 | func(info); | 122 | func(info); |
121 | if (wait) { | 123 | if (wait) { |
122 | #ifdef __ARCH_SYNC_CORE_DCACHE | 124 | #ifdef __ARCH_SYNC_CORE_DCACHE |
@@ -127,51 +129,57 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg) | |||
127 | */ | 129 | */ |
128 | resync_core_dcache(); | 130 | resync_core_dcache(); |
129 | #endif | 131 | #endif |
130 | cpu_clear(cpu, msg->call_struct.waitmask); | 132 | cpu_clear(cpu, *msg->call_struct.waitmask); |
131 | } else | 133 | } |
132 | kfree(msg); | ||
133 | } | 134 | } |
134 | 135 | ||
135 | static irqreturn_t ipi_handler(int irq, void *dev_instance) | 136 | /* Use IRQ_SUPPLE_0 to request reschedule. |
137 | * When returning from interrupt to user space, | ||
138 | * there is chance to reschedule */ | ||
139 | static irqreturn_t ipi_handler_int0(int irq, void *dev_instance) | ||
140 | { | ||
141 | unsigned int cpu = smp_processor_id(); | ||
142 | |||
143 | platform_clear_ipi(cpu, IRQ_SUPPLE_0); | ||
144 | return IRQ_HANDLED; | ||
145 | } | ||
146 | |||
147 | static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) | ||
136 | { | 148 | { |
137 | struct ipi_message *msg; | 149 | struct ipi_message *msg; |
138 | struct ipi_message_queue *msg_queue; | 150 | struct ipi_message_queue *msg_queue; |
139 | unsigned int cpu = smp_processor_id(); | 151 | unsigned int cpu = smp_processor_id(); |
152 | unsigned long flags; | ||
140 | 153 | ||
141 | platform_clear_ipi(cpu); | 154 | platform_clear_ipi(cpu, IRQ_SUPPLE_1); |
142 | 155 | ||
143 | msg_queue = &__get_cpu_var(ipi_msg_queue); | 156 | msg_queue = &__get_cpu_var(ipi_msg_queue); |
144 | msg_queue->count++; | ||
145 | 157 | ||
146 | spin_lock(&msg_queue->lock); | 158 | spin_lock_irqsave(&msg_queue->lock, flags); |
147 | while (!list_empty(&msg_queue->head)) { | 159 | |
148 | msg = list_entry(msg_queue->head.next, typeof(*msg), list); | 160 | while (msg_queue->count) { |
149 | list_del(&msg->list); | 161 | msg = &msg_queue->ipi_message[msg_queue->head]; |
150 | switch (msg->type) { | 162 | switch (msg->type) { |
151 | case BFIN_IPI_RESCHEDULE: | ||
152 | /* That's the easiest one; leave it to | ||
153 | * return_from_int. */ | ||
154 | kfree(msg); | ||
155 | break; | ||
156 | case BFIN_IPI_CALL_FUNC: | 163 | case BFIN_IPI_CALL_FUNC: |
157 | spin_unlock(&msg_queue->lock); | 164 | spin_unlock_irqrestore(&msg_queue->lock, flags); |
158 | ipi_call_function(cpu, msg); | 165 | ipi_call_function(cpu, msg); |
159 | spin_lock(&msg_queue->lock); | 166 | spin_lock_irqsave(&msg_queue->lock, flags); |
160 | break; | 167 | break; |
161 | case BFIN_IPI_CPU_STOP: | 168 | case BFIN_IPI_CPU_STOP: |
162 | spin_unlock(&msg_queue->lock); | 169 | spin_unlock_irqrestore(&msg_queue->lock, flags); |
163 | ipi_cpu_stop(cpu); | 170 | ipi_cpu_stop(cpu); |
164 | spin_lock(&msg_queue->lock); | 171 | spin_lock_irqsave(&msg_queue->lock, flags); |
165 | kfree(msg); | ||
166 | break; | 172 | break; |
167 | default: | 173 | default: |
168 | printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n", | 174 | printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n", |
169 | cpu, msg->type); | 175 | cpu, msg->type); |
170 | kfree(msg); | ||
171 | break; | 176 | break; |
172 | } | 177 | } |
178 | msg_queue->head++; | ||
179 | msg_queue->head %= BFIN_IPI_MSGQ_LEN; | ||
180 | msg_queue->count--; | ||
173 | } | 181 | } |
174 | spin_unlock(&msg_queue->lock); | 182 | spin_unlock_irqrestore(&msg_queue->lock, flags); |
175 | return IRQ_HANDLED; | 183 | return IRQ_HANDLED; |
176 | } | 184 | } |
177 | 185 | ||
@@ -181,48 +189,47 @@ static void ipi_queue_init(void) | |||
181 | struct ipi_message_queue *msg_queue; | 189 | struct ipi_message_queue *msg_queue; |
182 | for_each_possible_cpu(cpu) { | 190 | for_each_possible_cpu(cpu) { |
183 | msg_queue = &per_cpu(ipi_msg_queue, cpu); | 191 | msg_queue = &per_cpu(ipi_msg_queue, cpu); |
184 | INIT_LIST_HEAD(&msg_queue->head); | ||
185 | spin_lock_init(&msg_queue->lock); | 192 | spin_lock_init(&msg_queue->lock); |
186 | msg_queue->count = 0; | 193 | msg_queue->count = 0; |
194 | msg_queue->head = 0; | ||
187 | } | 195 | } |
188 | } | 196 | } |
189 | 197 | ||
190 | int smp_call_function(void (*func)(void *info), void *info, int wait) | 198 | static inline void smp_send_message(cpumask_t callmap, unsigned long type, |
199 | void (*func) (void *info), void *info, int wait) | ||
191 | { | 200 | { |
192 | unsigned int cpu; | 201 | unsigned int cpu; |
193 | cpumask_t callmap; | ||
194 | unsigned long flags; | ||
195 | struct ipi_message_queue *msg_queue; | 202 | struct ipi_message_queue *msg_queue; |
196 | struct ipi_message *msg; | 203 | struct ipi_message *msg; |
197 | 204 | unsigned long flags, next_msg; | |
198 | callmap = cpu_online_map; | 205 | cpumask_t waitmask = callmap; /* waitmask is shared by all cpus */ |
199 | cpu_clear(smp_processor_id(), callmap); | ||
200 | if (cpus_empty(callmap)) | ||
201 | return 0; | ||
202 | |||
203 | msg = kmalloc(sizeof(*msg), GFP_ATOMIC); | ||
204 | if (!msg) | ||
205 | return -ENOMEM; | ||
206 | INIT_LIST_HEAD(&msg->list); | ||
207 | msg->call_struct.func = func; | ||
208 | msg->call_struct.info = info; | ||
209 | msg->call_struct.wait = wait; | ||
210 | msg->call_struct.pending = callmap; | ||
211 | msg->call_struct.waitmask = callmap; | ||
212 | msg->type = BFIN_IPI_CALL_FUNC; | ||
213 | 206 | ||
214 | for_each_cpu_mask(cpu, callmap) { | 207 | for_each_cpu_mask(cpu, callmap) { |
215 | msg_queue = &per_cpu(ipi_msg_queue, cpu); | 208 | msg_queue = &per_cpu(ipi_msg_queue, cpu); |
216 | spin_lock_irqsave(&msg_queue->lock, flags); | 209 | spin_lock_irqsave(&msg_queue->lock, flags); |
217 | list_add_tail(&msg->list, &msg_queue->head); | 210 | if (msg_queue->count < BFIN_IPI_MSGQ_LEN) { |
211 | next_msg = (msg_queue->head + msg_queue->count) | ||
212 | % BFIN_IPI_MSGQ_LEN; | ||
213 | msg = &msg_queue->ipi_message[next_msg]; | ||
214 | msg->type = type; | ||
215 | if (type == BFIN_IPI_CALL_FUNC) { | ||
216 | msg->call_struct.func = func; | ||
217 | msg->call_struct.info = info; | ||
218 | msg->call_struct.wait = wait; | ||
219 | msg->call_struct.waitmask = &waitmask; | ||
220 | } | ||
221 | msg_queue->count++; | ||
222 | } else | ||
223 | panic("IPI message queue overflow\n"); | ||
218 | spin_unlock_irqrestore(&msg_queue->lock, flags); | 224 | spin_unlock_irqrestore(&msg_queue->lock, flags); |
219 | platform_send_ipi_cpu(cpu); | 225 | platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1); |
220 | } | 226 | } |
227 | |||
221 | if (wait) { | 228 | if (wait) { |
222 | while (!cpus_empty(msg->call_struct.waitmask)) | 229 | while (!cpus_empty(waitmask)) |
223 | blackfin_dcache_invalidate_range( | 230 | blackfin_dcache_invalidate_range( |
224 | (unsigned long)(&msg->call_struct.waitmask), | 231 | (unsigned long)(&waitmask), |
225 | (unsigned long)(&msg->call_struct.waitmask)); | 232 | (unsigned long)(&waitmask)); |
226 | #ifdef __ARCH_SYNC_CORE_DCACHE | 233 | #ifdef __ARCH_SYNC_CORE_DCACHE |
227 | /* | 234 | /* |
228 | * Invalidate D cache in case shared data was changed by | 235 | * Invalidate D cache in case shared data was changed by |
@@ -230,8 +237,20 @@ int smp_call_function(void (*func)(void *info), void *info, int wait) | |||
230 | */ | 237 | */ |
231 | resync_core_dcache(); | 238 | resync_core_dcache(); |
232 | #endif | 239 | #endif |
233 | kfree(msg); | ||
234 | } | 240 | } |
241 | } | ||
242 | |||
243 | int smp_call_function(void (*func)(void *info), void *info, int wait) | ||
244 | { | ||
245 | cpumask_t callmap; | ||
246 | |||
247 | callmap = cpu_online_map; | ||
248 | cpu_clear(smp_processor_id(), callmap); | ||
249 | if (cpus_empty(callmap)) | ||
250 | return 0; | ||
251 | |||
252 | smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait); | ||
253 | |||
235 | return 0; | 254 | return 0; |
236 | } | 255 | } |
237 | EXPORT_SYMBOL_GPL(smp_call_function); | 256 | EXPORT_SYMBOL_GPL(smp_call_function); |
@@ -241,100 +260,39 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, | |||
241 | { | 260 | { |
242 | unsigned int cpu = cpuid; | 261 | unsigned int cpu = cpuid; |
243 | cpumask_t callmap; | 262 | cpumask_t callmap; |
244 | unsigned long flags; | ||
245 | struct ipi_message_queue *msg_queue; | ||
246 | struct ipi_message *msg; | ||
247 | 263 | ||
248 | if (cpu_is_offline(cpu)) | 264 | if (cpu_is_offline(cpu)) |
249 | return 0; | 265 | return 0; |
250 | cpus_clear(callmap); | 266 | cpus_clear(callmap); |
251 | cpu_set(cpu, callmap); | 267 | cpu_set(cpu, callmap); |
252 | 268 | ||
253 | msg = kmalloc(sizeof(*msg), GFP_ATOMIC); | 269 | smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait); |
254 | if (!msg) | ||
255 | return -ENOMEM; | ||
256 | INIT_LIST_HEAD(&msg->list); | ||
257 | msg->call_struct.func = func; | ||
258 | msg->call_struct.info = info; | ||
259 | msg->call_struct.wait = wait; | ||
260 | msg->call_struct.pending = callmap; | ||
261 | msg->call_struct.waitmask = callmap; | ||
262 | msg->type = BFIN_IPI_CALL_FUNC; | ||
263 | |||
264 | msg_queue = &per_cpu(ipi_msg_queue, cpu); | ||
265 | spin_lock_irqsave(&msg_queue->lock, flags); | ||
266 | list_add_tail(&msg->list, &msg_queue->head); | ||
267 | spin_unlock_irqrestore(&msg_queue->lock, flags); | ||
268 | platform_send_ipi_cpu(cpu); | ||
269 | 270 | ||
270 | if (wait) { | ||
271 | while (!cpus_empty(msg->call_struct.waitmask)) | ||
272 | blackfin_dcache_invalidate_range( | ||
273 | (unsigned long)(&msg->call_struct.waitmask), | ||
274 | (unsigned long)(&msg->call_struct.waitmask)); | ||
275 | #ifdef __ARCH_SYNC_CORE_DCACHE | ||
276 | /* | ||
277 | * Invalidate D cache in case shared data was changed by | ||
278 | * other processors to ensure cache coherence. | ||
279 | */ | ||
280 | resync_core_dcache(); | ||
281 | #endif | ||
282 | kfree(msg); | ||
283 | } | ||
284 | return 0; | 271 | return 0; |
285 | } | 272 | } |
286 | EXPORT_SYMBOL_GPL(smp_call_function_single); | 273 | EXPORT_SYMBOL_GPL(smp_call_function_single); |
287 | 274 | ||
288 | void smp_send_reschedule(int cpu) | 275 | void smp_send_reschedule(int cpu) |
289 | { | 276 | { |
290 | unsigned long flags; | 277 | /* simply trigger an ipi */ |
291 | struct ipi_message_queue *msg_queue; | ||
292 | struct ipi_message *msg; | ||
293 | |||
294 | if (cpu_is_offline(cpu)) | 278 | if (cpu_is_offline(cpu)) |
295 | return; | 279 | return; |
296 | 280 | platform_send_ipi_cpu(cpu, IRQ_SUPPLE_0); | |
297 | msg = kzalloc(sizeof(*msg), GFP_ATOMIC); | ||
298 | if (!msg) | ||
299 | return; | ||
300 | INIT_LIST_HEAD(&msg->list); | ||
301 | msg->type = BFIN_IPI_RESCHEDULE; | ||
302 | |||
303 | msg_queue = &per_cpu(ipi_msg_queue, cpu); | ||
304 | spin_lock_irqsave(&msg_queue->lock, flags); | ||
305 | list_add_tail(&msg->list, &msg_queue->head); | ||
306 | spin_unlock_irqrestore(&msg_queue->lock, flags); | ||
307 | platform_send_ipi_cpu(cpu); | ||
308 | 281 | ||
309 | return; | 282 | return; |
310 | } | 283 | } |
311 | 284 | ||
312 | void smp_send_stop(void) | 285 | void smp_send_stop(void) |
313 | { | 286 | { |
314 | unsigned int cpu; | ||
315 | cpumask_t callmap; | 287 | cpumask_t callmap; |
316 | unsigned long flags; | ||
317 | struct ipi_message_queue *msg_queue; | ||
318 | struct ipi_message *msg; | ||
319 | 288 | ||
320 | callmap = cpu_online_map; | 289 | callmap = cpu_online_map; |
321 | cpu_clear(smp_processor_id(), callmap); | 290 | cpu_clear(smp_processor_id(), callmap); |
322 | if (cpus_empty(callmap)) | 291 | if (cpus_empty(callmap)) |
323 | return; | 292 | return; |
324 | 293 | ||
325 | msg = kzalloc(sizeof(*msg), GFP_ATOMIC); | 294 | smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0); |
326 | if (!msg) | ||
327 | return; | ||
328 | INIT_LIST_HEAD(&msg->list); | ||
329 | msg->type = BFIN_IPI_CPU_STOP; | ||
330 | 295 | ||
331 | for_each_cpu_mask(cpu, callmap) { | ||
332 | msg_queue = &per_cpu(ipi_msg_queue, cpu); | ||
333 | spin_lock_irqsave(&msg_queue->lock, flags); | ||
334 | list_add_tail(&msg->list, &msg_queue->head); | ||
335 | spin_unlock_irqrestore(&msg_queue->lock, flags); | ||
336 | platform_send_ipi_cpu(cpu); | ||
337 | } | ||
338 | return; | 296 | return; |
339 | } | 297 | } |
340 | 298 | ||
@@ -441,7 +399,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
441 | { | 399 | { |
442 | platform_prepare_cpus(max_cpus); | 400 | platform_prepare_cpus(max_cpus); |
443 | ipi_queue_init(); | 401 | ipi_queue_init(); |
444 | platform_request_ipi(ipi_handler); | 402 | platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0); |
403 | platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1); | ||
445 | } | 404 | } |
446 | 405 | ||
447 | void __init smp_cpus_done(unsigned int max_cpus) | 406 | void __init smp_cpus_done(unsigned int max_cpus) |