path: root/arch/blackfin/mach-common/smp.c
author	Yi Li <yi.li@analog.com>	2009-12-17 03:20:32 -0500
committer	Mike Frysinger <vapier@gentoo.org>	2011-01-10 07:18:15 -0500
commit	73a400646b8e26615f3ef1a0a4bc0cd0d5bd284c (patch)
tree	331002731d5aac594bb99a5a9cc61f5c0933ca69 /arch/blackfin/mach-common/smp.c
parent	2c1657c29f810d0ba32cde54cba1e916815493e5 (diff)
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up a message dynamically, which means memory allocation. But often we will want to send an IPI in low level contexts where allocation is not possible, which may lead to a panic(). So create a per-cpu static array for the message queue and use that instead.

Further, while we have two supplemental interrupts, we are currently only using one of them. So use the second one for the most common IPI message of all -- smp_send_reschedule(). This avoids ugly contention for locks which in turn would require an IPI message ...

In general, this improves SMP performance, and in some cases allows the SMP port to work in places it wouldn't before. Such as the PREEMPT_RT case where the slab is protected by a per-cpu spin lock. If the slab kmalloc/kfree were to put the task to sleep, and that task was actually the IPI handler, then the system falls down yet again.

After running various stress tests on the system, the static limit of 5 messages seems to work. On the off chance even this overflows, we simply panic(), and we can review that scenario to see if the limit needs to be increased a bit more.

Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
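The change boils down to replacing a kmalloc'd linked list of IPI messages with a small, statically sized ring per CPU, indexed by a head pointer plus a count, so queueing a message never touches an allocator. The standalone sketch below is illustrative only: the msgq_put/msgq_get names, the user-space error handling, and the single shared queue are made up here, while the in-tree queue is per-CPU and guarded by a spinlock with interrupts disabled.

	/*
	 * Minimal user-space model of the bounded FIFO introduced by the
	 * patch: fixed storage, head index + count, overflow is fatal.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	#define MSGQ_LEN 5	/* mirrors BFIN_IPI_MSGQ_LEN */

	struct msg {
		unsigned long type;
	};

	struct msg_queue {
		unsigned long head;	/* index of the oldest queued message */
		unsigned long count;	/* number of messages currently queued */
		struct msg ring[MSGQ_LEN];
	};

	/* Enqueue at (head + count) % LEN; overflow is fatal, as in the patch. */
	static void msgq_put(struct msg_queue *q, unsigned long type)
	{
		unsigned long slot;

		if (q->count >= MSGQ_LEN) {
			fprintf(stderr, "IPI message queue overflow\n");
			exit(EXIT_FAILURE);
		}
		slot = (q->head + q->count) % MSGQ_LEN;
		q->ring[slot].type = type;
		q->count++;
	}

	/* Dequeue from head, advancing it modulo the ring size. */
	static int msgq_get(struct msg_queue *q, unsigned long *type)
	{
		if (!q->count)
			return 0;
		*type = q->ring[q->head].type;
		q->head = (q->head + 1) % MSGQ_LEN;
		q->count--;
		return 1;
	}

	int main(void)
	{
		struct msg_queue q = { 0 };
		unsigned long type;

		msgq_put(&q, 1);	/* e.g. a "call function" message */
		msgq_put(&q, 2);	/* e.g. a "cpu stop" message */
		while (msgq_get(&q, &type))
			printf("dequeued message type %lu\n", type);
		return 0;
	}

Because enqueueing only writes into preallocated storage, the sender and the interrupt handler can run in contexts where kmalloc()/kfree() would be unsafe, which is the whole point of the rework.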
Diffstat (limited to 'arch/blackfin/mach-common/smp.c')
-rw-r--r--	arch/blackfin/mach-common/smp.c	197
1 file changed, 78 insertions(+), 119 deletions(-)
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 930608dd358d..9f251406a76a 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -60,8 +60,7 @@ struct smp_call_struct {
 	void (*func)(void *info);
 	void *info;
 	int wait;
-	cpumask_t pending;
-	cpumask_t waitmask;
+	cpumask_t *waitmask;
 };
 
 static struct blackfin_flush_data smp_flush_data;
@@ -69,15 +68,19 @@ static struct blackfin_flush_data smp_flush_data;
 static DEFINE_SPINLOCK(stop_lock);
 
 struct ipi_message {
-	struct list_head list;
 	unsigned long type;
 	struct smp_call_struct call_struct;
 };
 
+/* A magic number - stress test shows this is safe for common cases */
+#define BFIN_IPI_MSGQ_LEN 5
+
+/* Simple FIFO buffer, overflow leads to panic */
 struct ipi_message_queue {
-	struct list_head head;
 	spinlock_t lock;
 	unsigned long count;
+	unsigned long head; /* head of the queue */
+	struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN];
 };
 
 static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue);
@@ -116,7 +119,6 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
 	func = msg->call_struct.func;
 	info = msg->call_struct.info;
 	wait = msg->call_struct.wait;
-	cpu_clear(cpu, msg->call_struct.pending);
 	func(info);
 	if (wait) {
 #ifdef __ARCH_SYNC_CORE_DCACHE
@@ -127,51 +129,57 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
 		 */
 		resync_core_dcache();
 #endif
-		cpu_clear(cpu, msg->call_struct.waitmask);
-	} else
-		kfree(msg);
+		cpu_clear(cpu, *msg->call_struct.waitmask);
+	}
 }
 
-static irqreturn_t ipi_handler(int irq, void *dev_instance)
+/* Use IRQ_SUPPLE_0 to request reschedule.
+ * When returning from interrupt to user space,
+ * there is chance to reschedule */
+static irqreturn_t ipi_handler_int0(int irq, void *dev_instance)
+{
+	unsigned int cpu = smp_processor_id();
+
+	platform_clear_ipi(cpu, IRQ_SUPPLE_0);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 {
 	struct ipi_message *msg;
 	struct ipi_message_queue *msg_queue;
 	unsigned int cpu = smp_processor_id();
+	unsigned long flags;
 
-	platform_clear_ipi(cpu);
+	platform_clear_ipi(cpu, IRQ_SUPPLE_1);
 
 	msg_queue = &__get_cpu_var(ipi_msg_queue);
-	msg_queue->count++;
 
-	spin_lock(&msg_queue->lock);
-	while (!list_empty(&msg_queue->head)) {
-		msg = list_entry(msg_queue->head.next, typeof(*msg), list);
-		list_del(&msg->list);
+	spin_lock_irqsave(&msg_queue->lock, flags);
+
+	while (msg_queue->count) {
+		msg = &msg_queue->ipi_message[msg_queue->head];
 		switch (msg->type) {
-		case BFIN_IPI_RESCHEDULE:
-			/* That's the easiest one; leave it to
-			 * return_from_int. */
-			kfree(msg);
-			break;
 		case BFIN_IPI_CALL_FUNC:
-			spin_unlock(&msg_queue->lock);
+			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_call_function(cpu, msg);
-			spin_lock(&msg_queue->lock);
+			spin_lock_irqsave(&msg_queue->lock, flags);
 			break;
 		case BFIN_IPI_CPU_STOP:
-			spin_unlock(&msg_queue->lock);
+			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_cpu_stop(cpu);
-			spin_lock(&msg_queue->lock);
-			kfree(msg);
+			spin_lock_irqsave(&msg_queue->lock, flags);
 			break;
 		default:
 			printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n",
 			       cpu, msg->type);
-			kfree(msg);
 			break;
 		}
+		msg_queue->head++;
+		msg_queue->head %= BFIN_IPI_MSGQ_LEN;
+		msg_queue->count--;
 	}
-	spin_unlock(&msg_queue->lock);
+	spin_unlock_irqrestore(&msg_queue->lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -181,48 +189,47 @@ static void ipi_queue_init(void)
 	struct ipi_message_queue *msg_queue;
 	for_each_possible_cpu(cpu) {
 		msg_queue = &per_cpu(ipi_msg_queue, cpu);
-		INIT_LIST_HEAD(&msg_queue->head);
 		spin_lock_init(&msg_queue->lock);
 		msg_queue->count = 0;
+		msg_queue->head = 0;
 	}
 }
 
-int smp_call_function(void (*func)(void *info), void *info, int wait)
+static inline void smp_send_message(cpumask_t callmap, unsigned long type,
+				    void (*func) (void *info), void *info, int wait)
 {
 	unsigned int cpu;
-	cpumask_t callmap;
-	unsigned long flags;
 	struct ipi_message_queue *msg_queue;
 	struct ipi_message *msg;
-
-	callmap = cpu_online_map;
-	cpu_clear(smp_processor_id(), callmap);
-	if (cpus_empty(callmap))
-		return 0;
-
-	msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
-	if (!msg)
-		return -ENOMEM;
-	INIT_LIST_HEAD(&msg->list);
-	msg->call_struct.func = func;
-	msg->call_struct.info = info;
-	msg->call_struct.wait = wait;
-	msg->call_struct.pending = callmap;
-	msg->call_struct.waitmask = callmap;
-	msg->type = BFIN_IPI_CALL_FUNC;
+	unsigned long flags, next_msg;
+	cpumask_t waitmask = callmap; /* waitmask is shared by all cpus */
 
 	for_each_cpu_mask(cpu, callmap) {
 		msg_queue = &per_cpu(ipi_msg_queue, cpu);
 		spin_lock_irqsave(&msg_queue->lock, flags);
-		list_add_tail(&msg->list, &msg_queue->head);
+		if (msg_queue->count < BFIN_IPI_MSGQ_LEN) {
+			next_msg = (msg_queue->head + msg_queue->count)
+					% BFIN_IPI_MSGQ_LEN;
+			msg = &msg_queue->ipi_message[next_msg];
+			msg->type = type;
+			if (type == BFIN_IPI_CALL_FUNC) {
+				msg->call_struct.func = func;
+				msg->call_struct.info = info;
+				msg->call_struct.wait = wait;
+				msg->call_struct.waitmask = &waitmask;
+			}
+			msg_queue->count++;
+		} else
+			panic("IPI message queue overflow\n");
 		spin_unlock_irqrestore(&msg_queue->lock, flags);
-		platform_send_ipi_cpu(cpu);
+		platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1);
 	}
+
 	if (wait) {
-		while (!cpus_empty(msg->call_struct.waitmask))
+		while (!cpus_empty(waitmask))
 			blackfin_dcache_invalidate_range(
-				(unsigned long)(&msg->call_struct.waitmask),
-				(unsigned long)(&msg->call_struct.waitmask));
+				(unsigned long)(&waitmask),
+				(unsigned long)(&waitmask));
 #ifdef __ARCH_SYNC_CORE_DCACHE
 		/*
 		 * Invalidate D cache in case shared data was changed by
@@ -230,8 +237,20 @@ int smp_call_function(void (*func)(void *info), void *info, int wait)
 		 */
 		resync_core_dcache();
 #endif
-		kfree(msg);
 	}
+}
+
+int smp_call_function(void (*func)(void *info), void *info, int wait)
+{
+	cpumask_t callmap;
+
+	callmap = cpu_online_map;
+	cpu_clear(smp_processor_id(), callmap);
+	if (cpus_empty(callmap))
+		return 0;
+
+	smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(smp_call_function);
@@ -241,100 +260,39 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
 {
 	unsigned int cpu = cpuid;
 	cpumask_t callmap;
-	unsigned long flags;
-	struct ipi_message_queue *msg_queue;
-	struct ipi_message *msg;
 
 	if (cpu_is_offline(cpu))
 		return 0;
 	cpus_clear(callmap);
 	cpu_set(cpu, callmap);
 
-	msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
-	if (!msg)
-		return -ENOMEM;
-	INIT_LIST_HEAD(&msg->list);
-	msg->call_struct.func = func;
-	msg->call_struct.info = info;
-	msg->call_struct.wait = wait;
-	msg->call_struct.pending = callmap;
-	msg->call_struct.waitmask = callmap;
-	msg->type = BFIN_IPI_CALL_FUNC;
-
-	msg_queue = &per_cpu(ipi_msg_queue, cpu);
-	spin_lock_irqsave(&msg_queue->lock, flags);
-	list_add_tail(&msg->list, &msg_queue->head);
-	spin_unlock_irqrestore(&msg_queue->lock, flags);
-	platform_send_ipi_cpu(cpu);
+	smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
 
-	if (wait) {
-		while (!cpus_empty(msg->call_struct.waitmask))
-			blackfin_dcache_invalidate_range(
-				(unsigned long)(&msg->call_struct.waitmask),
-				(unsigned long)(&msg->call_struct.waitmask));
-#ifdef __ARCH_SYNC_CORE_DCACHE
-		/*
-		 * Invalidate D cache in case shared data was changed by
-		 * other processors to ensure cache coherence.
-		 */
-		resync_core_dcache();
-#endif
-		kfree(msg);
-	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(smp_call_function_single);
 
 void smp_send_reschedule(int cpu)
 {
-	unsigned long flags;
-	struct ipi_message_queue *msg_queue;
-	struct ipi_message *msg;
-
+	/* simply trigger an ipi */
 	if (cpu_is_offline(cpu))
 		return;
-
-	msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
-	if (!msg)
-		return;
-	INIT_LIST_HEAD(&msg->list);
-	msg->type = BFIN_IPI_RESCHEDULE;
-
-	msg_queue = &per_cpu(ipi_msg_queue, cpu);
-	spin_lock_irqsave(&msg_queue->lock, flags);
-	list_add_tail(&msg->list, &msg_queue->head);
-	spin_unlock_irqrestore(&msg_queue->lock, flags);
-	platform_send_ipi_cpu(cpu);
+	platform_send_ipi_cpu(cpu, IRQ_SUPPLE_0);
 
 	return;
 }
 
 void smp_send_stop(void)
 {
-	unsigned int cpu;
 	cpumask_t callmap;
-	unsigned long flags;
-	struct ipi_message_queue *msg_queue;
-	struct ipi_message *msg;
 
 	callmap = cpu_online_map;
 	cpu_clear(smp_processor_id(), callmap);
 	if (cpus_empty(callmap))
 		return;
 
-	msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
-	if (!msg)
-		return;
-	INIT_LIST_HEAD(&msg->list);
-	msg->type = BFIN_IPI_CPU_STOP;
+	smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0);
 
-	for_each_cpu_mask(cpu, callmap) {
-		msg_queue = &per_cpu(ipi_msg_queue, cpu);
-		spin_lock_irqsave(&msg_queue->lock, flags);
-		list_add_tail(&msg->list, &msg_queue->head);
-		spin_unlock_irqrestore(&msg_queue->lock, flags);
-		platform_send_ipi_cpu(cpu);
-	}
 	return;
 }
 
@@ -441,7 +399,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	platform_prepare_cpus(max_cpus);
 	ipi_queue_init();
-	platform_request_ipi(ipi_handler);
+	platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0);
+	platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)