Diffstat (limited to 'kernel/softirq.c')
 kernel/softirq.c | 496
 1 file changed, 496 insertions, 0 deletions

diff --git a/kernel/softirq.c b/kernel/softirq.c
new file mode 100644
index 000000000000..b4ab6af1dea8
--- /dev/null
+++ b/kernel/softirq.c
@@ -0,0 +1,496 @@
/*
 * linux/kernel/softirq.c
 *
 * Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding.
     Though it is still not clear whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each one is serialized with respect to itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here (that would starve userspace),
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for the pending events, so we let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

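/*
 * Run the softirqs that are pending on this CPU, with bottom halves
 * disabled.  After each pass the pending mask is re-checked; if new
 * softirqs were raised in the meantime we loop, at most
 * MAX_SOFTIRQ_RESTART times, and then defer the rest to ksoftirqd.
 */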
asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();

        local_bh_disable();
        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        local_softirq_pending() = 0;

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        __local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

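/*
 * Generic entry point, used when the architecture does not provide its
 * own do_softirq(): do nothing if we are already in interrupt context,
 * otherwise run __do_softirq() with interrupts disabled if anything is
 * pending.
 */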
asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

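/*
 * Re-enable bottom halves.  If softirqs became pending while they were
 * disabled, run them now (unless we are in interrupt context).
 */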
void local_bh_enable(void)
{
        WARN_ON(irqs_disabled());
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq() __do_softirq()
#else
# define invoke_softirq() do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
        preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}

EXPORT_SYMBOL(open_softirq);

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

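/*
 * Push the tasklet onto this CPU's normal-priority tasklet list and
 * raise TASKLET_SOFTIRQ so that the list gets processed.
 */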
void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

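/*
 * TASKLET_SOFTIRQ handler: atomically take over this CPU's tasklet
 * list, then run every tasklet that is enabled (count == 0) and not
 * already running elsewhere.  Tasklets that cannot run right now are
 * put back on the list and the softirq is raised again.
 */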
static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

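/*
 * HI_SOFTIRQ handler: identical to tasklet_action(), but operates on
 * the high-priority tasklet list.
 */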
static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

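/*
 * Wait for a tasklet to finish: yield until it is neither scheduled
 * nor running, then clear its SCHED bit.  Must not be called from
 * interrupt context.
 */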
void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

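/*
 * Per-CPU softirq daemon: sleeps until softirq work is left over by
 * __do_softirq(), then keeps calling do_softirq() at low priority
 * (nice 19) until nothing is pending.  Drops into wait_to_die if its
 * CPU has gone offline.
 */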
static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

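/*
 * Move the tasklet lists of a dead CPU over to the current CPU and
 * raise the corresponding softirqs so the work is processed here.
 */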
static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

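/*
 * CPU hotplug notifier: create and bind a ksoftirqd thread while a CPU
 * is being brought up, wake it once the CPU is online, and stop it
 * (taking over any leftover tasklets) when the CPU dies.
 */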
static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

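/* Start ksoftirqd on the boot CPU and register the hotplug notifier. */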
__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}
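
For reference, the tasklet interface exported by this file is normally consumed from driver code. The following is a minimal, hypothetical module sketch built against the API above; the names my_drv, my_tasklet and my_tasklet_fn, and the data value 42, are illustrative and not part of this patch.

/* Hypothetical usage sketch of the tasklet API defined in this file. */
#include <linux/module.h>
#include <linux/interrupt.h>

static void my_tasklet_fn(unsigned long data)
{
        /* Runs in softirq context via tasklet_action(): keep it short
           and never sleep here. */
        printk(KERN_INFO "my_drv: tasklet ran, data=%lu\n", data);
}

/* DECLARE_TASKLET() sets up a tasklet_struct much like tasklet_init(). */
static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 42);

static int __init my_drv_init(void)
{
        /* Normally done from an interrupt handler: marks the tasklet as
           scheduled and ends up in __tasklet_schedule() above. */
        tasklet_schedule(&my_tasklet);
        return 0;
}

static void __exit my_drv_exit(void)
{
        /* Wait for the tasklet to finish before the module goes away. */
        tasklet_kill(&my_tasklet);
}

module_init(my_drv_init);
module_exit(my_drv_exit);
MODULE_LICENSE("GPL");

The tasklet_kill() call in the exit path matters: without it, the tasklet could still be sitting on some CPU's tasklet list after the handler code has been unloaded.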