diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2010-09-27 11:48:26 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-10-12 10:39:07 -0400 |
commit | 1f5a5b87f78fade3ae48dfd55e8765d1d622ea4e (patch) | |
tree | 762a5dbf40129ffd9667a170b2503a77c95320f7 /kernel/irq | |
parent | 1318a481fc37c503a901b96ae06b692ca2b21af5 (diff) |
genirq: Implement a sane sparse_irq allocator
The current sparse_irq allocator has several shortcomings due to
failures in the design or the lack of it:
- Requires iteration over the number of active irqs to find a free slot
(Some architectures have grown their own workarounds for this)
- Removal of entries is not possible
- Racy between create_irq_nr and destroy_irq (plugged by horrible
callbacks)
- Migration of active irq descriptors is not possible
- No bulk allocation of irq ranges
- Sprinkled irq_desc references all over the place outside of kernel/irq/
(The previous chip functions series is addressing this issue)
Implement a sane allocator which fixes the above shortcomings (though
migration of active descriptors needs a full tree wide cleanup of the
direct and mostly unlocked access to irq_desc).
The new allocator still uses a radix_tree, but uses a bitmap for
keeping track of allocated irq numbers. That allows:
- Fast lookup of a free slot
- Removal of descriptors
- Prevents the create/destroy race
- Bulk allocation of consecutive irq ranges
- Basic design is ready for migration of live descriptors after
further cleanups
The bitmap is also used in the SPARSE_IRQ=n case for lookup and
raceless (de)allocation of irq numbers. So it removes the requirement
for looping through the descriptor array to find slots.
Right now it uses sparse_irq_lock to protect the bitmap and the radix
tree, but after cleaning up all users we should be able to convert that
to a mutex and to switch the radix_tree and descriptor allocations to
GFP_KERNEL.
[ Folded in a bugfix from Yinghai Lu ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/irq')
-rw-r--r-- | kernel/irq/irqdesc.c | 231 |
1 files changed, 223 insertions, 8 deletions
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 29963f99f24..4eea48b4f57 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/radix-tree.h> | 15 | #include <linux/radix-tree.h> |
16 | #include <linux/bitmap.h> | ||
16 | 17 | ||
17 | #include "internals.h" | 18 | #include "internals.h" |
18 | 19 | ||
@@ -33,9 +34,54 @@ static void __init init_irq_default_affinity(void) | |||
33 | } | 34 | } |
34 | #endif | 35 | #endif |
35 | 36 | ||
37 | #ifdef CONFIG_SMP | ||
38 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | ||
39 | { | ||
40 | if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) | ||
41 | return -ENOMEM; | ||
42 | |||
43 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
44 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | ||
45 | free_cpumask_var(desc->irq_data.affinity); | ||
46 | return -ENOMEM; | ||
47 | } | ||
48 | #endif | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static void desc_smp_init(struct irq_desc *desc, int node) | ||
53 | { | ||
54 | desc->node = node; | ||
55 | cpumask_copy(desc->irq_data.affinity, irq_default_affinity); | ||
56 | } | ||
57 | |||
58 | #else | ||
59 | static inline int | ||
60 | alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; } | ||
61 | static inline void desc_smp_init(struct irq_desc *desc, int node) { } | ||
62 | #endif | ||
63 | |||
64 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | ||
65 | { | ||
66 | desc->irq_data.irq = irq; | ||
67 | desc->irq_data.chip = &no_irq_chip; | ||
68 | desc->irq_data.chip_data = NULL; | ||
69 | desc->irq_data.handler_data = NULL; | ||
70 | desc->irq_data.msi_desc = NULL; | ||
71 | desc->status = IRQ_DEFAULT_INIT_FLAGS; | ||
72 | desc->handle_irq = handle_bad_irq; | ||
73 | desc->depth = 1; | ||
74 | desc->name = NULL; | ||
75 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | ||
76 | desc_smp_init(desc, node); | ||
77 | } | ||
78 | |||
36 | int nr_irqs = NR_IRQS; | 79 | int nr_irqs = NR_IRQS; |
37 | EXPORT_SYMBOL_GPL(nr_irqs); | 80 | EXPORT_SYMBOL_GPL(nr_irqs); |
38 | 81 | ||
82 | DEFINE_RAW_SPINLOCK(sparse_irq_lock); | ||
83 | static DECLARE_BITMAP(allocated_irqs, NR_IRQS); | ||
84 | |||
39 | #ifdef CONFIG_SPARSE_IRQ | 85 | #ifdef CONFIG_SPARSE_IRQ |
40 | 86 | ||
41 | static struct irq_desc irq_desc_init = { | 87 | static struct irq_desc irq_desc_init = { |
@@ -85,14 +131,9 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) | |||
85 | arch_init_chip_data(desc, node); | 131 | arch_init_chip_data(desc, node); |
86 | } | 132 | } |
87 | 133 | ||
88 | /* | ||
89 | * Protect the sparse_irqs: | ||
90 | */ | ||
91 | DEFINE_RAW_SPINLOCK(sparse_irq_lock); | ||
92 | |||
93 | static RADIX_TREE(irq_desc_tree, GFP_ATOMIC); | 134 | static RADIX_TREE(irq_desc_tree, GFP_ATOMIC); |
94 | 135 | ||
95 | static void set_irq_desc(unsigned int irq, struct irq_desc *desc) | 136 | static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) |
96 | { | 137 | { |
97 | radix_tree_insert(&irq_desc_tree, irq, desc); | 138 | radix_tree_insert(&irq_desc_tree, irq, desc); |
98 | } | 139 | } |
@@ -111,6 +152,94 @@ void replace_irq_desc(unsigned int irq, struct irq_desc *desc) | |||
111 | radix_tree_replace_slot(ptr, desc); | 152 | radix_tree_replace_slot(ptr, desc); |
112 | } | 153 | } |
113 | 154 | ||
155 | static void delete_irq_desc(unsigned int irq) | ||
156 | { | ||
157 | radix_tree_delete(&irq_desc_tree, irq); | ||
158 | } | ||
159 | |||
160 | #ifdef CONFIG_SMP | ||
161 | static void free_masks(struct irq_desc *desc) | ||
162 | { | ||
163 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
164 | free_cpumask_var(desc->pending_mask); | ||
165 | #endif | ||
166 | free_cpumask_var(desc->affinity); | ||
167 | } | ||
168 | #else | ||
169 | static inline void free_masks(struct irq_desc *desc) { } | ||
170 | #endif | ||
171 | |||
172 | static struct irq_desc *alloc_desc(int irq, int node) | ||
173 | { | ||
174 | struct irq_desc *desc; | ||
175 | gfp_t gfp = GFP_KERNEL; | ||
176 | |||
177 | desc = kzalloc_node(sizeof(*desc), gfp, node); | ||
178 | if (!desc) | ||
179 | return NULL; | ||
180 | /* allocate based on nr_cpu_ids */ | ||
181 | desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), | ||
182 | gfp, node); | ||
183 | if (!desc->kstat_irqs) | ||
184 | goto err_desc; | ||
185 | |||
186 | if (alloc_masks(desc, gfp, node)) | ||
187 | goto err_kstat; | ||
188 | |||
189 | raw_spin_lock_init(&desc->lock); | ||
190 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
191 | |||
192 | desc_set_defaults(irq, desc, node); | ||
193 | |||
194 | return desc; | ||
195 | |||
196 | err_kstat: | ||
197 | kfree(desc->kstat_irqs); | ||
198 | err_desc: | ||
199 | kfree(desc); | ||
200 | return NULL; | ||
201 | } | ||
202 | |||
203 | static void free_desc(unsigned int irq) | ||
204 | { | ||
205 | struct irq_desc *desc = irq_to_desc(irq); | ||
206 | unsigned long flags; | ||
207 | |||
208 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
209 | delete_irq_desc(irq); | ||
210 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
211 | |||
212 | free_masks(desc); | ||
213 | kfree(desc->kstat_irqs); | ||
214 | kfree(desc); | ||
215 | } | ||
216 | |||
217 | static int alloc_descs(unsigned int start, unsigned int cnt, int node) | ||
218 | { | ||
219 | struct irq_desc *desc; | ||
220 | unsigned long flags; | ||
221 | int i; | ||
222 | |||
223 | for (i = 0; i < cnt; i++) { | ||
224 | desc = alloc_desc(start + i, node); | ||
225 | if (!desc) | ||
226 | goto err; | ||
227 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
228 | irq_insert_desc(start + i, desc); | ||
229 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
230 | } | ||
231 | return start; | ||
232 | |||
233 | err: | ||
234 | for (i--; i >= 0; i--) | ||
235 | free_desc(start + i); | ||
236 | |||
237 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
238 | bitmap_clear(allocated_irqs, start, cnt); | ||
239 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
240 | return -ENOMEM; | ||
241 | } | ||
242 | |||
114 | static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { | 243 | static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { |
115 | [0 ... NR_IRQS_LEGACY-1] = { | 244 | [0 ... NR_IRQS_LEGACY-1] = { |
116 | .status = IRQ_DEFAULT_INIT_FLAGS, | 245 | .status = IRQ_DEFAULT_INIT_FLAGS, |
@@ -155,7 +284,7 @@ int __init early_irq_init(void) | |||
155 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 284 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
156 | alloc_desc_masks(&desc[i], node, true); | 285 | alloc_desc_masks(&desc[i], node, true); |
157 | init_desc_masks(&desc[i]); | 286 | init_desc_masks(&desc[i]); |
158 | set_irq_desc(i, &desc[i]); | 287 | irq_insert_desc(i, &desc[i]); |
159 | } | 288 | } |
160 | 289 | ||
161 | return arch_early_irq_init(); | 290 | return arch_early_irq_init(); |
@@ -192,7 +321,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | |||
192 | } | 321 | } |
193 | init_one_irq_desc(irq, desc, node); | 322 | init_one_irq_desc(irq, desc, node); |
194 | 323 | ||
195 | set_irq_desc(irq, desc); | 324 | irq_insert_desc(irq, desc); |
196 | 325 | ||
197 | out_unlock: | 326 | out_unlock: |
198 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | 327 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); |
@@ -245,8 +374,94 @@ struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | |||
245 | { | 374 | { |
246 | return irq_to_desc(irq); | 375 | return irq_to_desc(irq); |
247 | } | 376 | } |
377 | |||
378 | #ifdef CONFIG_SMP | ||
379 | static inline int desc_node(struct irq_desc *desc) | ||
380 | { | ||
381 | return desc->irq_data.node; | ||
382 | } | ||
383 | #else | ||
384 | static inline int desc_node(struct irq_desc *desc) { return 0; } | ||
385 | #endif | ||
386 | |||
387 | static void free_desc(unsigned int irq) | ||
388 | { | ||
389 | struct irq_desc *desc = irq_to_desc(irq); | ||
390 | unsigned long flags; | ||
391 | |||
392 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
393 | desc_set_defaults(irq, desc, desc_node(desc)); | ||
394 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
395 | } | ||
396 | |||
397 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | ||
398 | { | ||
399 | return start; | ||
400 | } | ||
248 | #endif /* !CONFIG_SPARSE_IRQ */ | 401 | #endif /* !CONFIG_SPARSE_IRQ */ |
249 | 402 | ||
403 | /* Dynamic interrupt handling */ | ||
404 | |||
405 | /** | ||
406 | * irq_free_descs - free irq descriptors | ||
407 | * @from: Start of descriptor range | ||
408 | * @cnt: Number of consecutive irqs to free | ||
409 | */ | ||
410 | void irq_free_descs(unsigned int from, unsigned int cnt) | ||
411 | { | ||
412 | unsigned long flags; | ||
413 | int i; | ||
414 | |||
415 | if (from >= nr_irqs || (from + cnt) > nr_irqs) | ||
416 | return; | ||
417 | |||
418 | for (i = 0; i < cnt; i++) | ||
419 | free_desc(from + i); | ||
420 | |||
421 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
422 | bitmap_clear(allocated_irqs, from, cnt); | ||
423 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
424 | } | ||
425 | |||
426 | /** | ||
427 | * irq_alloc_descs - allocate and initialize a range of irq descriptors | ||
428 | * @irq: Allocate for specific irq number if irq >= 0 | ||
429 | * @from: Start the search from this irq number | ||
430 | * @cnt: Number of consecutive irqs to allocate. | ||
431 | * @node: Preferred node on which the irq descriptor should be allocated | ||
432 | * | ||
433 | * Returns the first irq number or error code | ||
434 | */ | ||
435 | int __ref | ||
436 | irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | ||
437 | { | ||
438 | unsigned long flags; | ||
439 | int start, ret; | ||
440 | |||
441 | if (!cnt) | ||
442 | return -EINVAL; | ||
443 | |||
444 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
445 | |||
446 | start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); | ||
447 | ret = -EEXIST; | ||
448 | if (irq >=0 && start != irq) | ||
449 | goto err; | ||
450 | |||
451 | ret = -ENOMEM; | ||
452 | if (start >= nr_irqs) | ||
453 | goto err; | ||
454 | |||
455 | bitmap_set(allocated_irqs, start, cnt); | ||
456 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
457 | return alloc_descs(start, cnt, node); | ||
458 | |||
459 | err: | ||
460 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
461 | return ret; | ||
462 | } | ||
463 | |||
464 | /* Statistics access */ | ||
250 | void clear_kstat_irqs(struct irq_desc *desc) | 465 | void clear_kstat_irqs(struct irq_desc *desc) |
251 | { | 466 | { |
252 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | 467 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); |