author		Thomas Gleixner <tglx@linutronix.de>	2010-09-27 11:48:26 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2010-10-12 10:39:07 -0400
commit		1f5a5b87f78fade3ae48dfd55e8765d1d622ea4e (patch)
tree		762a5dbf40129ffd9667a170b2503a77c95320f7
parent		1318a481fc37c503a901b96ae06b692ca2b21af5 (diff)
genirq: Implement a sane sparse_irq allocator
The current sparse_irq allocator has several shortcomings due to
failures in the design, or the lack thereof:
- Requires iteration over the number of active irqs to find a free slot
(Some architectures have grown their own workarounds for this)
- Removal of entries is not possible
- Races between create_irq_nr and destroy_irq (plugged by horrible
  callbacks)
- Migration of active irq descriptors is not possible
- No bulk allocation of irq ranges
- Sprinkled irq_desc references all over the place outside of kernel/irq/
(The previous chip functions series is addressing this issue)
Implement a sane allocator which fixes the above shortcomings (though
migration of active descriptors needs a full tree-wide cleanup of the
direct and mostly unlocked access to irq_desc).
The new allocator still uses a radix_tree, but adds a bitmap for
keeping track of allocated irq numbers. That allows:
- Fast lookup of a free slot
- Removal of descriptors
- Prevention of the create/destroy race
- Bulk allocation of consecutive irq ranges
- A basic design that is ready for migration of live descriptors after
  further cleanups
The bitmap is also used in the SPARSE_IRQ=n case for lookup and
raceless (de)allocation of irq numbers, which removes the requirement
to loop through the descriptor array to find free slots.
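To illustrate, the fast-lookup idea boils down to the following pattern
(a minimal sketch built on the kernel's <linux/bitmap.h> API; locking is
omitted here, and find_free_irq_range() is an invented name, not a
function from this patch):

	static DECLARE_BITMAP(allocated_irqs, NR_IRQS);

	/* Find and claim cnt consecutive free irq numbers, from 'from' on */
	static int find_free_irq_range(unsigned int from, unsigned int cnt)
	{
		unsigned int start;

		/* One bitmap search replaces the walk over all descriptors */
		start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs,
						   from, cnt, 0);
		if (start >= nr_irqs)
			return -ENOMEM;
		bitmap_set(allocated_irqs, start, cnt);
		return start;
	}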
Right now it uses sparse_irq_lock to protect the bitmap and the radix
tree, but after cleaning up all users we should be able to convert that
to a mutex and to switch the radix_tree and descriptor allocations to
GFP_KERNEL.
[ Folded in a bugfix from Yinghai Lu ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	include/linux/irq.h	 23
-rw-r--r--	kernel/irq/irqdesc.c	231
2 files changed, 246 insertions, 8 deletions
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 30a300991ed4..cefacf928b33 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -398,6 +398,29 @@ static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d)
 }
 #endif
 
+int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
+void irq_free_descs(unsigned int irq, unsigned int cnt);
+
+static inline int irq_alloc_desc(int node)
+{
+	return irq_alloc_descs(-1, 0, 1, node);
+}
+
+static inline int irq_alloc_desc_at(unsigned int at, int node)
+{
+	return irq_alloc_descs(at, at, 1, node);
+}
+
+static inline int irq_alloc_desc_from(unsigned int from, int node)
+{
+	return irq_alloc_descs(-1, from, 1, node);
+}
+
+static inline void irq_free_desc(unsigned int irq)
+{
+	irq_free_descs(irq, 1);
+}
+
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
 #endif /* !CONFIG_S390 */
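A hypothetical caller of the new helpers could look like this (sketch
only: my_dev_setup_irqs() and the block size of 4 are invented for
illustration and do not appear in this patch):

	/* Allocate 4 consecutive irq numbers for a multi-vector device */
	static int my_dev_setup_irqs(int node)
	{
		int irq = irq_alloc_descs(-1, 0, 4, node);

		if (irq < 0)
			return irq;	/* -EINVAL or -ENOMEM */
		/* ... install chip and handlers for irq ... irq + 3 ... */
		return irq;
	}

	static void my_dev_teardown_irqs(unsigned int irq)
	{
		/* Releases the descriptors and the irq numbers */
		irq_free_descs(irq, 4);
	}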
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 29963f99f24d..4eea48b4f576 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/radix-tree.h>
+#include <linux/bitmap.h>
 
 #include "internals.h"
 
@@ -33,9 +34,54 @@ static void __init init_irq_default_affinity(void)
 }
 #endif
 
+#ifdef CONFIG_SMP
+static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
+{
+	if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
+		return -ENOMEM;
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
+		free_cpumask_var(desc->irq_data.affinity);
+		return -ENOMEM;
+	}
+#endif
+	return 0;
+}
+
+static void desc_smp_init(struct irq_desc *desc, int node)
+{
+	desc->node = node;
+	cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
+}
+
+#else
+static inline int
+alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; }
+static inline void desc_smp_init(struct irq_desc *desc, int node) { }
+#endif
+
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+{
+	desc->irq_data.irq = irq;
+	desc->irq_data.chip = &no_irq_chip;
+	desc->irq_data.chip_data = NULL;
+	desc->irq_data.handler_data = NULL;
+	desc->irq_data.msi_desc = NULL;
+	desc->status = IRQ_DEFAULT_INIT_FLAGS;
+	desc->handle_irq = handle_bad_irq;
+	desc->depth = 1;
+	desc->name = NULL;
+	memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
+	desc_smp_init(desc, node);
+}
+
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
+DEFINE_RAW_SPINLOCK(sparse_irq_lock);
+static DECLARE_BITMAP(allocated_irqs, NR_IRQS);
+
 #ifdef CONFIG_SPARSE_IRQ
 
 static struct irq_desc irq_desc_init = {
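For reference, the DECLARE_BITMAP() above is a plain fixed-size array of
unsigned long carrying one bit per possible irq; following the macro's
definition in <linux/types.h>, it expands to roughly:

	unsigned long allocated_irqs[BITS_TO_LONGS(NR_IRQS)];

so the allocation state costs one bit per irq regardless of how many
descriptors actually exist.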
@@ -85,14 +131,9 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 	arch_init_chip_data(desc, node);
 }
 
-/*
- * Protect the sparse_irqs:
- */
-DEFINE_RAW_SPINLOCK(sparse_irq_lock);
-
 static RADIX_TREE(irq_desc_tree, GFP_ATOMIC);
 
-static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
+static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
 {
 	radix_tree_insert(&irq_desc_tree, irq, desc);
 }
@@ -111,6 +152,94 @@ void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
 	radix_tree_replace_slot(ptr, desc);
 }
 
+static void delete_irq_desc(unsigned int irq)
+{
+	radix_tree_delete(&irq_desc_tree, irq);
+}
+
+#ifdef CONFIG_SMP
+static void free_masks(struct irq_desc *desc)
+{
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	free_cpumask_var(desc->pending_mask);
+#endif
+	free_cpumask_var(desc->affinity);
+}
+#else
+static inline void free_masks(struct irq_desc *desc) { }
+#endif
+
+static struct irq_desc *alloc_desc(int irq, int node)
+{
+	struct irq_desc *desc;
+	gfp_t gfp = GFP_KERNEL;
+
+	desc = kzalloc_node(sizeof(*desc), gfp, node);
+	if (!desc)
+		return NULL;
+	/* allocate based on nr_cpu_ids */
+	desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
+					gfp, node);
+	if (!desc->kstat_irqs)
+		goto err_desc;
+
+	if (alloc_masks(desc, gfp, node))
+		goto err_kstat;
+
+	raw_spin_lock_init(&desc->lock);
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+
+	desc_set_defaults(irq, desc, node);
+
+	return desc;
+
+err_kstat:
+	kfree(desc->kstat_irqs);
+err_desc:
+	kfree(desc);
+	return NULL;
+}
+
+static void free_desc(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
+	delete_irq_desc(irq);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+	free_masks(desc);
+	kfree(desc->kstat_irqs);
+	kfree(desc);
+}
+
+static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < cnt; i++) {
+		desc = alloc_desc(start + i, node);
+		if (!desc)
+			goto err;
+		raw_spin_lock_irqsave(&sparse_irq_lock, flags);
+		irq_insert_desc(start + i, desc);
+		raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	}
+	return start;
+
+err:
+	for (i--; i >= 0; i--)
+		free_desc(start + i);
+
+	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
+	bitmap_clear(allocated_irqs, start, cnt);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	return -ENOMEM;
+}
+
 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS_LEGACY-1] = {
 		.status	= IRQ_DEFAULT_INIT_FLAGS,
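The error path of alloc_descs() above is the usual all-or-nothing
unwind: tear down the i descriptors that were inserted successfully,
then release the bitmap bits for the whole range (which the caller,
irq_alloc_descs(), set before calling in). In generic form (a sketch
with invented alloc_one()/free_one() helpers):

	for (i = 0; i < cnt; i++)
		if (alloc_one(start + i) < 0)
			goto err;
	return start;
err:
	while (--i >= 0)	/* undo only what succeeded */
		free_one(start + i);
	return -ENOMEM;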
@@ -155,7 +284,7 @@ int __init early_irq_init(void)
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 		alloc_desc_masks(&desc[i], node, true);
 		init_desc_masks(&desc[i]);
-		set_irq_desc(i, &desc[i]);
+		irq_insert_desc(i, &desc[i]);
 	}
 
 	return arch_early_irq_init();
@@ -192,7 +321,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 	}
 	init_one_irq_desc(irq, desc, node);
 
-	set_irq_desc(irq, desc);
+	irq_insert_desc(irq, desc);
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
@@ -245,8 +374,94 @@ struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	return irq_to_desc(irq);
 }
+
+#ifdef CONFIG_SMP
+static inline int desc_node(struct irq_desc *desc)
+{
+	return desc->irq_data.node;
+}
+#else
+static inline int desc_node(struct irq_desc *desc) { return 0; }
+#endif
+
+static void free_desc(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	desc_set_defaults(irq, desc, desc_node(desc));
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+{
+	return start;
+}
 #endif /* !CONFIG_SPARSE_IRQ */
 
+/* Dynamic interrupt handling */
+
+/**
+ * irq_free_descs - free irq descriptors
+ * @from:	Start of descriptor range
+ * @cnt:	Number of consecutive irqs to free
+ */
+void irq_free_descs(unsigned int from, unsigned int cnt)
+{
+	unsigned long flags;
+	int i;
+
+	if (from >= nr_irqs || (from + cnt) > nr_irqs)
+		return;
+
+	for (i = 0; i < cnt; i++)
+		free_desc(from + i);
+
+	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
+	bitmap_clear(allocated_irqs, from, cnt);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
+}
+
+/**
+ * irq_alloc_descs - allocate and initialize a range of irq descriptors
+ * @irq:	Allocate for specific irq number if irq >= 0
+ * @from:	Start the search from this irq number
+ * @cnt:	Number of consecutive irqs to allocate.
+ * @node:	Preferred node on which the irq descriptor should be allocated
+ *
+ * Returns the first irq number or error code
+ */
+int __ref
+irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+{
+	unsigned long flags;
+	int start, ret;
+
+	if (!cnt)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
+
+	start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
+	ret = -EEXIST;
+	if (irq >= 0 && start != irq)
+		goto err;
+
+	ret = -ENOMEM;
+	if (start >= nr_irqs)
+		goto err;
+
+	bitmap_set(allocated_irqs, start, cnt);
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	return alloc_descs(start, cnt, node);
+
+err:
+	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
+	return ret;
+}
+
+/* Statistics access */
 void clear_kstat_irqs(struct irq_desc *desc)
 {
 	memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
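Taken together, irq_alloc_descs() has a handful of outcomes, which a
caller might exercise like this (hypothetical snippet; the irq numbers
are examples only):

	/* Request exactly irq 16, e.g. for a fixed legacy device */
	int irq = irq_alloc_descs(16, 16, 1, node);
	/* irq == 16 on success, -EEXIST if 16 is already allocated */

	/* Request any 8 consecutive irqs at or above 64 */
	irq = irq_alloc_descs(-1, 64, 8, node);
	/* irq == start of the range, -ENOMEM if the space is exhausted */

	if (irq >= 0)
		irq_free_descs(irq, 8);

In the SPARSE_IRQ=n case the same calls operate on the static descriptor
array: alloc_descs() simply returns start, and free_desc() resets the
descriptor to its defaults instead of freeing it.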
