diff options
author | Yinghai Lu <yinghai@kernel.org> | 2008-12-05 21:58:31 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-08 08:31:51 -0500 |
commit | 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac (patch) | |
tree | 239251bad791fd60af8c0f2ba365b7188395c83f /kernel/irq | |
parent | 218d11a8b071b23b76c484fd5f72a4fe3306801e (diff) |
sparse irq_desc[] array: core kernel and x86 changes
Impact: new feature
Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with
NR_CPUS set to large values. The goal is to be able to scale up to much
larger NR_IRQS values without impacting the (important) common case.
To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of
irq_desc pointers.
When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node() to get irq_desc;
this also makes the IRQ descriptors NUMA-local (to the site that calls
request_irq()).
This gets rid of the irq_cfg[] static array on x86 as well: x86 now
stores its irq_cfg in desc->chip_data.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/irq')
-rw-r--r-- | kernel/irq/autoprobe.c | 15 | ||||
-rw-r--r-- | kernel/irq/chip.c | 3 | ||||
-rw-r--r-- | kernel/irq/handle.c | 181 | ||||
-rw-r--r-- | kernel/irq/proc.c | 6 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 5 |
5 files changed, 202 insertions, 8 deletions
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f7321b8ce..650ce4102a63 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c | |||
@@ -40,6 +40,9 @@ unsigned long probe_irq_on(void) | |||
40 | * flush such a longstanding irq before considering it as spurious. | 40 | * flush such a longstanding irq before considering it as spurious. |
41 | */ | 41 | */ |
42 | for_each_irq_desc_reverse(i, desc) { | 42 | for_each_irq_desc_reverse(i, desc) { |
43 | if (!desc) | ||
44 | continue; | ||
45 | |||
43 | spin_lock_irq(&desc->lock); | 46 | spin_lock_irq(&desc->lock); |
44 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { | 47 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { |
45 | /* | 48 | /* |
@@ -68,6 +71,9 @@ unsigned long probe_irq_on(void) | |||
68 | * happened in the previous stage, it may have masked itself) | 71 | * happened in the previous stage, it may have masked itself) |
69 | */ | 72 | */ |
70 | for_each_irq_desc_reverse(i, desc) { | 73 | for_each_irq_desc_reverse(i, desc) { |
74 | if (!desc) | ||
75 | continue; | ||
76 | |||
71 | spin_lock_irq(&desc->lock); | 77 | spin_lock_irq(&desc->lock); |
72 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { | 78 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { |
73 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; | 79 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; |
@@ -86,6 +92,9 @@ unsigned long probe_irq_on(void) | |||
86 | * Now filter out any obviously spurious interrupts | 92 | * Now filter out any obviously spurious interrupts |
87 | */ | 93 | */ |
88 | for_each_irq_desc(i, desc) { | 94 | for_each_irq_desc(i, desc) { |
95 | if (!desc) | ||
96 | continue; | ||
97 | |||
89 | spin_lock_irq(&desc->lock); | 98 | spin_lock_irq(&desc->lock); |
90 | status = desc->status; | 99 | status = desc->status; |
91 | 100 | ||
@@ -124,6 +133,9 @@ unsigned int probe_irq_mask(unsigned long val) | |||
124 | int i; | 133 | int i; |
125 | 134 | ||
126 | for_each_irq_desc(i, desc) { | 135 | for_each_irq_desc(i, desc) { |
136 | if (!desc) | ||
137 | continue; | ||
138 | |||
127 | spin_lock_irq(&desc->lock); | 139 | spin_lock_irq(&desc->lock); |
128 | status = desc->status; | 140 | status = desc->status; |
129 | 141 | ||
@@ -166,6 +178,9 @@ int probe_irq_off(unsigned long val) | |||
166 | unsigned int status; | 178 | unsigned int status; |
167 | 179 | ||
168 | for_each_irq_desc(i, desc) { | 180 | for_each_irq_desc(i, desc) { |
181 | if (!desc) | ||
182 | continue; | ||
183 | |||
169 | spin_lock_irq(&desc->lock); | 184 | spin_lock_irq(&desc->lock); |
170 | status = desc->status; | 185 | status = desc->status; |
171 | 186 | ||
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 10b5092e9bfe..8e4fce4a1b1f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -24,9 +24,10 @@ | |||
24 | */ | 24 | */ |
25 | void dynamic_irq_init(unsigned int irq) | 25 | void dynamic_irq_init(unsigned int irq) |
26 | { | 26 | { |
27 | struct irq_desc *desc = irq_to_desc(irq); | 27 | struct irq_desc *desc; |
28 | unsigned long flags; | 28 | unsigned long flags; |
29 | 29 | ||
30 | desc = irq_to_desc(irq); | ||
30 | if (!desc) { | 31 | if (!desc) { |
31 | WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); | 32 | WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); |
32 | return; | 33 | return; |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c815b42d0f5b..96ca203eb51b 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -15,9 +15,16 @@ | |||
15 | #include <linux/random.h> | 15 | #include <linux/random.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
18 | #include <linux/rculist.h> | ||
19 | #include <linux/hash.h> | ||
18 | 20 | ||
19 | #include "internals.h" | 21 | #include "internals.h" |
20 | 22 | ||
23 | /* | ||
24 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | ||
25 | */ | ||
26 | static struct lock_class_key irq_desc_lock_class; | ||
27 | |||
21 | /** | 28 | /** |
22 | * handle_bad_irq - handle spurious and unhandled irqs | 29 | * handle_bad_irq - handle spurious and unhandled irqs |
23 | * @irq: the interrupt number | 30 | * @irq: the interrupt number |
@@ -49,6 +56,155 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) | |||
49 | int nr_irqs = NR_IRQS; | 56 | int nr_irqs = NR_IRQS; |
50 | EXPORT_SYMBOL_GPL(nr_irqs); | 57 | EXPORT_SYMBOL_GPL(nr_irqs); |
51 | 58 | ||
59 | void __init __attribute__((weak)) arch_early_irq_init(void) | ||
60 | { | ||
61 | } | ||
62 | |||
63 | #ifdef CONFIG_SPARSE_IRQ | ||
64 | static struct irq_desc irq_desc_init = { | ||
65 | .irq = -1, | ||
66 | .status = IRQ_DISABLED, | ||
67 | .chip = &no_irq_chip, | ||
68 | .handle_irq = handle_bad_irq, | ||
69 | .depth = 1, | ||
70 | .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), | ||
71 | #ifdef CONFIG_SMP | ||
72 | .affinity = CPU_MASK_ALL | ||
73 | #endif | ||
74 | }; | ||
75 | |||
76 | static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) | ||
77 | { | ||
78 | unsigned long bytes; | ||
79 | char *ptr; | ||
80 | int node; | ||
81 | |||
82 | /* Compute how many bytes we need per irq and allocate them */ | ||
83 | bytes = nr * sizeof(unsigned int); | ||
84 | |||
85 | node = cpu_to_node(cpu); | ||
86 | ptr = kzalloc_node(bytes, GFP_ATOMIC, node); | ||
87 | printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node); | ||
88 | |||
89 | if (ptr) | ||
90 | desc->kstat_irqs = (unsigned int *)ptr; | ||
91 | } | ||
92 | |||
93 | void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) | ||
94 | { | ||
95 | } | ||
96 | |||
97 | static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) | ||
98 | { | ||
99 | memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); | ||
100 | desc->irq = irq; | ||
101 | #ifdef CONFIG_SMP | ||
102 | desc->cpu = cpu; | ||
103 | #endif | ||
104 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
105 | init_kstat_irqs(desc, cpu, nr_cpu_ids); | ||
106 | if (!desc->kstat_irqs) { | ||
107 | printk(KERN_ERR "can not alloc kstat_irqs\n"); | ||
108 | BUG_ON(1); | ||
109 | } | ||
110 | arch_init_chip_data(desc, cpu); | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * Protect the sparse_irqs: | ||
115 | */ | ||
116 | static DEFINE_SPINLOCK(sparse_irq_lock); | ||
117 | |||
118 | struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; | ||
119 | |||
120 | static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = { | ||
121 | [0 ... 15] = { | ||
122 | .irq = -1, | ||
123 | .status = IRQ_DISABLED, | ||
124 | .chip = &no_irq_chip, | ||
125 | .handle_irq = handle_bad_irq, | ||
126 | .depth = 1, | ||
127 | .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), | ||
128 | #ifdef CONFIG_SMP | ||
129 | .affinity = CPU_MASK_ALL | ||
130 | #endif | ||
131 | } | ||
132 | }; | ||
133 | |||
134 | /* FIXME: use bootmem alloc ...*/ | ||
135 | static unsigned int kstat_irqs_legacy[16][NR_CPUS]; | ||
136 | |||
137 | void __init early_irq_init(void) | ||
138 | { | ||
139 | struct irq_desc *desc; | ||
140 | int legacy_count; | ||
141 | int i; | ||
142 | |||
143 | desc = irq_desc_legacy; | ||
144 | legacy_count = ARRAY_SIZE(irq_desc_legacy); | ||
145 | |||
146 | for (i = 0; i < legacy_count; i++) { | ||
147 | desc[i].irq = i; | ||
148 | desc[i].kstat_irqs = kstat_irqs_legacy[i]; | ||
149 | |||
150 | irq_desc_ptrs[i] = desc + i; | ||
151 | } | ||
152 | |||
153 | for (i = legacy_count; i < NR_IRQS; i++) | ||
154 | irq_desc_ptrs[i] = NULL; | ||
155 | |||
156 | arch_early_irq_init(); | ||
157 | } | ||
158 | |||
159 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
160 | { | ||
161 | return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; | ||
162 | } | ||
163 | |||
164 | struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) | ||
165 | { | ||
166 | struct irq_desc *desc; | ||
167 | unsigned long flags; | ||
168 | int node; | ||
169 | |||
170 | if (irq >= NR_IRQS) { | ||
171 | printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", | ||
172 | irq, NR_IRQS); | ||
173 | WARN_ON(1); | ||
174 | return NULL; | ||
175 | } | ||
176 | |||
177 | desc = irq_desc_ptrs[irq]; | ||
178 | if (desc) | ||
179 | return desc; | ||
180 | |||
181 | spin_lock_irqsave(&sparse_irq_lock, flags); | ||
182 | |||
183 | /* We have to check it to avoid races with another CPU */ | ||
184 | desc = irq_desc_ptrs[irq]; | ||
185 | if (desc) | ||
186 | goto out_unlock; | ||
187 | |||
188 | node = cpu_to_node(cpu); | ||
189 | desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); | ||
190 | printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", | ||
191 | irq, cpu, node); | ||
192 | if (!desc) { | ||
193 | printk(KERN_ERR "can not alloc irq_desc\n"); | ||
194 | BUG_ON(1); | ||
195 | } | ||
196 | init_one_irq_desc(irq, desc, cpu); | ||
197 | |||
198 | irq_desc_ptrs[irq] = desc; | ||
199 | |||
200 | out_unlock: | ||
201 | spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
202 | |||
203 | return desc; | ||
204 | } | ||
205 | |||
206 | #else | ||
207 | |||
52 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | 208 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { |
53 | [0 ... NR_IRQS-1] = { | 209 | [0 ... NR_IRQS-1] = { |
54 | .status = IRQ_DISABLED, | 210 | .status = IRQ_DISABLED, |
@@ -62,6 +218,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | |||
62 | } | 218 | } |
63 | }; | 219 | }; |
64 | 220 | ||
221 | #endif | ||
222 | |||
65 | /* | 223 | /* |
66 | * What should we do if we get a hw irq event on an illegal vector? | 224 | * What should we do if we get a hw irq event on an illegal vector? |
67 | * Each architecture has to answer this themself. | 225 | * Each architecture has to answer this themself. |
@@ -261,17 +419,28 @@ out: | |||
261 | 419 | ||
262 | 420 | ||
263 | #ifdef CONFIG_TRACE_IRQFLAGS | 421 | #ifdef CONFIG_TRACE_IRQFLAGS |
264 | /* | ||
265 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | ||
266 | */ | ||
267 | static struct lock_class_key irq_desc_lock_class; | ||
268 | |||
269 | void early_init_irq_lock_class(void) | 422 | void early_init_irq_lock_class(void) |
270 | { | 423 | { |
424 | #ifndef CONFIG_SPARSE_IRQ | ||
271 | struct irq_desc *desc; | 425 | struct irq_desc *desc; |
272 | int i; | 426 | int i; |
273 | 427 | ||
274 | for_each_irq_desc(i, desc) | 428 | for_each_irq_desc(i, desc) { |
429 | if (!desc) | ||
430 | continue; | ||
431 | |||
275 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | 432 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); |
433 | } | ||
434 | #endif | ||
435 | } | ||
436 | #endif | ||
437 | |||
438 | #ifdef CONFIG_SPARSE_IRQ | ||
439 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | ||
440 | { | ||
441 | struct irq_desc *desc = irq_to_desc(irq); | ||
442 | return desc->kstat_irqs[cpu]; | ||
276 | } | 443 | } |
277 | #endif | 444 | #endif |
445 | EXPORT_SYMBOL(kstat_irqs_cpu); | ||
446 | |||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d257e7d6a8a4..f6b3440f05bc 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -243,7 +243,11 @@ void init_irq_proc(void) | |||
243 | /* | 243 | /* |
244 | * Create entries for all existing IRQs. | 244 | * Create entries for all existing IRQs. |
245 | */ | 245 | */ |
246 | for_each_irq_desc(irq, desc) | 246 | for_each_irq_desc(irq, desc) { |
247 | if (!desc) | ||
248 | continue; | ||
249 | |||
247 | register_irq_proc(irq, desc); | 250 | register_irq_proc(irq, desc); |
251 | } | ||
248 | } | 252 | } |
249 | 253 | ||
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dd364c11e56e..3738107531fd 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -91,6 +91,9 @@ static int misrouted_irq(int irq) | |||
91 | int i, ok = 0; | 91 | int i, ok = 0; |
92 | 92 | ||
93 | for_each_irq_desc(i, desc) { | 93 | for_each_irq_desc(i, desc) { |
94 | if (!desc) | ||
95 | continue; | ||
96 | |||
94 | if (!i) | 97 | if (!i) |
95 | continue; | 98 | continue; |
96 | 99 | ||
@@ -112,6 +115,8 @@ static void poll_spurious_irqs(unsigned long dummy) | |||
112 | for_each_irq_desc(i, desc) { | 115 | for_each_irq_desc(i, desc) { |
113 | unsigned int status; | 116 | unsigned int status; |
114 | 117 | ||
118 | if (!desc) | ||
119 | continue; | ||
115 | if (!i) | 120 | if (!i) |
116 | continue; | 121 | continue; |
117 | 122 | ||