about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86
diff options
context:
space:
mode:
author: Yinghai Lu <yinghai@kernel.org> 2008-12-05 21:58:31 -0500
committer: Ingo Molnar <mingo@elte.hu> 2008-12-08 08:31:51 -0500
commit: 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac (patch)
tree: 239251bad791fd60af8c0f2ba365b7188395c83f /arch/x86
parent: 218d11a8b071b23b76c484fd5f72a4fe3306801e (diff)
sparse irq_desc[] array: core kernel and x86 changes
Impact: new feature

Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with NR_CPUS set to large values. The goal is to be able to scale up to much larger NR_IRQS value without impacting the (important) common case.

To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of irq_desc pointers.

When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc, this also makes the IRQ descriptors NUMA-local (to the site that calls request_irq()).

This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now uses desc->chip_data for x86 to store irq_cfg.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                    10
-rw-r--r--  arch/x86/include/asm/irq_vectors.h   9
-rw-r--r--  arch/x86/kernel/io_apic.c          301
-rw-r--r--  arch/x86/kernel/irq.c                3
-rw-r--r--  arch/x86/kernel/irq_32.c             2
-rw-r--r--  arch/x86/kernel/irq_64.c             2
-rw-r--r--  arch/x86/kernel/irqinit_32.c         1
-rw-r--r--  arch/x86/kernel/irqinit_64.c         1
8 files changed, 216 insertions, 113 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..48ac688de3cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -238,6 +238,16 @@ config X86_HAS_BOOT_CPU_ID
238 def_bool y 238 def_bool y
239 depends on X86_VOYAGER 239 depends on X86_VOYAGER
240 240
241config SPARSE_IRQ
242 bool "Support sparse irq numbering"
243 depends on PCI_MSI || HT_IRQ
244 default y
245 help
246 This enables support for sparse irq, esp for msi/msi-x. You may need
247 if you have lots of cards supports msi-x installed.
248
249 If you don't know what to do here, say Y.
250
241config X86_FIND_SMP_CONFIG 251config X86_FIND_SMP_CONFIG
242 def_bool y 252 def_bool y
243 depends on X86_MPPARSE || X86_VOYAGER 253 depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..bb6b69a6b125 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,11 +102,20 @@
102#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) 102#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
103 103
104#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) 104#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
105
106#ifndef CONFIG_SPARSE_IRQ
105# if NR_CPUS < MAX_IO_APICS 107# if NR_CPUS < MAX_IO_APICS
106# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) 108# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
107# else 109# else
108# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 110# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
109# endif 111# endif
112#else
113# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
114# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
115# else
116# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
117# endif
118#endif
110 119
111#elif defined(CONFIG_X86_VOYAGER) 120#elif defined(CONFIG_X86_VOYAGER)
112 121
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..9de17f5c1125 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,8 +108,33 @@ static int __init parse_noapic(char *str)
108early_param("noapic", parse_noapic); 108early_param("noapic", parse_noapic);
109 109
110struct irq_pin_list; 110struct irq_pin_list;
111
112/*
113 * This is performance-critical, we want to do it O(1)
114 *
115 * the indexing order of this array favors 1:1 mappings
116 * between pins and IRQs.
117 */
118
119struct irq_pin_list {
120 int apic, pin;
121 struct irq_pin_list *next;
122};
123
124static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
125{
126 struct irq_pin_list *pin;
127 int node;
128
129 node = cpu_to_node(cpu);
130
131 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
132 printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node);
133
134 return pin;
135}
136
111struct irq_cfg { 137struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin; 138 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain; 139 cpumask_t domain;
115 cpumask_t old_domain; 140 cpumask_t old_domain;
@@ -119,83 +144,93 @@ struct irq_cfg {
119}; 144};
120 145
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 146/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
147#ifdef CONFIG_SPARSE_IRQ
148static struct irq_cfg irq_cfgx[] = {
149#else
122static struct irq_cfg irq_cfgx[NR_IRQS] = { 150static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 151#endif
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 152 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 153 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 154 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 155 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 156 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 157 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 158 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 159 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 160 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 161 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 162 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 163 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 164 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 165 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 166 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
167 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
139}; 168};
140 169
141#define for_each_irq_cfg(irq, cfg) \ 170void __init arch_early_irq_init(void)
142 for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
143
144static struct irq_cfg *irq_cfg(unsigned int irq)
145{ 171{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL; 172 struct irq_cfg *cfg;
147} 173 struct irq_desc *desc;
174 int count;
175 int i;
148 176
149static struct irq_cfg *irq_cfg_alloc(unsigned int irq) 177 cfg = irq_cfgx;
150{ 178 count = ARRAY_SIZE(irq_cfgx);
151 return irq_cfg(irq);
152}
153 179
154/* 180 for (i = 0; i < count; i++) {
155 * Rough estimation of how many shared IRQs there are, can be changed 181 desc = irq_to_desc(i);
156 * anytime. 182 desc->chip_data = &cfg[i];
157 */ 183 }
158#define MAX_PLUS_SHARED_IRQS NR_IRQS 184}
159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
160 185
161/* 186#ifdef CONFIG_SPARSE_IRQ
162 * This is performance-critical, we want to do it O(1) 187static struct irq_cfg *irq_cfg(unsigned int irq)
163 * 188{
164 * the indexing order of this array favors 1:1 mappings 189 struct irq_cfg *cfg = NULL;
165 * between pins and IRQs. 190 struct irq_desc *desc;
166 */
167 191
168struct irq_pin_list { 192 desc = irq_to_desc(irq);
169 int apic, pin; 193 if (desc)
170 struct irq_pin_list *next; 194 cfg = desc->chip_data;
171};
172 195
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; 196 return cfg;
174static struct irq_pin_list *irq_2_pin_ptr; 197}
175 198
176static void __init irq_2_pin_init(void) 199static struct irq_cfg *get_one_free_irq_cfg(int cpu)
177{ 200{
178 struct irq_pin_list *pin = irq_2_pin_head; 201 struct irq_cfg *cfg;
179 int i; 202 int node;
203
204 node = cpu_to_node(cpu);
180 205
181 for (i = 1; i < PIN_MAP_SIZE; i++) 206 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
182 pin[i-1].next = &pin[i]; 207 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
183 208
184 irq_2_pin_ptr = &pin[0]; 209 return cfg;
185} 210}
186 211
187static struct irq_pin_list *get_one_free_irq_2_pin(void) 212void arch_init_chip_data(struct irq_desc *desc, int cpu)
188{ 213{
189 struct irq_pin_list *pin = irq_2_pin_ptr; 214 struct irq_cfg *cfg;
190 215
191 if (!pin) 216 cfg = desc->chip_data;
192 panic("can not get more irq_2_pin\n"); 217 if (!cfg) {
218 desc->chip_data = get_one_free_irq_cfg(cpu);
219 if (!desc->chip_data) {
220 printk(KERN_ERR "can not alloc irq_cfg\n");
221 BUG_ON(1);
222 }
223 }
224}
193 225
194 irq_2_pin_ptr = pin->next; 226#else
195 pin->next = NULL; 227static struct irq_cfg *irq_cfg(unsigned int irq)
196 return pin; 228{
229 return irq < nr_irqs ? irq_cfgx + irq : NULL;
197} 230}
198 231
232#endif
233
199struct io_apic { 234struct io_apic {
200 unsigned int index; 235 unsigned int index;
201 unsigned int unused[3]; 236 unsigned int unused[3];
@@ -397,16 +432,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
397 * shared ISA-space IRQs, so we have to support them. We are super 432 * shared ISA-space IRQs, so we have to support them. We are super
398 * fast in the common case, and fast for shared ISA-space IRQs. 433 * fast in the common case, and fast for shared ISA-space IRQs.
399 */ 434 */
400static void add_pin_to_irq(unsigned int irq, int apic, int pin) 435static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
401{ 436{
402 struct irq_cfg *cfg;
403 struct irq_pin_list *entry; 437 struct irq_pin_list *entry;
438 struct irq_cfg *cfg = irq_cfg(irq);
404 439
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin; 440 entry = cfg->irq_2_pin;
408 if (!entry) { 441 if (!entry) {
409 entry = get_one_free_irq_2_pin(); 442 entry = get_one_free_irq_2_pin(cpu);
443 if (!entry) {
444 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
445 apic, pin);
446 return;
447 }
410 cfg->irq_2_pin = entry; 448 cfg->irq_2_pin = entry;
411 entry->apic = apic; 449 entry->apic = apic;
412 entry->pin = pin; 450 entry->pin = pin;
@@ -421,7 +459,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
421 entry = entry->next; 459 entry = entry->next;
422 } 460 }
423 461
424 entry->next = get_one_free_irq_2_pin(); 462 entry->next = get_one_free_irq_2_pin(cpu);
425 entry = entry->next; 463 entry = entry->next;
426 entry->apic = apic; 464 entry->apic = apic;
427 entry->pin = pin; 465 entry->pin = pin;
@@ -430,7 +468,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
430/* 468/*
431 * Reroute an IRQ to a different pin. 469 * Reroute an IRQ to a different pin.
432 */ 470 */
433static void __init replace_pin_at_irq(unsigned int irq, 471static void __init replace_pin_at_irq(unsigned int irq, int cpu,
434 int oldapic, int oldpin, 472 int oldapic, int oldpin,
435 int newapic, int newpin) 473 int newapic, int newpin)
436{ 474{
@@ -451,7 +489,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
451 489
452 /* why? call replace before add? */ 490 /* why? call replace before add? */
453 if (!replaced) 491 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin); 492 add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
455} 493}
456 494
457static inline void io_apic_modify_irq(unsigned int irq, 495static inline void io_apic_modify_irq(unsigned int irq,
@@ -1162,9 +1200,13 @@ void __setup_vector_irq(int cpu)
1162 /* This function must be called with vector_lock held */ 1200 /* This function must be called with vector_lock held */
1163 int irq, vector; 1201 int irq, vector;
1164 struct irq_cfg *cfg; 1202 struct irq_cfg *cfg;
1203 struct irq_desc *desc;
1165 1204
1166 /* Mark the inuse vectors */ 1205 /* Mark the inuse vectors */
1167 for_each_irq_cfg(irq, cfg) { 1206 for_each_irq_desc(irq, desc) {
1207 if (!desc)
1208 continue;
1209 cfg = desc->chip_data;
1168 if (!cpu_isset(cpu, cfg->domain)) 1210 if (!cpu_isset(cpu, cfg->domain))
1169 continue; 1211 continue;
1170 vector = cfg->vector; 1212 vector = cfg->vector;
@@ -1356,6 +1398,8 @@ static void __init setup_IO_APIC_irqs(void)
1356{ 1398{
1357 int apic, pin, idx, irq; 1399 int apic, pin, idx, irq;
1358 int notcon = 0; 1400 int notcon = 0;
1401 struct irq_desc *desc;
1402 int cpu = boot_cpu_id;
1359 1403
1360 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1404 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1361 1405
@@ -1387,7 +1431,12 @@ static void __init setup_IO_APIC_irqs(void)
1387 if (multi_timer_check(apic, irq)) 1431 if (multi_timer_check(apic, irq))
1388 continue; 1432 continue;
1389#endif 1433#endif
1390 add_pin_to_irq(irq, apic, pin); 1434 desc = irq_to_desc_alloc_cpu(irq, cpu);
1435 if (!desc) {
1436 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1437 continue;
1438 }
1439 add_pin_to_irq_cpu(irq, cpu, apic, pin);
1391 1440
1392 setup_IO_APIC_irq(apic, pin, irq, 1441 setup_IO_APIC_irq(apic, pin, irq,
1393 irq_trigger(idx), irq_polarity(idx)); 1442 irq_trigger(idx), irq_polarity(idx));
@@ -1448,6 +1497,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1448 union IO_APIC_reg_03 reg_03; 1497 union IO_APIC_reg_03 reg_03;
1449 unsigned long flags; 1498 unsigned long flags;
1450 struct irq_cfg *cfg; 1499 struct irq_cfg *cfg;
1500 struct irq_desc *desc;
1451 unsigned int irq; 1501 unsigned int irq;
1452 1502
1453 if (apic_verbosity == APIC_QUIET) 1503 if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1587,13 @@ __apicdebuginit(void) print_IO_APIC(void)
1537 } 1587 }
1538 } 1588 }
1539 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1589 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1540 for_each_irq_cfg(irq, cfg) { 1590 for_each_irq_desc(irq, desc) {
1541 struct irq_pin_list *entry = cfg->irq_2_pin; 1591 struct irq_pin_list *entry;
1592
1593 if (!desc)
1594 continue;
1595 cfg = desc->chip_data;
1596 entry = cfg->irq_2_pin;
1542 if (!entry) 1597 if (!entry)
1543 continue; 1598 continue;
1544 printk(KERN_DEBUG "IRQ%d ", irq); 1599 printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,6 +2077,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2022{ 2077{
2023 int was_pending = 0; 2078 int was_pending = 0;
2024 unsigned long flags; 2079 unsigned long flags;
2080 struct irq_cfg *cfg;
2025 2081
2026 spin_lock_irqsave(&ioapic_lock, flags); 2082 spin_lock_irqsave(&ioapic_lock, flags);
2027 if (irq < 16) { 2083 if (irq < 16) {
@@ -2029,6 +2085,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2029 if (i8259A_irq_pending(irq)) 2085 if (i8259A_irq_pending(irq))
2030 was_pending = 1; 2086 was_pending = 1;
2031 } 2087 }
2088 cfg = irq_cfg(irq);
2032 __unmask_IO_APIC_irq(irq); 2089 __unmask_IO_APIC_irq(irq);
2033 spin_unlock_irqrestore(&ioapic_lock, flags); 2090 spin_unlock_irqrestore(&ioapic_lock, flags);
2034 2091
@@ -2178,6 +2235,9 @@ static void ir_irq_migration(struct work_struct *work)
2178 struct irq_desc *desc; 2235 struct irq_desc *desc;
2179 2236
2180 for_each_irq_desc(irq, desc) { 2237 for_each_irq_desc(irq, desc) {
2238 if (!desc)
2239 continue;
2240
2181 if (desc->status & IRQ_MOVE_PENDING) { 2241 if (desc->status & IRQ_MOVE_PENDING) {
2182 unsigned long flags; 2242 unsigned long flags;
2183 2243
@@ -2229,6 +2289,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2229 struct irq_cfg *cfg; 2289 struct irq_cfg *cfg;
2230 irq = __get_cpu_var(vector_irq)[vector]; 2290 irq = __get_cpu_var(vector_irq)[vector];
2231 2291
2292 if (irq == -1)
2293 continue;
2294
2232 desc = irq_to_desc(irq); 2295 desc = irq_to_desc(irq);
2233 if (!desc) 2296 if (!desc)
2234 continue; 2297 continue;
@@ -2430,8 +2493,12 @@ static inline void init_IO_APIC_traps(void)
2430 * Also, we've got to be careful not to trash gate 2493 * Also, we've got to be careful not to trash gate
2431 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2494 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2432 */ 2495 */
2433 for_each_irq_cfg(irq, cfg) { 2496 for_each_irq_desc(irq, desc) {
2434 if (IO_APIC_IRQ(irq) && !cfg->vector) { 2497 if (!desc)
2498 continue;
2499
2500 cfg = desc->chip_data;
2501 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2435 /* 2502 /*
2436 * Hmm.. We don't have an entry for this, 2503 * Hmm.. We don't have an entry for this,
2437 * so default to an old-fashioned 8259 2504 * so default to an old-fashioned 8259
@@ -2439,11 +2506,9 @@ static inline void init_IO_APIC_traps(void)
2439 */ 2506 */
2440 if (irq < 16) 2507 if (irq < 16)
2441 make_8259A_irq(irq); 2508 make_8259A_irq(irq);
2442 else { 2509 else
2443 desc = irq_to_desc(irq);
2444 /* Strange. Oh, well.. */ 2510 /* Strange. Oh, well.. */
2445 desc->chip = &no_irq_chip; 2511 desc->chip = &no_irq_chip;
2446 }
2447 } 2512 }
2448 } 2513 }
2449} 2514}
@@ -2654,7 +2719,7 @@ static inline void __init check_timer(void)
2654 * Ok, does IRQ0 through the IOAPIC work? 2719 * Ok, does IRQ0 through the IOAPIC work?
2655 */ 2720 */
2656 if (no_pin1) { 2721 if (no_pin1) {
2657 add_pin_to_irq(0, apic1, pin1); 2722 add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
2658 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2723 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2659 } 2724 }
2660 unmask_IO_APIC_irq(0); 2725 unmask_IO_APIC_irq(0);
@@ -2683,7 +2748,7 @@ static inline void __init check_timer(void)
2683 /* 2748 /*
2684 * legacy devices should be connected to IO APIC #0 2749 * legacy devices should be connected to IO APIC #0
2685 */ 2750 */
2686 replace_pin_at_irq(0, apic1, pin1, apic2, pin2); 2751 replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
2687 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2752 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2688 unmask_IO_APIC_irq(0); 2753 unmask_IO_APIC_irq(0);
2689 enable_8259A_irq(0); 2754 enable_8259A_irq(0);
@@ -2902,21 +2967,25 @@ unsigned int create_irq_nr(unsigned int irq_want)
2902 unsigned int irq; 2967 unsigned int irq;
2903 unsigned int new; 2968 unsigned int new;
2904 unsigned long flags; 2969 unsigned long flags;
2905 struct irq_cfg *cfg_new; 2970 struct irq_cfg *cfg_new = NULL;
2906 2971 int cpu = boot_cpu_id;
2907 irq_want = nr_irqs - 1; 2972 struct irq_desc *desc_new = NULL;
2908 2973
2909 irq = 0; 2974 irq = 0;
2910 spin_lock_irqsave(&vector_lock, flags); 2975 spin_lock_irqsave(&vector_lock, flags);
2911 for (new = irq_want; new > 0; new--) { 2976 for (new = irq_want; new > 0; new--) {
2912 if (platform_legacy_irq(new)) 2977 if (platform_legacy_irq(new))
2913 continue; 2978 continue;
2914 cfg_new = irq_cfg(new); 2979
2915 if (cfg_new && cfg_new->vector != 0) 2980 desc_new = irq_to_desc_alloc_cpu(new, cpu);
2981 if (!desc_new) {
2982 printk(KERN_INFO "can not get irq_desc for %d\n", new);
2983 continue;
2984 }
2985 cfg_new = desc_new->chip_data;
2986
2987 if (cfg_new->vector != 0)
2916 continue; 2988 continue;
2917 /* check if need to create one */
2918 if (!cfg_new)
2919 cfg_new = irq_cfg_alloc(new);
2920 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 2989 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2921 irq = new; 2990 irq = new;
2922 break; 2991 break;
@@ -2925,6 +2994,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
2925 2994
2926 if (irq > 0) { 2995 if (irq > 0) {
2927 dynamic_irq_init(irq); 2996 dynamic_irq_init(irq);
2997 /* restore it, in case dynamic_irq_init clear it */
2998 if (desc_new)
2999 desc_new->chip_data = cfg_new;
2928 } 3000 }
2929 return irq; 3001 return irq;
2930} 3002}
@@ -2944,8 +3016,16 @@ int create_irq(void)
2944void destroy_irq(unsigned int irq) 3016void destroy_irq(unsigned int irq)
2945{ 3017{
2946 unsigned long flags; 3018 unsigned long flags;
3019 struct irq_cfg *cfg;
3020 struct irq_desc *desc;
2947 3021
3022 /* store it, in case dynamic_irq_cleanup clear it */
3023 desc = irq_to_desc(irq);
3024 cfg = desc->chip_data;
2948 dynamic_irq_cleanup(irq); 3025 dynamic_irq_cleanup(irq);
3026 /* connect back irq_cfg */
3027 if (desc)
3028 desc->chip_data = cfg;
2949 3029
2950#ifdef CONFIG_INTR_REMAP 3030#ifdef CONFIG_INTR_REMAP
2951 free_irte(irq); 3031 free_irte(irq);
@@ -3195,26 +3275,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3195 return 0; 3275 return 0;
3196} 3276}
3197 3277
3198static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) 3278int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3199{
3200 unsigned int irq;
3201
3202 irq = dev->bus->number;
3203 irq <<= 8;
3204 irq |= dev->devfn;
3205 irq <<= 12;
3206
3207 return irq;
3208}
3209
3210int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3211{ 3279{
3212 unsigned int irq; 3280 unsigned int irq;
3213 int ret; 3281 int ret;
3214 unsigned int irq_want; 3282 unsigned int irq_want;
3215 3283
3216 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3284 irq_want = nr_irqs - 1;
3217
3218 irq = create_irq_nr(irq_want); 3285 irq = create_irq_nr(irq_want);
3219 if (irq == 0) 3286 if (irq == 0)
3220 return -1; 3287 return -1;
@@ -3228,7 +3295,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3228 goto error; 3295 goto error;
3229no_ir: 3296no_ir:
3230#endif 3297#endif
3231 ret = setup_msi_irq(dev, desc, irq); 3298 ret = setup_msi_irq(dev, msidesc, irq);
3232 if (ret < 0) { 3299 if (ret < 0) {
3233 destroy_irq(irq); 3300 destroy_irq(irq);
3234 return ret; 3301 return ret;
@@ -3246,7 +3313,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3246{ 3313{
3247 unsigned int irq; 3314 unsigned int irq;
3248 int ret, sub_handle; 3315 int ret, sub_handle;
3249 struct msi_desc *desc; 3316 struct msi_desc *msidesc;
3250 unsigned int irq_want; 3317 unsigned int irq_want;
3251 3318
3252#ifdef CONFIG_INTR_REMAP 3319#ifdef CONFIG_INTR_REMAP
@@ -3254,10 +3321,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3254 int index = 0; 3321 int index = 0;
3255#endif 3322#endif
3256 3323
3257 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3324 irq_want = nr_irqs - 1;
3258 sub_handle = 0; 3325 sub_handle = 0;
3259 list_for_each_entry(desc, &dev->msi_list, list) { 3326 list_for_each_entry(msidesc, &dev->msi_list, list) {
3260 irq = create_irq_nr(irq_want--); 3327 irq = create_irq_nr(irq_want);
3328 irq_want--;
3261 if (irq == 0) 3329 if (irq == 0)
3262 return -1; 3330 return -1;
3263#ifdef CONFIG_INTR_REMAP 3331#ifdef CONFIG_INTR_REMAP
@@ -3289,7 +3357,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3289 } 3357 }
3290no_ir: 3358no_ir:
3291#endif 3359#endif
3292 ret = setup_msi_irq(dev, desc, irq); 3360 ret = setup_msi_irq(dev, msidesc, irq);
3293 if (ret < 0) 3361 if (ret < 0)
3294 goto error; 3362 goto error;
3295 sub_handle++; 3363 sub_handle++;
@@ -3707,17 +3775,29 @@ int __init io_apic_get_version(int ioapic)
3707 3775
3708int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3776int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3709{ 3777{
3778 struct irq_desc *desc;
3779 struct irq_cfg *cfg;
3780 int cpu = boot_cpu_id;
3781
3710 if (!IO_APIC_IRQ(irq)) { 3782 if (!IO_APIC_IRQ(irq)) {
3711 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3783 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3712 ioapic); 3784 ioapic);
3713 return -EINVAL; 3785 return -EINVAL;
3714 } 3786 }
3715 3787
3788 desc = irq_to_desc_alloc_cpu(irq, cpu);
3789 if (!desc) {
3790 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3791 return 0;
3792 }
3793
3716 /* 3794 /*
3717 * IRQs < 16 are already in the irq_2_pin[] map 3795 * IRQs < 16 are already in the irq_2_pin[] map
3718 */ 3796 */
3719 if (irq >= 16) 3797 if (irq >= 16) {
3720 add_pin_to_irq(irq, ioapic, pin); 3798 cfg = desc->chip_data;
3799 add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
3800 }
3721 3801
3722 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); 3802 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
3723 3803
@@ -3773,7 +3853,8 @@ void __init setup_ioapic_dest(void)
3773 * when you have too many devices, because at that time only boot 3853 * when you have too many devices, because at that time only boot
3774 * cpu is online. 3854 * cpu is online.
3775 */ 3855 */
3776 cfg = irq_cfg(irq); 3856 desc = irq_to_desc(irq);
3857 cfg = desc->chip_data;
3777 if (!cfg->vector) { 3858 if (!cfg->vector) {
3778 setup_IO_APIC_irq(ioapic, pin, irq, 3859 setup_IO_APIC_irq(ioapic, pin, irq,
3779 irq_trigger(irq_entry), 3860 irq_trigger(irq_entry),
@@ -3785,7 +3866,6 @@ void __init setup_ioapic_dest(void)
3785 /* 3866 /*
3786 * Honour affinities which have been set in early boot 3867 * Honour affinities which have been set in early boot
3787 */ 3868 */
3788 desc = irq_to_desc(irq);
3789 if (desc->status & 3869 if (desc->status &
3790 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 3870 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
3791 mask = desc->affinity; 3871 mask = desc->affinity;
@@ -3846,7 +3926,6 @@ void __init ioapic_init_mappings(void)
3846 struct resource *ioapic_res; 3926 struct resource *ioapic_res;
3847 int i; 3927 int i;
3848 3928
3849 irq_2_pin_init();
3850 ioapic_res = ioapic_setup_resources(); 3929 ioapic_res = ioapic_setup_resources();
3851 for (i = 0; i < nr_ioapics; i++) { 3930 for (i = 0; i < nr_ioapics; i++) {
3852 if (smp_found_config) { 3931 if (smp_found_config) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
118 } 118 }
119 119
120 desc = irq_to_desc(i); 120 desc = irq_to_desc(i);
121 if (!desc)
122 return 0;
123
121 spin_lock_irqsave(&desc->lock, flags); 124 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP 125#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i); 126 any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..119fc9c8ff7f 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
242 for_each_irq_desc(irq, desc) { 242 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 243 cpumask_t mask;
244 244
245 if (!desc)
246 continue;
245 if (irq == 2) 247 if (irq == 2)
246 continue; 248 continue;
247 249
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..900009c70591 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
94 int break_affinity = 0; 94 int break_affinity = 0;
95 int set_affinity = 1; 95 int set_affinity = 1;
96 96
97 if (!desc)
98 continue;
97 if (irq == 2) 99 if (irq == 2)
98 continue; 100 continue;
99 101
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..5a5651b7f9e6 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,7 +69,6 @@ void __init init_ISA_irqs (void)
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < 16; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i); 72 struct irq_desc *desc = irq_to_desc(i);
74 73
75 desc->status = IRQ_DISABLED; 74 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..cd9f42d028d9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,7 +143,6 @@ void __init init_ISA_irqs(void)
143 init_8259A(0); 143 init_8259A(0);
144 144
145 for (i = 0; i < 16; i++) { 145 for (i = 0; i < 16; i++) {
146 /* first time call this irq_desc */
147 struct irq_desc *desc = irq_to_desc(i); 146 struct irq_desc *desc = irq_to_desc(i);
148 147
149 desc->status = IRQ_DISABLED; 148 desc->status = IRQ_DISABLED;