diff options
Diffstat (limited to 'arch/x86/kernel/io_apic_64.c')
-rw-r--r-- | arch/x86/kernel/io_apic_64.c | 639 |
1 files changed, 591 insertions, 48 deletions
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 61a83b70c18f..02063ae042f7 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <acpi/acpi_bus.h> | 37 | #include <acpi/acpi_bus.h> |
38 | #endif | 38 | #endif |
39 | #include <linux/bootmem.h> | 39 | #include <linux/bootmem.h> |
40 | #include <linux/dmar.h> | ||
40 | 41 | ||
41 | #include <asm/idle.h> | 42 | #include <asm/idle.h> |
42 | #include <asm/io.h> | 43 | #include <asm/io.h> |
@@ -49,10 +50,13 @@ | |||
49 | #include <asm/nmi.h> | 50 | #include <asm/nmi.h> |
50 | #include <asm/msidef.h> | 51 | #include <asm/msidef.h> |
51 | #include <asm/hypertransport.h> | 52 | #include <asm/hypertransport.h> |
53 | #include <asm/irq_remapping.h> | ||
52 | 54 | ||
53 | #include <mach_ipi.h> | 55 | #include <mach_ipi.h> |
54 | #include <mach_apic.h> | 56 | #include <mach_apic.h> |
55 | 57 | ||
58 | #define __apicdebuginit(type) static type __init | ||
59 | |||
56 | struct irq_cfg { | 60 | struct irq_cfg { |
57 | cpumask_t domain; | 61 | cpumask_t domain; |
58 | cpumask_t old_domain; | 62 | cpumask_t old_domain; |
@@ -87,8 +91,6 @@ int first_system_vector = 0xfe; | |||
87 | 91 | ||
88 | char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | 92 | char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; |
89 | 93 | ||
90 | #define __apicdebuginit __init | ||
91 | |||
92 | int sis_apic_bug; /* not actually supported, dummy for compile */ | 94 | int sis_apic_bug; /* not actually supported, dummy for compile */ |
93 | 95 | ||
94 | static int no_timer_check; | 96 | static int no_timer_check; |
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock); | |||
108 | */ | 110 | */ |
109 | int nr_ioapic_registers[MAX_IO_APICS]; | 111 | int nr_ioapic_registers[MAX_IO_APICS]; |
110 | 112 | ||
113 | /* I/O APIC RTE contents at the OS boot up */ | ||
114 | struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; | ||
115 | |||
111 | /* I/O APIC entries */ | 116 | /* I/O APIC entries */ |
112 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | 117 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; |
113 | int nr_ioapics; | 118 | int nr_ioapics; |
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
303 | pin = entry->pin; | 308 | pin = entry->pin; |
304 | if (pin == -1) | 309 | if (pin == -1) |
305 | break; | 310 | break; |
306 | io_apic_write(apic, 0x11 + pin*2, dest); | 311 | /* |
312 | * With interrupt-remapping, destination information comes | ||
313 | * from interrupt-remapping table entry. | ||
314 | */ | ||
315 | if (!irq_remapped(irq)) | ||
316 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
307 | reg = io_apic_read(apic, 0x10 + pin*2); | 317 | reg = io_apic_read(apic, 0x10 + pin*2); |
308 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 318 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
309 | reg |= vector; | 319 | reg |= vector; |
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void) | |||
440 | clear_IO_APIC_pin(apic, pin); | 450 | clear_IO_APIC_pin(apic, pin); |
441 | } | 451 | } |
442 | 452 | ||
453 | /* | ||
454 | * Saves and masks all the unmasked IO-APIC RTE's | ||
455 | */ | ||
456 | int save_mask_IO_APIC_setup(void) | ||
457 | { | ||
458 | union IO_APIC_reg_01 reg_01; | ||
459 | unsigned long flags; | ||
460 | int apic, pin; | ||
461 | |||
462 | /* | ||
463 | * The number of IO-APIC IRQ registers (== #pins): | ||
464 | */ | ||
465 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
466 | spin_lock_irqsave(&ioapic_lock, flags); | ||
467 | reg_01.raw = io_apic_read(apic, 1); | ||
468 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
469 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
470 | } | ||
471 | |||
472 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
473 | early_ioapic_entries[apic] = | ||
474 | kzalloc(sizeof(struct IO_APIC_route_entry) * | ||
475 | nr_ioapic_registers[apic], GFP_KERNEL); | ||
476 | if (!early_ioapic_entries[apic]) | ||
477 | return -ENOMEM; | ||
478 | } | ||
479 | |||
480 | for (apic = 0; apic < nr_ioapics; apic++) | ||
481 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
482 | struct IO_APIC_route_entry entry; | ||
483 | |||
484 | entry = early_ioapic_entries[apic][pin] = | ||
485 | ioapic_read_entry(apic, pin); | ||
486 | if (!entry.mask) { | ||
487 | entry.mask = 1; | ||
488 | ioapic_write_entry(apic, pin, entry); | ||
489 | } | ||
490 | } | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | void restore_IO_APIC_setup(void) | ||
495 | { | ||
496 | int apic, pin; | ||
497 | |||
498 | for (apic = 0; apic < nr_ioapics; apic++) | ||
499 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | ||
500 | ioapic_write_entry(apic, pin, | ||
501 | early_ioapic_entries[apic][pin]); | ||
502 | } | ||
503 | |||
504 | void reinit_intr_remapped_IO_APIC(int intr_remapping) | ||
505 | { | ||
506 | /* | ||
507 | * for now plain restore of previous settings. | ||
508 | * TBD: In the case of OS enabling interrupt-remapping, | ||
509 | * IO-APIC RTE's need to be setup to point to interrupt-remapping | ||
510 | * table entries. for now, do a plain restore, and wait for | ||
511 | * the setup_IO_APIC_irqs() to do proper initialization. | ||
512 | */ | ||
513 | restore_IO_APIC_setup(); | ||
514 | } | ||
515 | |||
443 | int skip_ioapic_setup; | 516 | int skip_ioapic_setup; |
444 | int ioapic_force; | 517 | int ioapic_force; |
445 | 518 | ||
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu) | |||
839 | } | 912 | } |
840 | 913 | ||
841 | static struct irq_chip ioapic_chip; | 914 | static struct irq_chip ioapic_chip; |
915 | #ifdef CONFIG_INTR_REMAP | ||
916 | static struct irq_chip ir_ioapic_chip; | ||
917 | #endif | ||
842 | 918 | ||
843 | static void ioapic_register_intr(int irq, unsigned long trigger) | 919 | static void ioapic_register_intr(int irq, unsigned long trigger) |
844 | { | 920 | { |
845 | if (trigger) { | 921 | if (trigger) |
846 | irq_desc[irq].status |= IRQ_LEVEL; | 922 | irq_desc[irq].status |= IRQ_LEVEL; |
847 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 923 | else |
848 | handle_fasteoi_irq, "fasteoi"); | ||
849 | } else { | ||
850 | irq_desc[irq].status &= ~IRQ_LEVEL; | 924 | irq_desc[irq].status &= ~IRQ_LEVEL; |
925 | |||
926 | #ifdef CONFIG_INTR_REMAP | ||
927 | if (irq_remapped(irq)) { | ||
928 | irq_desc[irq].status |= IRQ_MOVE_PCNTXT; | ||
929 | if (trigger) | ||
930 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
931 | handle_fasteoi_irq, | ||
932 | "fasteoi"); | ||
933 | else | ||
934 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
935 | handle_edge_irq, "edge"); | ||
936 | return; | ||
937 | } | ||
938 | #endif | ||
939 | if (trigger) | ||
940 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | ||
941 | handle_fasteoi_irq, | ||
942 | "fasteoi"); | ||
943 | else | ||
851 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 944 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
852 | handle_edge_irq, "edge"); | 945 | handle_edge_irq, "edge"); |
946 | } | ||
947 | |||
948 | static int setup_ioapic_entry(int apic, int irq, | ||
949 | struct IO_APIC_route_entry *entry, | ||
950 | unsigned int destination, int trigger, | ||
951 | int polarity, int vector) | ||
952 | { | ||
953 | /* | ||
954 | * add it to the IO-APIC irq-routing table: | ||
955 | */ | ||
956 | memset(entry,0,sizeof(*entry)); | ||
957 | |||
958 | #ifdef CONFIG_INTR_REMAP | ||
959 | if (intr_remapping_enabled) { | ||
960 | struct intel_iommu *iommu = map_ioapic_to_ir(apic); | ||
961 | struct irte irte; | ||
962 | struct IR_IO_APIC_route_entry *ir_entry = | ||
963 | (struct IR_IO_APIC_route_entry *) entry; | ||
964 | int index; | ||
965 | |||
966 | if (!iommu) | ||
967 | panic("No mapping iommu for ioapic %d\n", apic); | ||
968 | |||
969 | index = alloc_irte(iommu, irq, 1); | ||
970 | if (index < 0) | ||
971 | panic("Failed to allocate IRTE for ioapic %d\n", apic); | ||
972 | |||
973 | memset(&irte, 0, sizeof(irte)); | ||
974 | |||
975 | irte.present = 1; | ||
976 | irte.dst_mode = INT_DEST_MODE; | ||
977 | irte.trigger_mode = trigger; | ||
978 | irte.dlvry_mode = INT_DELIVERY_MODE; | ||
979 | irte.vector = vector; | ||
980 | irte.dest_id = IRTE_DEST(destination); | ||
981 | |||
982 | modify_irte(irq, &irte); | ||
983 | |||
984 | ir_entry->index2 = (index >> 15) & 0x1; | ||
985 | ir_entry->zero = 0; | ||
986 | ir_entry->format = 1; | ||
987 | ir_entry->index = (index & 0x7fff); | ||
988 | } else | ||
989 | #endif | ||
990 | { | ||
991 | entry->delivery_mode = INT_DELIVERY_MODE; | ||
992 | entry->dest_mode = INT_DEST_MODE; | ||
993 | entry->dest = destination; | ||
853 | } | 994 | } |
995 | |||
996 | entry->mask = 0; /* enable IRQ */ | ||
997 | entry->trigger = trigger; | ||
998 | entry->polarity = polarity; | ||
999 | entry->vector = vector; | ||
1000 | |||
1001 | /* Mask level triggered irqs. | ||
1002 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | ||
1003 | */ | ||
1004 | if (trigger) | ||
1005 | entry->mask = 1; | ||
1006 | return 0; | ||
854 | } | 1007 | } |
855 | 1008 | ||
856 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | 1009 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, |
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
875 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, | 1028 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, |
876 | irq, trigger, polarity); | 1029 | irq, trigger, polarity); |
877 | 1030 | ||
878 | /* | ||
879 | * add it to the IO-APIC irq-routing table: | ||
880 | */ | ||
881 | memset(&entry,0,sizeof(entry)); | ||
882 | |||
883 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
884 | entry.dest_mode = INT_DEST_MODE; | ||
885 | entry.dest = cpu_mask_to_apicid(mask); | ||
886 | entry.mask = 0; /* enable IRQ */ | ||
887 | entry.trigger = trigger; | ||
888 | entry.polarity = polarity; | ||
889 | entry.vector = cfg->vector; | ||
890 | 1031 | ||
891 | /* Mask level triggered irqs. | 1032 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, |
892 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | 1033 | cpu_mask_to_apicid(mask), trigger, polarity, |
893 | */ | 1034 | cfg->vector)) { |
894 | if (trigger) | 1035 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
895 | entry.mask = 1; | 1036 | mp_ioapics[apic].mp_apicid, pin); |
1037 | __clear_irq_vector(irq); | ||
1038 | return; | ||
1039 | } | ||
896 | 1040 | ||
897 | ioapic_register_intr(irq, trigger); | 1041 | ioapic_register_intr(irq, trigger); |
898 | if (irq < 16) | 1042 | if (irq < 16) |
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
944 | { | 1088 | { |
945 | struct IO_APIC_route_entry entry; | 1089 | struct IO_APIC_route_entry entry; |
946 | 1090 | ||
1091 | if (intr_remapping_enabled) | ||
1092 | return; | ||
1093 | |||
947 | memset(&entry, 0, sizeof(entry)); | 1094 | memset(&entry, 0, sizeof(entry)); |
948 | 1095 | ||
949 | /* | 1096 | /* |
@@ -970,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
970 | ioapic_write_entry(apic, pin, entry); | 1117 | ioapic_write_entry(apic, pin, entry); |
971 | } | 1118 | } |
972 | 1119 | ||
973 | void __apicdebuginit print_IO_APIC(void) | 1120 | |
1121 | __apicdebuginit(void) print_IO_APIC(void) | ||
974 | { | 1122 | { |
975 | int apic, i; | 1123 | int apic, i; |
976 | union IO_APIC_reg_00 reg_00; | 1124 | union IO_APIC_reg_00 reg_00; |
@@ -1064,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
1064 | return; | 1212 | return; |
1065 | } | 1213 | } |
1066 | 1214 | ||
1067 | #if 0 | 1215 | __apicdebuginit(void) print_APIC_bitfield(int base) |
1068 | |||
1069 | static __apicdebuginit void print_APIC_bitfield (int base) | ||
1070 | { | 1216 | { |
1071 | unsigned int v; | 1217 | unsigned int v; |
1072 | int i, j; | 1218 | int i, j; |
@@ -1087,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base) | |||
1087 | } | 1233 | } |
1088 | } | 1234 | } |
1089 | 1235 | ||
1090 | void __apicdebuginit print_local_APIC(void * dummy) | 1236 | __apicdebuginit(void) print_local_APIC(void *dummy) |
1091 | { | 1237 | { |
1092 | unsigned int v, ver, maxlvt; | 1238 | unsigned int v, ver, maxlvt; |
1239 | unsigned long icr; | ||
1093 | 1240 | ||
1094 | if (apic_verbosity == APIC_QUIET) | 1241 | if (apic_verbosity == APIC_QUIET) |
1095 | return; | 1242 | return; |
@@ -1097,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1097 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1244 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1098 | smp_processor_id(), hard_smp_processor_id()); | 1245 | smp_processor_id(), hard_smp_processor_id()); |
1099 | v = apic_read(APIC_ID); | 1246 | v = apic_read(APIC_ID); |
1100 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); | 1247 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); |
1101 | v = apic_read(APIC_LVR); | 1248 | v = apic_read(APIC_LVR); |
1102 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1249 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
1103 | ver = GET_APIC_VERSION(v); | 1250 | ver = GET_APIC_VERSION(v); |
@@ -1133,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1133 | v = apic_read(APIC_ESR); | 1280 | v = apic_read(APIC_ESR); |
1134 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); | 1281 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); |
1135 | 1282 | ||
1136 | v = apic_read(APIC_ICR); | 1283 | icr = apic_icr_read(); |
1137 | printk(KERN_DEBUG "... APIC ICR: %08x\n", v); | 1284 | printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); |
1138 | v = apic_read(APIC_ICR2); | 1285 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); |
1139 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); | ||
1140 | 1286 | ||
1141 | v = apic_read(APIC_LVTT); | 1287 | v = apic_read(APIC_LVTT); |
1142 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); | 1288 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); |
@@ -1164,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1164 | printk("\n"); | 1310 | printk("\n"); |
1165 | } | 1311 | } |
1166 | 1312 | ||
1167 | void print_all_local_APICs (void) | 1313 | __apicdebuginit(void) print_all_local_APICs(void) |
1168 | { | 1314 | { |
1169 | on_each_cpu(print_local_APIC, NULL, 1); | 1315 | on_each_cpu(print_local_APIC, NULL, 1); |
1170 | } | 1316 | } |
1171 | 1317 | ||
1172 | void __apicdebuginit print_PIC(void) | 1318 | __apicdebuginit(void) print_PIC(void) |
1173 | { | 1319 | { |
1174 | unsigned int v; | 1320 | unsigned int v; |
1175 | unsigned long flags; | 1321 | unsigned long flags; |
@@ -1201,7 +1347,17 @@ void __apicdebuginit print_PIC(void) | |||
1201 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | 1347 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); |
1202 | } | 1348 | } |
1203 | 1349 | ||
1204 | #endif /* 0 */ | 1350 | __apicdebuginit(int) print_all_ICs(void) |
1351 | { | ||
1352 | print_PIC(); | ||
1353 | print_all_local_APICs(); | ||
1354 | print_IO_APIC(); | ||
1355 | |||
1356 | return 0; | ||
1357 | } | ||
1358 | |||
1359 | fs_initcall(print_all_ICs); | ||
1360 | |||
1205 | 1361 | ||
1206 | void __init enable_IO_APIC(void) | 1362 | void __init enable_IO_APIC(void) |
1207 | { | 1363 | { |
@@ -1291,7 +1447,7 @@ void disable_IO_APIC(void) | |||
1291 | entry.dest_mode = 0; /* Physical */ | 1447 | entry.dest_mode = 0; /* Physical */ |
1292 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ | 1448 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ |
1293 | entry.vector = 0; | 1449 | entry.vector = 0; |
1294 | entry.dest = GET_APIC_ID(read_apic_id()); | 1450 | entry.dest = read_apic_id(); |
1295 | 1451 | ||
1296 | /* | 1452 | /* |
1297 | * Add it to the IO-APIC irq-routing table: | 1453 | * Add it to the IO-APIC irq-routing table: |
@@ -1397,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
1397 | */ | 1553 | */ |
1398 | 1554 | ||
1399 | #ifdef CONFIG_SMP | 1555 | #ifdef CONFIG_SMP |
1556 | |||
1557 | #ifdef CONFIG_INTR_REMAP | ||
1558 | static void ir_irq_migration(struct work_struct *work); | ||
1559 | |||
1560 | static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); | ||
1561 | |||
1562 | /* | ||
1563 | * Migrate the IO-APIC irq in the presence of intr-remapping. | ||
1564 | * | ||
1565 | * For edge triggered, irq migration is a simple atomic update(of vector | ||
1566 | * and cpu destination) of IRTE and flush the hardware cache. | ||
1567 | * | ||
1568 | * For level triggered, we need to modify the io-apic RTE aswell with the update | ||
1569 | * vector information, along with modifying IRTE with vector and destination. | ||
1570 | * So irq migration for level triggered is little bit more complex compared to | ||
1571 | * edge triggered migration. But the good news is, we use the same algorithm | ||
1572 | * for level triggered migration as we have today, only difference being, | ||
1573 | * we now initiate the irq migration from process context instead of the | ||
1574 | * interrupt context. | ||
1575 | * | ||
1576 | * In future, when we do a directed EOI (combined with cpu EOI broadcast | ||
1577 | * suppression) to the IO-APIC, level triggered irq migration will also be | ||
1578 | * as simple as edge triggered migration and we can do the irq migration | ||
1579 | * with a simple atomic update to IO-APIC RTE. | ||
1580 | */ | ||
1581 | static void migrate_ioapic_irq(int irq, cpumask_t mask) | ||
1582 | { | ||
1583 | struct irq_cfg *cfg = irq_cfg + irq; | ||
1584 | struct irq_desc *desc = irq_desc + irq; | ||
1585 | cpumask_t tmp, cleanup_mask; | ||
1586 | struct irte irte; | ||
1587 | int modify_ioapic_rte = desc->status & IRQ_LEVEL; | ||
1588 | unsigned int dest; | ||
1589 | unsigned long flags; | ||
1590 | |||
1591 | cpus_and(tmp, mask, cpu_online_map); | ||
1592 | if (cpus_empty(tmp)) | ||
1593 | return; | ||
1594 | |||
1595 | if (get_irte(irq, &irte)) | ||
1596 | return; | ||
1597 | |||
1598 | if (assign_irq_vector(irq, mask)) | ||
1599 | return; | ||
1600 | |||
1601 | cpus_and(tmp, cfg->domain, mask); | ||
1602 | dest = cpu_mask_to_apicid(tmp); | ||
1603 | |||
1604 | if (modify_ioapic_rte) { | ||
1605 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1606 | __target_IO_APIC_irq(irq, dest, cfg->vector); | ||
1607 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1608 | } | ||
1609 | |||
1610 | irte.vector = cfg->vector; | ||
1611 | irte.dest_id = IRTE_DEST(dest); | ||
1612 | |||
1613 | /* | ||
1614 | * Modified the IRTE and flushes the Interrupt entry cache. | ||
1615 | */ | ||
1616 | modify_irte(irq, &irte); | ||
1617 | |||
1618 | if (cfg->move_in_progress) { | ||
1619 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
1620 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
1621 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
1622 | cfg->move_in_progress = 0; | ||
1623 | } | ||
1624 | |||
1625 | irq_desc[irq].affinity = mask; | ||
1626 | } | ||
1627 | |||
1628 | static int migrate_irq_remapped_level(int irq) | ||
1629 | { | ||
1630 | int ret = -1; | ||
1631 | |||
1632 | mask_IO_APIC_irq(irq); | ||
1633 | |||
1634 | if (io_apic_level_ack_pending(irq)) { | ||
1635 | /* | ||
1636 | * Interrupt in progress. Migrating irq now will change the | ||
1637 | * vector information in the IO-APIC RTE and that will confuse | ||
1638 | * the EOI broadcast performed by cpu. | ||
1639 | * So, delay the irq migration to the next instance. | ||
1640 | */ | ||
1641 | schedule_delayed_work(&ir_migration_work, 1); | ||
1642 | goto unmask; | ||
1643 | } | ||
1644 | |||
1645 | /* everthing is clear. we have right of way */ | ||
1646 | migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); | ||
1647 | |||
1648 | ret = 0; | ||
1649 | irq_desc[irq].status &= ~IRQ_MOVE_PENDING; | ||
1650 | cpus_clear(irq_desc[irq].pending_mask); | ||
1651 | |||
1652 | unmask: | ||
1653 | unmask_IO_APIC_irq(irq); | ||
1654 | return ret; | ||
1655 | } | ||
1656 | |||
1657 | static void ir_irq_migration(struct work_struct *work) | ||
1658 | { | ||
1659 | int irq; | ||
1660 | |||
1661 | for (irq = 0; irq < NR_IRQS; irq++) { | ||
1662 | struct irq_desc *desc = irq_desc + irq; | ||
1663 | if (desc->status & IRQ_MOVE_PENDING) { | ||
1664 | unsigned long flags; | ||
1665 | |||
1666 | spin_lock_irqsave(&desc->lock, flags); | ||
1667 | if (!desc->chip->set_affinity || | ||
1668 | !(desc->status & IRQ_MOVE_PENDING)) { | ||
1669 | desc->status &= ~IRQ_MOVE_PENDING; | ||
1670 | spin_unlock_irqrestore(&desc->lock, flags); | ||
1671 | continue; | ||
1672 | } | ||
1673 | |||
1674 | desc->chip->set_affinity(irq, | ||
1675 | irq_desc[irq].pending_mask); | ||
1676 | spin_unlock_irqrestore(&desc->lock, flags); | ||
1677 | } | ||
1678 | } | ||
1679 | } | ||
1680 | |||
1681 | /* | ||
1682 | * Migrates the IRQ destination in the process context. | ||
1683 | */ | ||
1684 | static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | ||
1685 | { | ||
1686 | if (irq_desc[irq].status & IRQ_LEVEL) { | ||
1687 | irq_desc[irq].status |= IRQ_MOVE_PENDING; | ||
1688 | irq_desc[irq].pending_mask = mask; | ||
1689 | migrate_irq_remapped_level(irq); | ||
1690 | return; | ||
1691 | } | ||
1692 | |||
1693 | migrate_ioapic_irq(irq, mask); | ||
1694 | } | ||
1695 | #endif | ||
1696 | |||
1400 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 1697 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
1401 | { | 1698 | { |
1402 | unsigned vector, me; | 1699 | unsigned vector, me; |
@@ -1453,6 +1750,17 @@ static void irq_complete_move(unsigned int irq) | |||
1453 | #else | 1750 | #else |
1454 | static inline void irq_complete_move(unsigned int irq) {} | 1751 | static inline void irq_complete_move(unsigned int irq) {} |
1455 | #endif | 1752 | #endif |
1753 | #ifdef CONFIG_INTR_REMAP | ||
1754 | static void ack_x2apic_level(unsigned int irq) | ||
1755 | { | ||
1756 | ack_x2APIC_irq(); | ||
1757 | } | ||
1758 | |||
1759 | static void ack_x2apic_edge(unsigned int irq) | ||
1760 | { | ||
1761 | ack_x2APIC_irq(); | ||
1762 | } | ||
1763 | #endif | ||
1456 | 1764 | ||
1457 | static void ack_apic_edge(unsigned int irq) | 1765 | static void ack_apic_edge(unsigned int irq) |
1458 | { | 1766 | { |
@@ -1527,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = { | |||
1527 | .retrigger = ioapic_retrigger_irq, | 1835 | .retrigger = ioapic_retrigger_irq, |
1528 | }; | 1836 | }; |
1529 | 1837 | ||
1838 | #ifdef CONFIG_INTR_REMAP | ||
1839 | static struct irq_chip ir_ioapic_chip __read_mostly = { | ||
1840 | .name = "IR-IO-APIC", | ||
1841 | .startup = startup_ioapic_irq, | ||
1842 | .mask = mask_IO_APIC_irq, | ||
1843 | .unmask = unmask_IO_APIC_irq, | ||
1844 | .ack = ack_x2apic_edge, | ||
1845 | .eoi = ack_x2apic_level, | ||
1846 | #ifdef CONFIG_SMP | ||
1847 | .set_affinity = set_ir_ioapic_affinity_irq, | ||
1848 | #endif | ||
1849 | .retrigger = ioapic_retrigger_irq, | ||
1850 | }; | ||
1851 | #endif | ||
1852 | |||
1530 | static inline void init_IO_APIC_traps(void) | 1853 | static inline void init_IO_APIC_traps(void) |
1531 | { | 1854 | { |
1532 | int irq; | 1855 | int irq; |
@@ -1712,6 +2035,8 @@ static inline void __init check_timer(void) | |||
1712 | * 8259A. | 2035 | * 8259A. |
1713 | */ | 2036 | */ |
1714 | if (pin1 == -1) { | 2037 | if (pin1 == -1) { |
2038 | if (intr_remapping_enabled) | ||
2039 | panic("BIOS bug: timer not connected to IO-APIC"); | ||
1715 | pin1 = pin2; | 2040 | pin1 = pin2; |
1716 | apic1 = apic2; | 2041 | apic1 = apic2; |
1717 | no_pin1 = 1; | 2042 | no_pin1 = 1; |
@@ -1738,6 +2063,8 @@ static inline void __init check_timer(void) | |||
1738 | clear_IO_APIC_pin(0, pin1); | 2063 | clear_IO_APIC_pin(0, pin1); |
1739 | goto out; | 2064 | goto out; |
1740 | } | 2065 | } |
2066 | if (intr_remapping_enabled) | ||
2067 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); | ||
1741 | clear_IO_APIC_pin(apic1, pin1); | 2068 | clear_IO_APIC_pin(apic1, pin1); |
1742 | if (!no_pin1) | 2069 | if (!no_pin1) |
1743 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " | 2070 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
@@ -1854,8 +2181,6 @@ void __init setup_IO_APIC(void) | |||
1854 | setup_IO_APIC_irqs(); | 2181 | setup_IO_APIC_irqs(); |
1855 | init_IO_APIC_traps(); | 2182 | init_IO_APIC_traps(); |
1856 | check_timer(); | 2183 | check_timer(); |
1857 | if (!acpi_ioapic) | ||
1858 | print_IO_APIC(); | ||
1859 | } | 2184 | } |
1860 | 2185 | ||
1861 | struct sysfs_ioapic_data { | 2186 | struct sysfs_ioapic_data { |
@@ -1977,6 +2302,9 @@ void destroy_irq(unsigned int irq) | |||
1977 | 2302 | ||
1978 | dynamic_irq_cleanup(irq); | 2303 | dynamic_irq_cleanup(irq); |
1979 | 2304 | ||
2305 | #ifdef CONFIG_INTR_REMAP | ||
2306 | free_irte(irq); | ||
2307 | #endif | ||
1980 | spin_lock_irqsave(&vector_lock, flags); | 2308 | spin_lock_irqsave(&vector_lock, flags); |
1981 | __clear_irq_vector(irq); | 2309 | __clear_irq_vector(irq); |
1982 | spin_unlock_irqrestore(&vector_lock, flags); | 2310 | spin_unlock_irqrestore(&vector_lock, flags); |
@@ -1995,11 +2323,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
1995 | 2323 | ||
1996 | tmp = TARGET_CPUS; | 2324 | tmp = TARGET_CPUS; |
1997 | err = assign_irq_vector(irq, tmp); | 2325 | err = assign_irq_vector(irq, tmp); |
1998 | if (!err) { | 2326 | if (err) |
1999 | cpus_and(tmp, cfg->domain, tmp); | 2327 | return err; |
2000 | dest = cpu_mask_to_apicid(tmp); | 2328 | |
2329 | cpus_and(tmp, cfg->domain, tmp); | ||
2330 | dest = cpu_mask_to_apicid(tmp); | ||
2331 | |||
2332 | #ifdef CONFIG_INTR_REMAP | ||
2333 | if (irq_remapped(irq)) { | ||
2334 | struct irte irte; | ||
2335 | int ir_index; | ||
2336 | u16 sub_handle; | ||
2337 | |||
2338 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); | ||
2339 | BUG_ON(ir_index == -1); | ||
2340 | |||
2341 | memset (&irte, 0, sizeof(irte)); | ||
2342 | |||
2343 | irte.present = 1; | ||
2344 | irte.dst_mode = INT_DEST_MODE; | ||
2345 | irte.trigger_mode = 0; /* edge */ | ||
2346 | irte.dlvry_mode = INT_DELIVERY_MODE; | ||
2347 | irte.vector = cfg->vector; | ||
2348 | irte.dest_id = IRTE_DEST(dest); | ||
2349 | |||
2350 | modify_irte(irq, &irte); | ||
2001 | 2351 | ||
2002 | msg->address_hi = MSI_ADDR_BASE_HI; | 2352 | msg->address_hi = MSI_ADDR_BASE_HI; |
2353 | msg->data = sub_handle; | ||
2354 | msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | | ||
2355 | MSI_ADDR_IR_SHV | | ||
2356 | MSI_ADDR_IR_INDEX1(ir_index) | | ||
2357 | MSI_ADDR_IR_INDEX2(ir_index); | ||
2358 | } else | ||
2359 | #endif | ||
2360 | { | ||
2361 | msg->address_hi = MSI_ADDR_BASE_HI; | ||
2003 | msg->address_lo = | 2362 | msg->address_lo = |
2004 | MSI_ADDR_BASE_LO | | 2363 | MSI_ADDR_BASE_LO | |
2005 | ((INT_DEST_MODE == 0) ? | 2364 | ((INT_DEST_MODE == 0) ? |
@@ -2049,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
2049 | write_msi_msg(irq, &msg); | 2408 | write_msi_msg(irq, &msg); |
2050 | irq_desc[irq].affinity = mask; | 2409 | irq_desc[irq].affinity = mask; |
2051 | } | 2410 | } |
2411 | |||
2412 | #ifdef CONFIG_INTR_REMAP | ||
2413 | /* | ||
2414 | * Migrate the MSI irq to another cpumask. This migration is | ||
2415 | * done in the process context using interrupt-remapping hardware. | ||
2416 | */ | ||
2417 | static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | ||
2418 | { | ||
2419 | struct irq_cfg *cfg = irq_cfg + irq; | ||
2420 | unsigned int dest; | ||
2421 | cpumask_t tmp, cleanup_mask; | ||
2422 | struct irte irte; | ||
2423 | |||
2424 | cpus_and(tmp, mask, cpu_online_map); | ||
2425 | if (cpus_empty(tmp)) | ||
2426 | return; | ||
2427 | |||
2428 | if (get_irte(irq, &irte)) | ||
2429 | return; | ||
2430 | |||
2431 | if (assign_irq_vector(irq, mask)) | ||
2432 | return; | ||
2433 | |||
2434 | cpus_and(tmp, cfg->domain, mask); | ||
2435 | dest = cpu_mask_to_apicid(tmp); | ||
2436 | |||
2437 | irte.vector = cfg->vector; | ||
2438 | irte.dest_id = IRTE_DEST(dest); | ||
2439 | |||
2440 | /* | ||
2441 | * atomically update the IRTE with the new destination and vector. | ||
2442 | */ | ||
2443 | modify_irte(irq, &irte); | ||
2444 | |||
2445 | /* | ||
2446 | * After this point, all the interrupts will start arriving | ||
2447 | * at the new destination. So, time to cleanup the previous | ||
2448 | * vector allocation. | ||
2449 | */ | ||
2450 | if (cfg->move_in_progress) { | ||
2451 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
2452 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
2453 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2454 | cfg->move_in_progress = 0; | ||
2455 | } | ||
2456 | |||
2457 | irq_desc[irq].affinity = mask; | ||
2458 | } | ||
2459 | #endif | ||
2052 | #endif /* CONFIG_SMP */ | 2460 | #endif /* CONFIG_SMP */ |
2053 | 2461 | ||
2054 | /* | 2462 | /* |
@@ -2066,26 +2474,157 @@ static struct irq_chip msi_chip = { | |||
2066 | .retrigger = ioapic_retrigger_irq, | 2474 | .retrigger = ioapic_retrigger_irq, |
2067 | }; | 2475 | }; |
2068 | 2476 | ||
2069 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | 2477 | #ifdef CONFIG_INTR_REMAP |
2478 | static struct irq_chip msi_ir_chip = { | ||
2479 | .name = "IR-PCI-MSI", | ||
2480 | .unmask = unmask_msi_irq, | ||
2481 | .mask = mask_msi_irq, | ||
2482 | .ack = ack_x2apic_edge, | ||
2483 | #ifdef CONFIG_SMP | ||
2484 | .set_affinity = ir_set_msi_irq_affinity, | ||
2485 | #endif | ||
2486 | .retrigger = ioapic_retrigger_irq, | ||
2487 | }; | ||
2488 | |||
2489 | /* | ||
2490 | * Map the PCI dev to the corresponding remapping hardware unit | ||
2491 | * and allocate 'nvec' consecutive interrupt-remapping table entries | ||
2492 | * in it. | ||
2493 | */ | ||
2494 | static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | ||
2070 | { | 2495 | { |
2496 | struct intel_iommu *iommu; | ||
2497 | int index; | ||
2498 | |||
2499 | iommu = map_dev_to_ir(dev); | ||
2500 | if (!iommu) { | ||
2501 | printk(KERN_ERR | ||
2502 | "Unable to map PCI %s to iommu\n", pci_name(dev)); | ||
2503 | return -ENOENT; | ||
2504 | } | ||
2505 | |||
2506 | index = alloc_irte(iommu, irq, nvec); | ||
2507 | if (index < 0) { | ||
2508 | printk(KERN_ERR | ||
2509 | "Unable to allocate %d IRTE for PCI %s\n", nvec, | ||
2510 | pci_name(dev)); | ||
2511 | return -ENOSPC; | ||
2512 | } | ||
2513 | return index; | ||
2514 | } | ||
2515 | #endif | ||
2516 | |||
2517 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | ||
2518 | { | ||
2519 | int ret; | ||
2071 | struct msi_msg msg; | 2520 | struct msi_msg msg; |
2521 | |||
2522 | ret = msi_compose_msg(dev, irq, &msg); | ||
2523 | if (ret < 0) | ||
2524 | return ret; | ||
2525 | |||
2526 | set_irq_msi(irq, desc); | ||
2527 | write_msi_msg(irq, &msg); | ||
2528 | |||
2529 | #ifdef CONFIG_INTR_REMAP | ||
2530 | if (irq_remapped(irq)) { | ||
2531 | struct irq_desc *desc = irq_desc + irq; | ||
2532 | /* | ||
2533 | * irq migration in process context | ||
2534 | */ | ||
2535 | desc->status |= IRQ_MOVE_PCNTXT; | ||
2536 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | ||
2537 | } else | ||
2538 | #endif | ||
2539 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | ||
2540 | |||
2541 | return 0; | ||
2542 | } | ||
2543 | |||
2544 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
2545 | { | ||
2072 | int irq, ret; | 2546 | int irq, ret; |
2547 | |||
2073 | irq = create_irq(); | 2548 | irq = create_irq(); |
2074 | if (irq < 0) | 2549 | if (irq < 0) |
2075 | return irq; | 2550 | return irq; |
2076 | 2551 | ||
2077 | ret = msi_compose_msg(dev, irq, &msg); | 2552 | #ifdef CONFIG_INTR_REMAP |
2553 | if (!intr_remapping_enabled) | ||
2554 | goto no_ir; | ||
2555 | |||
2556 | ret = msi_alloc_irte(dev, irq, 1); | ||
2557 | if (ret < 0) | ||
2558 | goto error; | ||
2559 | no_ir: | ||
2560 | #endif | ||
2561 | ret = setup_msi_irq(dev, desc, irq); | ||
2078 | if (ret < 0) { | 2562 | if (ret < 0) { |
2079 | destroy_irq(irq); | 2563 | destroy_irq(irq); |
2080 | return ret; | 2564 | return ret; |
2081 | } | 2565 | } |
2566 | return 0; | ||
2082 | 2567 | ||
2083 | set_irq_msi(irq, desc); | 2568 | #ifdef CONFIG_INTR_REMAP |
2084 | write_msi_msg(irq, &msg); | 2569 | error: |
2570 | destroy_irq(irq); | ||
2571 | return ret; | ||
2572 | #endif | ||
2573 | } | ||
2085 | 2574 | ||
2086 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | 2575 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
2576 | { | ||
2577 | int irq, ret, sub_handle; | ||
2578 | struct msi_desc *desc; | ||
2579 | #ifdef CONFIG_INTR_REMAP | ||
2580 | struct intel_iommu *iommu = 0; | ||
2581 | int index = 0; | ||
2582 | #endif | ||
2583 | |||
2584 | sub_handle = 0; | ||
2585 | list_for_each_entry(desc, &dev->msi_list, list) { | ||
2586 | irq = create_irq(); | ||
2587 | if (irq < 0) | ||
2588 | return irq; | ||
2589 | #ifdef CONFIG_INTR_REMAP | ||
2590 | if (!intr_remapping_enabled) | ||
2591 | goto no_ir; | ||
2087 | 2592 | ||
2593 | if (!sub_handle) { | ||
2594 | /* | ||
2595 | * allocate the consecutive block of IRTE's | ||
2596 | * for 'nvec' | ||
2597 | */ | ||
2598 | index = msi_alloc_irte(dev, irq, nvec); | ||
2599 | if (index < 0) { | ||
2600 | ret = index; | ||
2601 | goto error; | ||
2602 | } | ||
2603 | } else { | ||
2604 | iommu = map_dev_to_ir(dev); | ||
2605 | if (!iommu) { | ||
2606 | ret = -ENOENT; | ||
2607 | goto error; | ||
2608 | } | ||
2609 | /* | ||
2610 | * setup the mapping between the irq and the IRTE | ||
2611 | * base index, the sub_handle pointing to the | ||
2612 | * appropriate interrupt remap table entry. | ||
2613 | */ | ||
2614 | set_irte_irq(irq, iommu, index, sub_handle); | ||
2615 | } | ||
2616 | no_ir: | ||
2617 | #endif | ||
2618 | ret = setup_msi_irq(dev, desc, irq); | ||
2619 | if (ret < 0) | ||
2620 | goto error; | ||
2621 | sub_handle++; | ||
2622 | } | ||
2088 | return 0; | 2623 | return 0; |
2624 | |||
2625 | error: | ||
2626 | destroy_irq(irq); | ||
2627 | return ret; | ||
2089 | } | 2628 | } |
2090 | 2629 | ||
2091 | void arch_teardown_msi_irq(unsigned int irq) | 2630 | void arch_teardown_msi_irq(unsigned int irq) |
@@ -2333,6 +2872,10 @@ void __init setup_ioapic_dest(void) | |||
2333 | setup_IO_APIC_irq(ioapic, pin, irq, | 2872 | setup_IO_APIC_irq(ioapic, pin, irq, |
2334 | irq_trigger(irq_entry), | 2873 | irq_trigger(irq_entry), |
2335 | irq_polarity(irq_entry)); | 2874 | irq_polarity(irq_entry)); |
2875 | #ifdef CONFIG_INTR_REMAP | ||
2876 | else if (intr_remapping_enabled) | ||
2877 | set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
2878 | #endif | ||
2336 | else | 2879 | else |
2337 | set_ioapic_affinity_irq(irq, TARGET_CPUS); | 2880 | set_ioapic_affinity_irq(irq, TARGET_CPUS); |
2338 | } | 2881 | } |