diff options
Diffstat (limited to 'arch/x86/kernel/io_apic_64.c')
-rw-r--r-- | arch/x86/kernel/io_apic_64.c | 608 |
1 files changed, 572 insertions, 36 deletions
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d1707..b9950dae59b7 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <acpi/acpi_bus.h> | 37 | #include <acpi/acpi_bus.h> |
38 | #endif | 38 | #endif |
39 | #include <linux/bootmem.h> | 39 | #include <linux/bootmem.h> |
40 | #include <linux/dmar.h> | ||
40 | 41 | ||
41 | #include <asm/idle.h> | 42 | #include <asm/idle.h> |
42 | #include <asm/io.h> | 43 | #include <asm/io.h> |
@@ -49,6 +50,7 @@ | |||
49 | #include <asm/nmi.h> | 50 | #include <asm/nmi.h> |
50 | #include <asm/msidef.h> | 51 | #include <asm/msidef.h> |
51 | #include <asm/hypertransport.h> | 52 | #include <asm/hypertransport.h> |
53 | #include <asm/irq_remapping.h> | ||
52 | 54 | ||
53 | #include <mach_ipi.h> | 55 | #include <mach_ipi.h> |
54 | #include <mach_apic.h> | 56 | #include <mach_apic.h> |
@@ -108,6 +110,9 @@ DEFINE_SPINLOCK(vector_lock); | |||
108 | */ | 110 | */ |
109 | int nr_ioapic_registers[MAX_IO_APICS]; | 111 | int nr_ioapic_registers[MAX_IO_APICS]; |
110 | 112 | ||
113 | /* I/O APIC RTE contents at the OS boot up */ | ||
114 | struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; | ||
115 | |||
111 | /* I/O APIC entries */ | 116 | /* I/O APIC entries */ |
112 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | 117 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; |
113 | int nr_ioapics; | 118 | int nr_ioapics; |
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
303 | pin = entry->pin; | 308 | pin = entry->pin; |
304 | if (pin == -1) | 309 | if (pin == -1) |
305 | break; | 310 | break; |
306 | io_apic_write(apic, 0x11 + pin*2, dest); | 311 | /* |
312 | * With interrupt-remapping, destination information comes | ||
313 | * from interrupt-remapping table entry. | ||
314 | */ | ||
315 | if (!irq_remapped(irq)) | ||
316 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
307 | reg = io_apic_read(apic, 0x10 + pin*2); | 317 | reg = io_apic_read(apic, 0x10 + pin*2); |
308 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 318 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
309 | reg |= vector; | 319 | reg |= vector; |
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void) | |||
440 | clear_IO_APIC_pin(apic, pin); | 450 | clear_IO_APIC_pin(apic, pin); |
441 | } | 451 | } |
442 | 452 | ||
453 | /* | ||
454 | * Saves and masks all the unmasked IO-APIC RTE's | ||
455 | */ | ||
456 | int save_mask_IO_APIC_setup(void) | ||
457 | { | ||
458 | union IO_APIC_reg_01 reg_01; | ||
459 | unsigned long flags; | ||
460 | int apic, pin; | ||
461 | |||
462 | /* | ||
463 | * The number of IO-APIC IRQ registers (== #pins): | ||
464 | */ | ||
465 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
466 | spin_lock_irqsave(&ioapic_lock, flags); | ||
467 | reg_01.raw = io_apic_read(apic, 1); | ||
468 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
469 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
470 | } | ||
471 | |||
472 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
473 | early_ioapic_entries[apic] = | ||
474 | kzalloc(sizeof(struct IO_APIC_route_entry) * | ||
475 | nr_ioapic_registers[apic], GFP_KERNEL); | ||
476 | if (!early_ioapic_entries[apic]) | ||
477 | return -ENOMEM; | ||
478 | } | ||
479 | |||
480 | for (apic = 0; apic < nr_ioapics; apic++) | ||
481 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
482 | struct IO_APIC_route_entry entry; | ||
483 | |||
484 | entry = early_ioapic_entries[apic][pin] = | ||
485 | ioapic_read_entry(apic, pin); | ||
486 | if (!entry.mask) { | ||
487 | entry.mask = 1; | ||
488 | ioapic_write_entry(apic, pin, entry); | ||
489 | } | ||
490 | } | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | void restore_IO_APIC_setup(void) | ||
495 | { | ||
496 | int apic, pin; | ||
497 | |||
498 | for (apic = 0; apic < nr_ioapics; apic++) | ||
499 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | ||
500 | ioapic_write_entry(apic, pin, | ||
501 | early_ioapic_entries[apic][pin]); | ||
502 | } | ||
503 | |||
504 | void reinit_intr_remapped_IO_APIC(int intr_remapping) | ||
505 | { | ||
506 | /* | ||
507 | * for now plain restore of previous settings. | ||
508 | * TBD: In the case of OS enabling interrupt-remapping, | ||
509 | * IO-APIC RTE's need to be setup to point to interrupt-remapping | ||
510 | * table entries. for now, do a plain restore, and wait for | ||
511 | * the setup_IO_APIC_irqs() to do proper initialization. | ||
512 | */ | ||
513 | restore_IO_APIC_setup(); | ||
514 | } | ||
515 | |||
443 | int skip_ioapic_setup; | 516 | int skip_ioapic_setup; |
444 | int ioapic_force; | 517 | int ioapic_force; |
445 | 518 | ||
@@ -834,18 +907,98 @@ void setup_vector_irq(int cpu) | |||
834 | 907 | ||
835 | 908 | ||
836 | static struct irq_chip ioapic_chip; | 909 | static struct irq_chip ioapic_chip; |
910 | #ifdef CONFIG_INTR_REMAP | ||
911 | static struct irq_chip ir_ioapic_chip; | ||
912 | #endif | ||
837 | 913 | ||
838 | static void ioapic_register_intr(int irq, unsigned long trigger) | 914 | static void ioapic_register_intr(int irq, unsigned long trigger) |
839 | { | 915 | { |
840 | if (trigger) { | 916 | if (trigger) |
841 | irq_desc[irq].status |= IRQ_LEVEL; | 917 | irq_desc[irq].status |= IRQ_LEVEL; |
842 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 918 | else |
843 | handle_fasteoi_irq, "fasteoi"); | ||
844 | } else { | ||
845 | irq_desc[irq].status &= ~IRQ_LEVEL; | 919 | irq_desc[irq].status &= ~IRQ_LEVEL; |
920 | |||
921 | #ifdef CONFIG_INTR_REMAP | ||
922 | if (irq_remapped(irq)) { | ||
923 | irq_desc[irq].status |= IRQ_MOVE_PCNTXT; | ||
924 | if (trigger) | ||
925 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
926 | handle_fasteoi_irq, | ||
927 | "fasteoi"); | ||
928 | else | ||
929 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
930 | handle_edge_irq, "edge"); | ||
931 | return; | ||
932 | } | ||
933 | #endif | ||
934 | if (trigger) | ||
935 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | ||
936 | handle_fasteoi_irq, | ||
937 | "fasteoi"); | ||
938 | else | ||
846 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 939 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
847 | handle_edge_irq, "edge"); | 940 | handle_edge_irq, "edge"); |
941 | } | ||
942 | |||
943 | static int setup_ioapic_entry(int apic, int irq, | ||
944 | struct IO_APIC_route_entry *entry, | ||
945 | unsigned int destination, int trigger, | ||
946 | int polarity, int vector) | ||
947 | { | ||
948 | /* | ||
949 | * add it to the IO-APIC irq-routing table: | ||
950 | */ | ||
951 | memset(entry,0,sizeof(*entry)); | ||
952 | |||
953 | #ifdef CONFIG_INTR_REMAP | ||
954 | if (intr_remapping_enabled) { | ||
955 | struct intel_iommu *iommu = map_ioapic_to_ir(apic); | ||
956 | struct irte irte; | ||
957 | struct IR_IO_APIC_route_entry *ir_entry = | ||
958 | (struct IR_IO_APIC_route_entry *) entry; | ||
959 | int index; | ||
960 | |||
961 | if (!iommu) | ||
962 | panic("No mapping iommu for ioapic %d\n", apic); | ||
963 | |||
964 | index = alloc_irte(iommu, irq, 1); | ||
965 | if (index < 0) | ||
966 | panic("Failed to allocate IRTE for ioapic %d\n", apic); | ||
967 | |||
968 | memset(&irte, 0, sizeof(irte)); | ||
969 | |||
970 | irte.present = 1; | ||
971 | irte.dst_mode = INT_DEST_MODE; | ||
972 | irte.trigger_mode = trigger; | ||
973 | irte.dlvry_mode = INT_DELIVERY_MODE; | ||
974 | irte.vector = vector; | ||
975 | irte.dest_id = IRTE_DEST(destination); | ||
976 | |||
977 | modify_irte(irq, &irte); | ||
978 | |||
979 | ir_entry->index2 = (index >> 15) & 0x1; | ||
980 | ir_entry->zero = 0; | ||
981 | ir_entry->format = 1; | ||
982 | ir_entry->index = (index & 0x7fff); | ||
983 | } else | ||
984 | #endif | ||
985 | { | ||
986 | entry->delivery_mode = INT_DELIVERY_MODE; | ||
987 | entry->dest_mode = INT_DEST_MODE; | ||
988 | entry->dest = destination; | ||
848 | } | 989 | } |
990 | |||
991 | entry->mask = 0; /* enable IRQ */ | ||
992 | entry->trigger = trigger; | ||
993 | entry->polarity = polarity; | ||
994 | entry->vector = vector; | ||
995 | |||
996 | /* Mask level triggered irqs. | ||
997 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | ||
998 | */ | ||
999 | if (trigger) | ||
1000 | entry->mask = 1; | ||
1001 | return 0; | ||
849 | } | 1002 | } |
850 | 1003 | ||
851 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | 1004 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, |
@@ -870,24 +1023,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
870 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, | 1023 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, |
871 | irq, trigger, polarity); | 1024 | irq, trigger, polarity); |
872 | 1025 | ||
873 | /* | ||
874 | * add it to the IO-APIC irq-routing table: | ||
875 | */ | ||
876 | memset(&entry,0,sizeof(entry)); | ||
877 | 1026 | ||
878 | entry.delivery_mode = INT_DELIVERY_MODE; | 1027 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, |
879 | entry.dest_mode = INT_DEST_MODE; | 1028 | cpu_mask_to_apicid(mask), trigger, polarity, |
880 | entry.dest = cpu_mask_to_apicid(mask); | 1029 | cfg->vector)) { |
881 | entry.mask = 0; /* enable IRQ */ | 1030 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
882 | entry.trigger = trigger; | 1031 | mp_ioapics[apic].mp_apicid, pin); |
883 | entry.polarity = polarity; | 1032 | __clear_irq_vector(irq); |
884 | entry.vector = cfg->vector; | 1033 | return; |
885 | 1034 | } | |
886 | /* Mask level triggered irqs. | ||
887 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | ||
888 | */ | ||
889 | if (trigger) | ||
890 | entry.mask = 1; | ||
891 | 1035 | ||
892 | ioapic_register_intr(irq, trigger); | 1036 | ioapic_register_intr(irq, trigger); |
893 | if (irq < 16) | 1037 | if (irq < 16) |
@@ -939,6 +1083,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
939 | { | 1083 | { |
940 | struct IO_APIC_route_entry entry; | 1084 | struct IO_APIC_route_entry entry; |
941 | 1085 | ||
1086 | if (intr_remapping_enabled) | ||
1087 | return; | ||
1088 | |||
942 | memset(&entry, 0, sizeof(entry)); | 1089 | memset(&entry, 0, sizeof(entry)); |
943 | 1090 | ||
944 | /* | 1091 | /* |
@@ -1085,6 +1232,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) | |||
1085 | void __apicdebuginit print_local_APIC(void * dummy) | 1232 | void __apicdebuginit print_local_APIC(void * dummy) |
1086 | { | 1233 | { |
1087 | unsigned int v, ver, maxlvt; | 1234 | unsigned int v, ver, maxlvt; |
1235 | unsigned long icr; | ||
1088 | 1236 | ||
1089 | if (apic_verbosity == APIC_QUIET) | 1237 | if (apic_verbosity == APIC_QUIET) |
1090 | return; | 1238 | return; |
@@ -1092,7 +1240,7 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1092 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1240 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1093 | smp_processor_id(), hard_smp_processor_id()); | 1241 | smp_processor_id(), hard_smp_processor_id()); |
1094 | v = apic_read(APIC_ID); | 1242 | v = apic_read(APIC_ID); |
1095 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); | 1243 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); |
1096 | v = apic_read(APIC_LVR); | 1244 | v = apic_read(APIC_LVR); |
1097 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1245 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
1098 | ver = GET_APIC_VERSION(v); | 1246 | ver = GET_APIC_VERSION(v); |
@@ -1128,10 +1276,9 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1128 | v = apic_read(APIC_ESR); | 1276 | v = apic_read(APIC_ESR); |
1129 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); | 1277 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); |
1130 | 1278 | ||
1131 | v = apic_read(APIC_ICR); | 1279 | icr = apic_icr_read(); |
1132 | printk(KERN_DEBUG "... APIC ICR: %08x\n", v); | 1280 | printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); |
1133 | v = apic_read(APIC_ICR2); | 1281 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); |
1134 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); | ||
1135 | 1282 | ||
1136 | v = apic_read(APIC_LVTT); | 1283 | v = apic_read(APIC_LVTT); |
1137 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); | 1284 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); |
@@ -1286,7 +1433,7 @@ void disable_IO_APIC(void) | |||
1286 | entry.dest_mode = 0; /* Physical */ | 1433 | entry.dest_mode = 0; /* Physical */ |
1287 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ | 1434 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ |
1288 | entry.vector = 0; | 1435 | entry.vector = 0; |
1289 | entry.dest = GET_APIC_ID(read_apic_id()); | 1436 | entry.dest = read_apic_id(); |
1290 | 1437 | ||
1291 | /* | 1438 | /* |
1292 | * Add it to the IO-APIC irq-routing table: | 1439 | * Add it to the IO-APIC irq-routing table: |
@@ -1392,6 +1539,147 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
1392 | */ | 1539 | */ |
1393 | 1540 | ||
1394 | #ifdef CONFIG_SMP | 1541 | #ifdef CONFIG_SMP |
1542 | |||
1543 | #ifdef CONFIG_INTR_REMAP | ||
1544 | static void ir_irq_migration(struct work_struct *work); | ||
1545 | |||
1546 | static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); | ||
1547 | |||
1548 | /* | ||
1549 | * Migrate the IO-APIC irq in the presence of intr-remapping. | ||
1550 | * | ||
1551 | * For edge triggered, irq migration is a simple atomic update(of vector | ||
1552 | * and cpu destination) of IRTE and flush the hardware cache. | ||
1553 | * | ||
1554 | * For level triggered, we need to modify the io-apic RTE as well with the updated | ||
1555 | * vector information, along with modifying IRTE with vector and destination. | ||
1556 | * So irq migration for level triggered is little bit more complex compared to | ||
1557 | * edge triggered migration. But the good news is, we use the same algorithm | ||
1558 | * for level triggered migration as we have today, only difference being, | ||
1559 | * we now initiate the irq migration from process context instead of the | ||
1560 | * interrupt context. | ||
1561 | * | ||
1562 | * In future, when we do a directed EOI (combined with cpu EOI broadcast | ||
1563 | * suppression) to the IO-APIC, level triggered irq migration will also be | ||
1564 | * as simple as edge triggered migration and we can do the irq migration | ||
1565 | * with a simple atomic update to IO-APIC RTE. | ||
1566 | */ | ||
1567 | static void migrate_ioapic_irq(int irq, cpumask_t mask) | ||
1568 | { | ||
1569 | struct irq_cfg *cfg = irq_cfg + irq; | ||
1570 | struct irq_desc *desc = irq_desc + irq; | ||
1571 | cpumask_t tmp, cleanup_mask; | ||
1572 | struct irte irte; | ||
1573 | int modify_ioapic_rte = desc->status & IRQ_LEVEL; | ||
1574 | unsigned int dest; | ||
1575 | unsigned long flags; | ||
1576 | |||
1577 | cpus_and(tmp, mask, cpu_online_map); | ||
1578 | if (cpus_empty(tmp)) | ||
1579 | return; | ||
1580 | |||
1581 | if (get_irte(irq, &irte)) | ||
1582 | return; | ||
1583 | |||
1584 | if (assign_irq_vector(irq, mask)) | ||
1585 | return; | ||
1586 | |||
1587 | cpus_and(tmp, cfg->domain, mask); | ||
1588 | dest = cpu_mask_to_apicid(tmp); | ||
1589 | |||
1590 | if (modify_ioapic_rte) { | ||
1591 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1592 | __target_IO_APIC_irq(irq, dest, cfg->vector); | ||
1593 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1594 | } | ||
1595 | |||
1596 | irte.vector = cfg->vector; | ||
1597 | irte.dest_id = IRTE_DEST(dest); | ||
1598 | |||
1599 | /* | ||
1600 | * Modifies the IRTE and flushes the interrupt entry cache. | ||
1601 | */ | ||
1602 | modify_irte(irq, &irte); | ||
1603 | |||
1604 | if (cfg->move_in_progress) { | ||
1605 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
1606 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
1607 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
1608 | cfg->move_in_progress = 0; | ||
1609 | } | ||
1610 | |||
1611 | irq_desc[irq].affinity = mask; | ||
1612 | } | ||
1613 | |||
1614 | static int migrate_irq_remapped_level(int irq) | ||
1615 | { | ||
1616 | int ret = -1; | ||
1617 | |||
1618 | mask_IO_APIC_irq(irq); | ||
1619 | |||
1620 | if (io_apic_level_ack_pending(irq)) { | ||
1621 | /* | ||
1622 | * Interrupt in progress. Migrating irq now will change the | ||
1623 | * vector information in the IO-APIC RTE and that will confuse | ||
1624 | * the EOI broadcast performed by cpu. | ||
1625 | * So, delay the irq migration to the next instance. | ||
1626 | */ | ||
1627 | schedule_delayed_work(&ir_migration_work, 1); | ||
1628 | goto unmask; | ||
1629 | } | ||
1630 | |||
1631 | /* everything is clear. we have right of way */ | ||
1632 | migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); | ||
1633 | |||
1634 | ret = 0; | ||
1635 | irq_desc[irq].status &= ~IRQ_MOVE_PENDING; | ||
1636 | cpus_clear(irq_desc[irq].pending_mask); | ||
1637 | |||
1638 | unmask: | ||
1639 | unmask_IO_APIC_irq(irq); | ||
1640 | return ret; | ||
1641 | } | ||
1642 | |||
1643 | static void ir_irq_migration(struct work_struct *work) | ||
1644 | { | ||
1645 | int irq; | ||
1646 | |||
1647 | for (irq = 0; irq < NR_IRQS; irq++) { | ||
1648 | struct irq_desc *desc = irq_desc + irq; | ||
1649 | if (desc->status & IRQ_MOVE_PENDING) { | ||
1650 | unsigned long flags; | ||
1651 | |||
1652 | spin_lock_irqsave(&desc->lock, flags); | ||
1653 | if (!desc->chip->set_affinity || | ||
1654 | !(desc->status & IRQ_MOVE_PENDING)) { | ||
1655 | desc->status &= ~IRQ_MOVE_PENDING; | ||
1656 | spin_unlock_irqrestore(&desc->lock, flags); | ||
1657 | continue; | ||
1658 | } | ||
1659 | |||
1660 | desc->chip->set_affinity(irq, | ||
1661 | irq_desc[irq].pending_mask); | ||
1662 | spin_unlock_irqrestore(&desc->lock, flags); | ||
1663 | } | ||
1664 | } | ||
1665 | } | ||
1666 | |||
1667 | /* | ||
1668 | * Migrates the IRQ destination in the process context. | ||
1669 | */ | ||
1670 | static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | ||
1671 | { | ||
1672 | if (irq_desc[irq].status & IRQ_LEVEL) { | ||
1673 | irq_desc[irq].status |= IRQ_MOVE_PENDING; | ||
1674 | irq_desc[irq].pending_mask = mask; | ||
1675 | migrate_irq_remapped_level(irq); | ||
1676 | return; | ||
1677 | } | ||
1678 | |||
1679 | migrate_ioapic_irq(irq, mask); | ||
1680 | } | ||
1681 | #endif | ||
1682 | |||
1395 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 1683 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
1396 | { | 1684 | { |
1397 | unsigned vector, me; | 1685 | unsigned vector, me; |
@@ -1448,6 +1736,17 @@ static void irq_complete_move(unsigned int irq) | |||
1448 | #else | 1736 | #else |
1449 | static inline void irq_complete_move(unsigned int irq) {} | 1737 | static inline void irq_complete_move(unsigned int irq) {} |
1450 | #endif | 1738 | #endif |
1739 | #ifdef CONFIG_INTR_REMAP | ||
1740 | static void ack_x2apic_level(unsigned int irq) | ||
1741 | { | ||
1742 | ack_x2APIC_irq(); | ||
1743 | } | ||
1744 | |||
1745 | static void ack_x2apic_edge(unsigned int irq) | ||
1746 | { | ||
1747 | ack_x2APIC_irq(); | ||
1748 | } | ||
1749 | #endif | ||
1451 | 1750 | ||
1452 | static void ack_apic_edge(unsigned int irq) | 1751 | static void ack_apic_edge(unsigned int irq) |
1453 | { | 1752 | { |
@@ -1522,6 +1821,21 @@ static struct irq_chip ioapic_chip __read_mostly = { | |||
1522 | .retrigger = ioapic_retrigger_irq, | 1821 | .retrigger = ioapic_retrigger_irq, |
1523 | }; | 1822 | }; |
1524 | 1823 | ||
1824 | #ifdef CONFIG_INTR_REMAP | ||
1825 | static struct irq_chip ir_ioapic_chip __read_mostly = { | ||
1826 | .name = "IR-IO-APIC", | ||
1827 | .startup = startup_ioapic_irq, | ||
1828 | .mask = mask_IO_APIC_irq, | ||
1829 | .unmask = unmask_IO_APIC_irq, | ||
1830 | .ack = ack_x2apic_edge, | ||
1831 | .eoi = ack_x2apic_level, | ||
1832 | #ifdef CONFIG_SMP | ||
1833 | .set_affinity = set_ir_ioapic_affinity_irq, | ||
1834 | #endif | ||
1835 | .retrigger = ioapic_retrigger_irq, | ||
1836 | }; | ||
1837 | #endif | ||
1838 | |||
1525 | static inline void init_IO_APIC_traps(void) | 1839 | static inline void init_IO_APIC_traps(void) |
1526 | { | 1840 | { |
1527 | int irq; | 1841 | int irq; |
@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void) | |||
1707 | * 8259A. | 2021 | * 8259A. |
1708 | */ | 2022 | */ |
1709 | if (pin1 == -1) { | 2023 | if (pin1 == -1) { |
2024 | if (intr_remapping_enabled) | ||
2025 | panic("BIOS bug: timer not connected to IO-APIC"); | ||
1710 | pin1 = pin2; | 2026 | pin1 = pin2; |
1711 | apic1 = apic2; | 2027 | apic1 = apic2; |
1712 | no_pin1 = 1; | 2028 | no_pin1 = 1; |
@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void) | |||
1733 | clear_IO_APIC_pin(0, pin1); | 2049 | clear_IO_APIC_pin(0, pin1); |
1734 | goto out; | 2050 | goto out; |
1735 | } | 2051 | } |
2052 | if (intr_remapping_enabled) | ||
2053 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); | ||
1736 | clear_IO_APIC_pin(apic1, pin1); | 2054 | clear_IO_APIC_pin(apic1, pin1); |
1737 | if (!no_pin1) | 2055 | if (!no_pin1) |
1738 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " | 2056 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
@@ -1972,6 +2290,9 @@ void destroy_irq(unsigned int irq) | |||
1972 | 2290 | ||
1973 | dynamic_irq_cleanup(irq); | 2291 | dynamic_irq_cleanup(irq); |
1974 | 2292 | ||
2293 | #ifdef CONFIG_INTR_REMAP | ||
2294 | free_irte(irq); | ||
2295 | #endif | ||
1975 | spin_lock_irqsave(&vector_lock, flags); | 2296 | spin_lock_irqsave(&vector_lock, flags); |
1976 | __clear_irq_vector(irq); | 2297 | __clear_irq_vector(irq); |
1977 | spin_unlock_irqrestore(&vector_lock, flags); | 2298 | spin_unlock_irqrestore(&vector_lock, flags); |
@@ -1990,11 +2311,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
1990 | 2311 | ||
1991 | tmp = TARGET_CPUS; | 2312 | tmp = TARGET_CPUS; |
1992 | err = assign_irq_vector(irq, tmp); | 2313 | err = assign_irq_vector(irq, tmp); |
1993 | if (!err) { | 2314 | if (err) |
1994 | cpus_and(tmp, cfg->domain, tmp); | 2315 | return err; |
1995 | dest = cpu_mask_to_apicid(tmp); | 2316 | |
2317 | cpus_and(tmp, cfg->domain, tmp); | ||
2318 | dest = cpu_mask_to_apicid(tmp); | ||
2319 | |||
2320 | #ifdef CONFIG_INTR_REMAP | ||
2321 | if (irq_remapped(irq)) { | ||
2322 | struct irte irte; | ||
2323 | int ir_index; | ||
2324 | u16 sub_handle; | ||
2325 | |||
2326 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); | ||
2327 | BUG_ON(ir_index == -1); | ||
2328 | |||
2329 | memset (&irte, 0, sizeof(irte)); | ||
2330 | |||
2331 | irte.present = 1; | ||
2332 | irte.dst_mode = INT_DEST_MODE; | ||
2333 | irte.trigger_mode = 0; /* edge */ | ||
2334 | irte.dlvry_mode = INT_DELIVERY_MODE; | ||
2335 | irte.vector = cfg->vector; | ||
2336 | irte.dest_id = IRTE_DEST(dest); | ||
2337 | |||
2338 | modify_irte(irq, &irte); | ||
1996 | 2339 | ||
1997 | msg->address_hi = MSI_ADDR_BASE_HI; | 2340 | msg->address_hi = MSI_ADDR_BASE_HI; |
2341 | msg->data = sub_handle; | ||
2342 | msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | | ||
2343 | MSI_ADDR_IR_SHV | | ||
2344 | MSI_ADDR_IR_INDEX1(ir_index) | | ||
2345 | MSI_ADDR_IR_INDEX2(ir_index); | ||
2346 | } else | ||
2347 | #endif | ||
2348 | { | ||
2349 | msg->address_hi = MSI_ADDR_BASE_HI; | ||
1998 | msg->address_lo = | 2350 | msg->address_lo = |
1999 | MSI_ADDR_BASE_LO | | 2351 | MSI_ADDR_BASE_LO | |
2000 | ((INT_DEST_MODE == 0) ? | 2352 | ((INT_DEST_MODE == 0) ? |
@@ -2044,6 +2396,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
2044 | write_msi_msg(irq, &msg); | 2396 | write_msi_msg(irq, &msg); |
2045 | irq_desc[irq].affinity = mask; | 2397 | irq_desc[irq].affinity = mask; |
2046 | } | 2398 | } |
2399 | |||
2400 | #ifdef CONFIG_INTR_REMAP | ||
2401 | /* | ||
2402 | * Migrate the MSI irq to another cpumask. This migration is | ||
2403 | * done in the process context using interrupt-remapping hardware. | ||
2404 | */ | ||
2405 | static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | ||
2406 | { | ||
2407 | struct irq_cfg *cfg = irq_cfg + irq; | ||
2408 | unsigned int dest; | ||
2409 | cpumask_t tmp, cleanup_mask; | ||
2410 | struct irte irte; | ||
2411 | |||
2412 | cpus_and(tmp, mask, cpu_online_map); | ||
2413 | if (cpus_empty(tmp)) | ||
2414 | return; | ||
2415 | |||
2416 | if (get_irte(irq, &irte)) | ||
2417 | return; | ||
2418 | |||
2419 | if (assign_irq_vector(irq, mask)) | ||
2420 | return; | ||
2421 | |||
2422 | cpus_and(tmp, cfg->domain, mask); | ||
2423 | dest = cpu_mask_to_apicid(tmp); | ||
2424 | |||
2425 | irte.vector = cfg->vector; | ||
2426 | irte.dest_id = IRTE_DEST(dest); | ||
2427 | |||
2428 | /* | ||
2429 | * atomically update the IRTE with the new destination and vector. | ||
2430 | */ | ||
2431 | modify_irte(irq, &irte); | ||
2432 | |||
2433 | /* | ||
2434 | * After this point, all the interrupts will start arriving | ||
2435 | * at the new destination. So, time to cleanup the previous | ||
2436 | * vector allocation. | ||
2437 | */ | ||
2438 | if (cfg->move_in_progress) { | ||
2439 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
2440 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
2441 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2442 | cfg->move_in_progress = 0; | ||
2443 | } | ||
2444 | |||
2445 | irq_desc[irq].affinity = mask; | ||
2446 | } | ||
2447 | #endif | ||
2047 | #endif /* CONFIG_SMP */ | 2448 | #endif /* CONFIG_SMP */ |
2048 | 2449 | ||
2049 | /* | 2450 | /* |
@@ -2061,26 +2462,157 @@ static struct irq_chip msi_chip = { | |||
2061 | .retrigger = ioapic_retrigger_irq, | 2462 | .retrigger = ioapic_retrigger_irq, |
2062 | }; | 2463 | }; |
2063 | 2464 | ||
2064 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | 2465 | #ifdef CONFIG_INTR_REMAP |
2466 | static struct irq_chip msi_ir_chip = { | ||
2467 | .name = "IR-PCI-MSI", | ||
2468 | .unmask = unmask_msi_irq, | ||
2469 | .mask = mask_msi_irq, | ||
2470 | .ack = ack_x2apic_edge, | ||
2471 | #ifdef CONFIG_SMP | ||
2472 | .set_affinity = ir_set_msi_irq_affinity, | ||
2473 | #endif | ||
2474 | .retrigger = ioapic_retrigger_irq, | ||
2475 | }; | ||
2476 | |||
2477 | /* | ||
2478 | * Map the PCI dev to the corresponding remapping hardware unit | ||
2479 | * and allocate 'nvec' consecutive interrupt-remapping table entries | ||
2480 | * in it. | ||
2481 | */ | ||
2482 | static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | ||
2483 | { | ||
2484 | struct intel_iommu *iommu; | ||
2485 | int index; | ||
2486 | |||
2487 | iommu = map_dev_to_ir(dev); | ||
2488 | if (!iommu) { | ||
2489 | printk(KERN_ERR | ||
2490 | "Unable to map PCI %s to iommu\n", pci_name(dev)); | ||
2491 | return -ENOENT; | ||
2492 | } | ||
2493 | |||
2494 | index = alloc_irte(iommu, irq, nvec); | ||
2495 | if (index < 0) { | ||
2496 | printk(KERN_ERR | ||
2497 | "Unable to allocate %d IRTE for PCI %s\n", nvec, | ||
2498 | pci_name(dev)); | ||
2499 | return -ENOSPC; | ||
2500 | } | ||
2501 | return index; | ||
2502 | } | ||
2503 | #endif | ||
2504 | |||
2505 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | ||
2065 | { | 2506 | { |
2507 | int ret; | ||
2066 | struct msi_msg msg; | 2508 | struct msi_msg msg; |
2509 | |||
2510 | ret = msi_compose_msg(dev, irq, &msg); | ||
2511 | if (ret < 0) | ||
2512 | return ret; | ||
2513 | |||
2514 | set_irq_msi(irq, desc); | ||
2515 | write_msi_msg(irq, &msg); | ||
2516 | |||
2517 | #ifdef CONFIG_INTR_REMAP | ||
2518 | if (irq_remapped(irq)) { | ||
2519 | struct irq_desc *desc = irq_desc + irq; | ||
2520 | /* | ||
2521 | * irq migration in process context | ||
2522 | */ | ||
2523 | desc->status |= IRQ_MOVE_PCNTXT; | ||
2524 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | ||
2525 | } else | ||
2526 | #endif | ||
2527 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | ||
2528 | |||
2529 | return 0; | ||
2530 | } | ||
2531 | |||
2532 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
2533 | { | ||
2067 | int irq, ret; | 2534 | int irq, ret; |
2535 | |||
2068 | irq = create_irq(); | 2536 | irq = create_irq(); |
2069 | if (irq < 0) | 2537 | if (irq < 0) |
2070 | return irq; | 2538 | return irq; |
2071 | 2539 | ||
2072 | ret = msi_compose_msg(dev, irq, &msg); | 2540 | #ifdef CONFIG_INTR_REMAP |
2541 | if (!intr_remapping_enabled) | ||
2542 | goto no_ir; | ||
2543 | |||
2544 | ret = msi_alloc_irte(dev, irq, 1); | ||
2545 | if (ret < 0) | ||
2546 | goto error; | ||
2547 | no_ir: | ||
2548 | #endif | ||
2549 | ret = setup_msi_irq(dev, desc, irq); | ||
2073 | if (ret < 0) { | 2550 | if (ret < 0) { |
2074 | destroy_irq(irq); | 2551 | destroy_irq(irq); |
2075 | return ret; | 2552 | return ret; |
2076 | } | 2553 | } |
2554 | return 0; | ||
2077 | 2555 | ||
2078 | set_irq_msi(irq, desc); | 2556 | #ifdef CONFIG_INTR_REMAP |
2079 | write_msi_msg(irq, &msg); | 2557 | error: |
2558 | destroy_irq(irq); | ||
2559 | return ret; | ||
2560 | #endif | ||
2561 | } | ||
2080 | 2562 | ||
2081 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | 2563 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
2564 | { | ||
2565 | int irq, ret, sub_handle; | ||
2566 | struct msi_desc *desc; | ||
2567 | #ifdef CONFIG_INTR_REMAP | ||
2568 | struct intel_iommu *iommu = 0; | ||
2569 | int index = 0; | ||
2570 | #endif | ||
2082 | 2571 | ||
2572 | sub_handle = 0; | ||
2573 | list_for_each_entry(desc, &dev->msi_list, list) { | ||
2574 | irq = create_irq(); | ||
2575 | if (irq < 0) | ||
2576 | return irq; | ||
2577 | #ifdef CONFIG_INTR_REMAP | ||
2578 | if (!intr_remapping_enabled) | ||
2579 | goto no_ir; | ||
2580 | |||
2581 | if (!sub_handle) { | ||
2582 | /* | ||
2583 | * allocate the consecutive block of IRTE's | ||
2584 | * for 'nvec' | ||
2585 | */ | ||
2586 | index = msi_alloc_irte(dev, irq, nvec); | ||
2587 | if (index < 0) { | ||
2588 | ret = index; | ||
2589 | goto error; | ||
2590 | } | ||
2591 | } else { | ||
2592 | iommu = map_dev_to_ir(dev); | ||
2593 | if (!iommu) { | ||
2594 | ret = -ENOENT; | ||
2595 | goto error; | ||
2596 | } | ||
2597 | /* | ||
2598 | * setup the mapping between the irq and the IRTE | ||
2599 | * base index, the sub_handle pointing to the | ||
2600 | * appropriate interrupt remap table entry. | ||
2601 | */ | ||
2602 | set_irte_irq(irq, iommu, index, sub_handle); | ||
2603 | } | ||
2604 | no_ir: | ||
2605 | #endif | ||
2606 | ret = setup_msi_irq(dev, desc, irq); | ||
2607 | if (ret < 0) | ||
2608 | goto error; | ||
2609 | sub_handle++; | ||
2610 | } | ||
2083 | return 0; | 2611 | return 0; |
2612 | |||
2613 | error: | ||
2614 | destroy_irq(irq); | ||
2615 | return ret; | ||
2084 | } | 2616 | } |
2085 | 2617 | ||
2086 | void arch_teardown_msi_irq(unsigned int irq) | 2618 | void arch_teardown_msi_irq(unsigned int irq) |
@@ -2328,6 +2860,10 @@ void __init setup_ioapic_dest(void) | |||
2328 | setup_IO_APIC_irq(ioapic, pin, irq, | 2860 | setup_IO_APIC_irq(ioapic, pin, irq, |
2329 | irq_trigger(irq_entry), | 2861 | irq_trigger(irq_entry), |
2330 | irq_polarity(irq_entry)); | 2862 | irq_polarity(irq_entry)); |
2863 | #ifdef CONFIG_INTR_REMAP | ||
2864 | else if (intr_remapping_enabled) | ||
2865 | set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
2866 | #endif | ||
2331 | else | 2867 | else |
2332 | set_ioapic_affinity_irq(irq, TARGET_CPUS); | 2868 | set_ioapic_affinity_irq(irq, TARGET_CPUS); |
2333 | } | 2869 | } |