Diffstat (limited to 'arch/x86/kernel/io_apic_64.c')
-rw-r--r--  arch/x86/kernel/io_apic_64.c  608
1 file changed, 572 insertions(+), 36 deletions(-)
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d1707..b9950dae59b7 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -49,6 +50,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -108,6 +110,9 @@ DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		pin = entry->pin;
 		if (pin == -1)
 			break;
-		io_apic_write(apic, 0x11 + pin*2, dest);
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from the interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now, do a plain restore of the previous settings.
+	 * TBD: In the case of the OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be set up to point to interrupt-remapping
+	 * table entries. For now, do a plain restore, and wait for
+	 * setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -834,18 +907,98 @@ void setup_vector_irq(int cpu)
 
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-	if (trigger) {
+	if (trigger)
 		irq_desc[irq].status |= IRQ_LEVEL;
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq, "fasteoi");
-	} else {
+	else
 		irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						      "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
+	if (trigger)
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_fasteoi_irq,
+					      "fasteoi");
+	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
+{
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry, 0, sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
 	}
+
+	entry->mask = 0;	/* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+	return 0;
 }
850 1003
851static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1004static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -870,24 +1023,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
 		    irq, trigger, polarity);
 
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(&entry,0,sizeof(entry));
 
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest = cpu_mask_to_apicid(mask);
-	entry.mask = 0;	/* enable IRQ */
-	entry.trigger = trigger;
-	entry.polarity = polarity;
-	entry.vector = cfg->vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry.mask = 1;
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
 
 	ioapic_register_intr(irq, trigger);
 	if (irq < 16)
@@ -939,6 +1083,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+	if (intr_remapping_enabled)
+		return;
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1085,6 +1232,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
 void __apicdebuginit print_local_APIC(void * dummy)
 {
 	unsigned int v, ver, maxlvt;
+	unsigned long icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1092,7 +1240,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+	printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1128,10 +1276,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	v = apic_read(APIC_ESR);
 	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-	v = apic_read(APIC_ICR);
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-	v = apic_read(APIC_ICR2);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+	icr = apic_icr_read();
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1286,7 +1433,7 @@ void disable_IO_APIC(void)
 	entry.dest_mode       = 0; /* Physical */
 	entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 	entry.vector          = 0;
-	entry.dest            = GET_APIC_ID(read_apic_id());
+	entry.dest            = read_apic_id();
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1392,6 +1539,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update (of vector
+ * and cpu destination) of the IRTE and a flush of the hardware cache.
+ *
+ * For level triggered, we need to modify the IO-APIC RTE as well with the
+ * updated vector information, along with modifying the IRTE with vector and
+ * destination. So irq migration for level triggered is a little more complex
+ * than edge triggered migration. The good news is that we use the same
+ * algorithm for level triggered migration as we have today, the only
+ * difference being that we now initiate the irq migration from process
+ * context instead of interrupt context.
+ *
+ * In the future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to the IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_desc *desc = irq_desc + irq;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	unsigned int dest;
+	unsigned long flags;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modify the IRTE and flush the Interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+		 * Interrupt in progress. Migrating the irq now will change
+		 * the vector information in the IO-APIC RTE, and that will
+		 * confuse the EOI broadcast performed by the cpu.
+		 * So, delay the irq migration to the next instance.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everything is clear. we have right of way */
+	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+	ret = 0;
+	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_desc + irq;
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq,
+						 irq_desc[irq].pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	if (irq_desc[irq].status & IRQ_LEVEL) {
+		irq_desc[irq].status |= IRQ_MOVE_PENDING;
+		irq_desc[irq].pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
@@ -1448,6 +1736,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1522,6 +1821,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name		= "IR-IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_x2apic_edge,
+	.eoi		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void)
 			clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1972,6 +2290,9 @@ void destroy_irq(unsigned int irq)
 
 	dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -1990,11 +2311,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 
 	tmp = TARGET_CPUS;
 	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+	if (err)
+		return err;
+
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0;	/* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
 
 		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
+		msg->address_hi = MSI_ADDR_BASE_HI;
 		msg->address_lo =
 			MSI_ADDR_BASE_LO |
 			((INT_DEST_MODE == 0) ?
@@ -2044,6 +2396,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	write_msi_msg(irq, &msg);
 	irq_desc[irq].affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to clean up the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2061,26 +2462,157 @@ static struct irq_chip msi_chip = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
+		       pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
+	int ret;
 	struct msi_msg msg;
+
+	ret = msi_compose_msg(dev, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	set_irq_msi(irq, desc);
+	write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_desc + irq;
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+	return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
 	int irq, ret;
+
 	irq = create_irq();
 	if (irq < 0)
 		return irq;
 
-	ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
+	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
 	}
+	return 0;
 
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
+}
 
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int irq, ret, sub_handle;
+	struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = 0;
+	int index = 0;
+#endif
 
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq();
+		if (irq < 0)
+			return irq;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTE's
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * setup the mapping between the irq and the IRTE
+			 * base index, the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
 	return 0;
+
+error:
+	destroy_irq(irq);
+	return ret;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
@@ -2328,6 +2860,10 @@ void __init setup_ioapic_dest(void)
 			setup_IO_APIC_irq(ioapic, pin, irq,
 					  irq_trigger(irq_entry),
 					  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+		else if (intr_remapping_enabled)
+			set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
 		else
 			set_ioapic_affinity_irq(irq, TARGET_CPUS);
 	}
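
Note on the remappable RTE format used above: with interrupt remapping enabled,
setup_ioapic_entry() no longer writes destination/delivery information into the
IO-APIC RTE. Those fields are programmed into the IRTE via modify_irte(), and
the RTE carries only a 16-bit IRTE handle, split into a 15-bit 'index' field
plus a 1-bit 'index2' field (with format = 1 marking the remappable layout).
The stand-alone sketch below illustrates just that bit-splitting round trip;
the struct is illustrative and is not the kernel's exact
struct IR_IO_APIC_route_entry layout.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the handle-carrying fields of a remappable RTE. */
struct ir_rte_fields {
	uint16_t index;		/* low 15 bits of the IRTE handle */
	uint8_t  zero;		/* reserved, must be zero */
	uint8_t  format;	/* 1 = remappable format */
	uint8_t  index2;	/* bit 15 of the IRTE handle */
};

/* Mirrors the assignments in setup_ioapic_entry():
 *   ir_entry->index2 = (index >> 15) & 0x1;
 *   ir_entry->index  = (index & 0x7fff);
 */
static struct ir_rte_fields encode_irte_handle(int handle)
{
	struct ir_rte_fields f = {
		.index	= handle & 0x7fff,
		.zero	= 0,
		.format	= 1,
		.index2	= (handle >> 15) & 0x1,
	};
	return f;
}

/* What the hardware conceptually does when it reassembles the handle. */
static int decode_irte_handle(struct ir_rte_fields f)
{
	return (f.index2 << 15) | f.index;
}

int main(void)
{
	/* Every 16-bit handle survives the split and reassembly. */
	for (int handle = 0; handle < 0x10000; handle++)
		assert(decode_irte_handle(encode_irte_handle(handle)) == handle);
	printf("all 16-bit IRTE handles round-trip\n");
	return 0;
}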