aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuresh Siddha <suresh.b.siddha@intel.com>2008-07-10 14:16:56 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-12 02:45:05 -0400
commit89027d35aa5b8f45ce0f7fa0911db85b46563da0 (patch)
treebf2f9570231bbd4cc2cd24247059fdb72bdee57e
parent5c520a6724e912a7e6153b7597192edad6752750 (diff)
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure. The IO-APIC RTE will be programmed with the interrupt-remapping table entry (IRTE) index, and the IRTE will contain the information about the vector, cpu destination, trigger mode, etc., which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context, as opposed to the current irq migration in the context of an interrupt; the interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered interrupts, irq migration is a simple atomic update (of vector and cpu destination) of the IRTE, followed by a flush of the hardware cache. For level triggered interrupts, we need to modify the IO-APIC RTE as well with the updated vector information, along with modifying the IRTE with the vector and cpu destination. So irq migration for level triggered is a little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, the only difference being that we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to the IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/apic_64.c1
-rw-r--r--arch/x86/kernel/io_apic_64.c300
-rw-r--r--drivers/pci/intr_remapping.c10
-rw-r--r--include/asm-x86/apic.h9
-rw-r--r--include/asm-x86/io_apic.h14
-rw-r--r--include/asm-x86/irq_remapping.h8
-rw-r--r--include/linux/dmar.h1
7 files changed, 321 insertions, 22 deletions
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index a969ef78e12a..d5c06917b5b1 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -46,6 +46,7 @@
46static int disable_apic_timer __cpuinitdata; 46static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 47static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 48int disable_apic;
49int x2apic;
49 50
50/* Local APIC timer works in C2 */ 51/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 52int local_apic_timer_c2_ok;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b62d42ef9283..9bd02ef049a0 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -48,6 +49,7 @@
48#include <asm/nmi.h> 49#include <asm/nmi.h>
49#include <asm/msidef.h> 50#include <asm/msidef.h>
50#include <asm/hypertransport.h> 51#include <asm/hypertransport.h>
52#include <asm/irq_remapping.h>
51 53
52#include <mach_ipi.h> 54#include <mach_ipi.h>
53#include <mach_apic.h> 55#include <mach_apic.h>
@@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
312 pin = entry->pin; 314 pin = entry->pin;
313 if (pin == -1) 315 if (pin == -1)
314 break; 316 break;
315 io_apic_write(apic, 0x11 + pin*2, dest); 317 /*
318 * With interrupt-remapping, destination information comes
319 * from interrupt-remapping table entry.
320 */
321 if (!irq_remapped(irq))
322 io_apic_write(apic, 0x11 + pin*2, dest);
316 reg = io_apic_read(apic, 0x10 + pin*2); 323 reg = io_apic_read(apic, 0x10 + pin*2);
317 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 324 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
318 reg |= vector; 325 reg |= vector;
@@ -906,18 +913,98 @@ void setup_vector_irq(int cpu)
906 913
907 914
908static struct irq_chip ioapic_chip; 915static struct irq_chip ioapic_chip;
916#ifdef CONFIG_INTR_REMAP
917static struct irq_chip ir_ioapic_chip;
918#endif
909 919
910static void ioapic_register_intr(int irq, unsigned long trigger) 920static void ioapic_register_intr(int irq, unsigned long trigger)
911{ 921{
912 if (trigger) { 922 if (trigger)
913 irq_desc[irq].status |= IRQ_LEVEL; 923 irq_desc[irq].status |= IRQ_LEVEL;
914 set_irq_chip_and_handler_name(irq, &ioapic_chip, 924 else
915 handle_fasteoi_irq, "fasteoi");
916 } else {
917 irq_desc[irq].status &= ~IRQ_LEVEL; 925 irq_desc[irq].status &= ~IRQ_LEVEL;
926
927#ifdef CONFIG_INTR_REMAP
928 if (irq_remapped(irq)) {
929 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
930 if (trigger)
931 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
932 handle_fasteoi_irq,
933 "fasteoi");
934 else
935 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
936 handle_edge_irq, "edge");
937 return;
938 }
939#endif
940 if (trigger)
941 set_irq_chip_and_handler_name(irq, &ioapic_chip,
942 handle_fasteoi_irq,
943 "fasteoi");
944 else
918 set_irq_chip_and_handler_name(irq, &ioapic_chip, 945 set_irq_chip_and_handler_name(irq, &ioapic_chip,
919 handle_edge_irq, "edge"); 946 handle_edge_irq, "edge");
947}
948
949static int setup_ioapic_entry(int apic, int irq,
950 struct IO_APIC_route_entry *entry,
951 unsigned int destination, int trigger,
952 int polarity, int vector)
953{
954 /*
955 * add it to the IO-APIC irq-routing table:
956 */
957 memset(entry,0,sizeof(*entry));
958
959#ifdef CONFIG_INTR_REMAP
960 if (intr_remapping_enabled) {
961 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
962 struct irte irte;
963 struct IR_IO_APIC_route_entry *ir_entry =
964 (struct IR_IO_APIC_route_entry *) entry;
965 int index;
966
967 if (!iommu)
968 panic("No mapping iommu for ioapic %d\n", apic);
969
970 index = alloc_irte(iommu, irq, 1);
971 if (index < 0)
972 panic("Failed to allocate IRTE for ioapic %d\n", apic);
973
974 memset(&irte, 0, sizeof(irte));
975
976 irte.present = 1;
977 irte.dst_mode = INT_DEST_MODE;
978 irte.trigger_mode = trigger;
979 irte.dlvry_mode = INT_DELIVERY_MODE;
980 irte.vector = vector;
981 irte.dest_id = IRTE_DEST(destination);
982
983 modify_irte(irq, &irte);
984
985 ir_entry->index2 = (index >> 15) & 0x1;
986 ir_entry->zero = 0;
987 ir_entry->format = 1;
988 ir_entry->index = (index & 0x7fff);
989 } else
990#endif
991 {
992 entry->delivery_mode = INT_DELIVERY_MODE;
993 entry->dest_mode = INT_DEST_MODE;
994 entry->dest = destination;
920 } 995 }
996
997 entry->mask = 0; /* enable IRQ */
998 entry->trigger = trigger;
999 entry->polarity = polarity;
1000 entry->vector = vector;
1001
1002 /* Mask level triggered irqs.
1003 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1004 */
1005 if (trigger)
1006 entry->mask = 1;
1007 return 0;
921} 1008}
922 1009
923static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1010static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
942 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1029 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
943 irq, trigger, polarity); 1030 irq, trigger, polarity);
944 1031
945 /*
946 * add it to the IO-APIC irq-routing table:
947 */
948 memset(&entry,0,sizeof(entry));
949
950 entry.delivery_mode = INT_DELIVERY_MODE;
951 entry.dest_mode = INT_DEST_MODE;
952 entry.dest = cpu_mask_to_apicid(mask);
953 entry.mask = 0; /* enable IRQ */
954 entry.trigger = trigger;
955 entry.polarity = polarity;
956 entry.vector = cfg->vector;
957 1032
958 /* Mask level triggered irqs. 1033 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
959 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1034 cpu_mask_to_apicid(mask), trigger, polarity,
960 */ 1035 cfg->vector)) {
961 if (trigger) 1036 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
962 entry.mask = 1; 1037 mp_ioapics[apic].mp_apicid, pin);
1038 __clear_irq_vector(irq);
1039 return;
1040 }
963 1041
964 ioapic_register_intr(irq, trigger); 1042 ioapic_register_intr(irq, trigger);
965 if (irq < 16) 1043 if (irq < 16)
@@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1011{ 1089{
1012 struct IO_APIC_route_entry entry; 1090 struct IO_APIC_route_entry entry;
1013 1091
1092 if (intr_remapping_enabled)
1093 return;
1094
1014 memset(&entry, 0, sizeof(entry)); 1095 memset(&entry, 0, sizeof(entry));
1015 1096
1016 /* 1097 /*
@@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1466 */ 1547 */
1467 1548
1468#ifdef CONFIG_SMP 1549#ifdef CONFIG_SMP
1550
1551#ifdef CONFIG_INTR_REMAP
1552static void ir_irq_migration(struct work_struct *work);
1553
1554static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1555
1556/*
1557 * Migrate the IO-APIC irq in the presence of intr-remapping.
1558 *
1559 * For edge triggered, irq migration is a simple atomic update(of vector
1560 * and cpu destination) of IRTE and flush the hardware cache.
1561 *
1562 * For level triggered, we need to modify the io-apic RTE as well with the updated
1563 * vector information, along with modifying IRTE with vector and destination.
1564 * So irq migration for level triggered is little bit more complex compared to
1565 * edge triggered migration. But the good news is, we use the same algorithm
1566 * for level triggered migration as we have today, only difference being,
1567 * we now initiate the irq migration from process context instead of the
1568 * interrupt context.
1569 *
1570 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1571 * suppression) to the IO-APIC, level triggered irq migration will also be
1572 * as simple as edge triggered migration and we can do the irq migration
1573 * with a simple atomic update to IO-APIC RTE.
1574 */
1575static void migrate_ioapic_irq(int irq, cpumask_t mask)
1576{
1577 struct irq_cfg *cfg = irq_cfg + irq;
1578 struct irq_desc *desc = irq_desc + irq;
1579 cpumask_t tmp, cleanup_mask;
1580 struct irte irte;
1581 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1582 unsigned int dest;
1583 unsigned long flags;
1584
1585 cpus_and(tmp, mask, cpu_online_map);
1586 if (cpus_empty(tmp))
1587 return;
1588
1589 if (get_irte(irq, &irte))
1590 return;
1591
1592 if (assign_irq_vector(irq, mask))
1593 return;
1594
1595 cpus_and(tmp, cfg->domain, mask);
1596 dest = cpu_mask_to_apicid(tmp);
1597
1598 if (modify_ioapic_rte) {
1599 spin_lock_irqsave(&ioapic_lock, flags);
1600 __target_IO_APIC_irq(irq, dest, cfg->vector);
1601 spin_unlock_irqrestore(&ioapic_lock, flags);
1602 }
1603
1604 irte.vector = cfg->vector;
1605 irte.dest_id = IRTE_DEST(dest);
1606
1607 /*
1608 * Modify the IRTE and flush the Interrupt entry cache.
1609 */
1610 modify_irte(irq, &irte);
1611
1612 if (cfg->move_in_progress) {
1613 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1614 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1615 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1616 cfg->move_in_progress = 0;
1617 }
1618
1619 irq_desc[irq].affinity = mask;
1620}
1621
1622static int migrate_irq_remapped_level(int irq)
1623{
1624 int ret = -1;
1625
1626 mask_IO_APIC_irq(irq);
1627
1628 if (io_apic_level_ack_pending(irq)) {
1629 /*
1630 * Interrupt in progress. Migrating irq now will change the
1631 * vector information in the IO-APIC RTE and that will confuse
1632 * the EOI broadcast performed by cpu.
1633 * So, delay the irq migration to the next instance.
1634 */
1635 schedule_delayed_work(&ir_migration_work, 1);
1636 goto unmask;
1637 }
1638
1639 /* everything is clear. we have right of way */
1640 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1641
1642 ret = 0;
1643 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1644 cpus_clear(irq_desc[irq].pending_mask);
1645
1646unmask:
1647 unmask_IO_APIC_irq(irq);
1648 return ret;
1649}
1650
1651static void ir_irq_migration(struct work_struct *work)
1652{
1653 int irq;
1654
1655 for (irq = 0; irq < NR_IRQS; irq++) {
1656 struct irq_desc *desc = irq_desc + irq;
1657 if (desc->status & IRQ_MOVE_PENDING) {
1658 unsigned long flags;
1659
1660 spin_lock_irqsave(&desc->lock, flags);
1661 if (!desc->chip->set_affinity ||
1662 !(desc->status & IRQ_MOVE_PENDING)) {
1663 desc->status &= ~IRQ_MOVE_PENDING;
1664 spin_unlock_irqrestore(&desc->lock, flags);
1665 continue;
1666 }
1667
1668 desc->chip->set_affinity(irq,
1669 irq_desc[irq].pending_mask);
1670 spin_unlock_irqrestore(&desc->lock, flags);
1671 }
1672 }
1673}
1674
1675/*
1676 * Migrates the IRQ destination in the process context.
1677 */
1678static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1679{
1680 if (irq_desc[irq].status & IRQ_LEVEL) {
1681 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1682 irq_desc[irq].pending_mask = mask;
1683 migrate_irq_remapped_level(irq);
1684 return;
1685 }
1686
1687 migrate_ioapic_irq(irq, mask);
1688}
1689#endif
1690
1469asmlinkage void smp_irq_move_cleanup_interrupt(void) 1691asmlinkage void smp_irq_move_cleanup_interrupt(void)
1470{ 1692{
1471 unsigned vector, me; 1693 unsigned vector, me;
@@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq)
1522#else 1744#else
1523static inline void irq_complete_move(unsigned int irq) {} 1745static inline void irq_complete_move(unsigned int irq) {}
1524#endif 1746#endif
1747#ifdef CONFIG_INTR_REMAP
1748static void ack_x2apic_level(unsigned int irq)
1749{
1750 ack_x2APIC_irq();
1751}
1752
1753static void ack_x2apic_edge(unsigned int irq)
1754{
1755 ack_x2APIC_irq();
1756}
1757#endif
1525 1758
1526static void ack_apic_edge(unsigned int irq) 1759static void ack_apic_edge(unsigned int irq)
1527{ 1760{
@@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1596 .retrigger = ioapic_retrigger_irq, 1829 .retrigger = ioapic_retrigger_irq,
1597}; 1830};
1598 1831
1832#ifdef CONFIG_INTR_REMAP
1833static struct irq_chip ir_ioapic_chip __read_mostly = {
1834 .name = "IR-IO-APIC",
1835 .startup = startup_ioapic_irq,
1836 .mask = mask_IO_APIC_irq,
1837 .unmask = unmask_IO_APIC_irq,
1838 .ack = ack_x2apic_edge,
1839 .eoi = ack_x2apic_level,
1840#ifdef CONFIG_SMP
1841 .set_affinity = set_ir_ioapic_affinity_irq,
1842#endif
1843 .retrigger = ioapic_retrigger_irq,
1844};
1845#endif
1846
1599static inline void init_IO_APIC_traps(void) 1847static inline void init_IO_APIC_traps(void)
1600{ 1848{
1601 int irq; 1849 int irq;
@@ -1783,6 +2031,8 @@ static inline void __init check_timer(void)
1783 * 8259A. 2031 * 8259A.
1784 */ 2032 */
1785 if (pin1 == -1) { 2033 if (pin1 == -1) {
2034 if (intr_remapping_enabled)
2035 panic("BIOS bug: timer not connected to IO-APIC");
1786 pin1 = pin2; 2036 pin1 = pin2;
1787 apic1 = apic2; 2037 apic1 = apic2;
1788 no_pin1 = 1; 2038 no_pin1 = 1;
@@ -1809,6 +2059,8 @@ static inline void __init check_timer(void)
1809 clear_IO_APIC_pin(0, pin1); 2059 clear_IO_APIC_pin(0, pin1);
1810 goto out; 2060 goto out;
1811 } 2061 }
2062 if (intr_remapping_enabled)
2063 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1812 clear_IO_APIC_pin(apic1, pin1); 2064 clear_IO_APIC_pin(apic1, pin1);
1813 if (!no_pin1) 2065 if (!no_pin1)
1814 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " 2066 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
@@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void)
2401 setup_IO_APIC_irq(ioapic, pin, irq, 2653 setup_IO_APIC_irq(ioapic, pin, irq,
2402 irq_trigger(irq_entry), 2654 irq_trigger(irq_entry),
2403 irq_polarity(irq_entry)); 2655 irq_polarity(irq_entry));
2656#ifdef CONFIG_INTR_REMAP
2657 else if (intr_remapping_enabled)
2658 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2659#endif
2404 else 2660 else
2405 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2661 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2406 } 2662 }
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index bddb4b19b6c7..32e55c7a9805 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -220,6 +220,16 @@ int flush_irte(int irq)
220 return 0; 220 return 0;
221} 221}
222 222
223struct intel_iommu *map_ioapic_to_ir(int apic)
224{
225 int i;
226
227 for (i = 0; i < MAX_IO_APICS; i++)
228 if (ir_ioapic[i].id == apic)
229 return ir_ioapic[i].iommu;
230 return NULL;
231}
232
223int free_irte(int irq) 233int free_irte(int irq)
224{ 234{
225 int index, i; 235 int index, i;
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index bb54928373ca..aa746704a5c9 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -134,6 +134,15 @@ extern int get_physical_broadcast(void);
134# define apic_write_around(x, y) apic_write_atomic((x), (y)) 134# define apic_write_around(x, y) apic_write_atomic((x), (y))
135#endif 135#endif
136 136
137#ifdef CONFIG_X86_64
138static inline void ack_x2APIC_irq(void)
139{
140 /* Docs say use 0 for future compatibility */
141 native_apic_msr_write(APIC_EOI, 0);
142}
143#endif
144
145
137static inline void ack_APIC_irq(void) 146static inline void ack_APIC_irq(void)
138{ 147{
139 /* 148 /*
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 1c4a99d882f5..8dc2622714c8 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
107 107
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct IR_IO_APIC_route_entry {
111 __u64 vector : 8,
112 zero : 3,
113 index2 : 1,
114 delivery_status : 1,
115 polarity : 1,
116 irr : 1,
117 trigger : 1,
118 mask : 1,
119 reserved : 31,
120 format : 1,
121 index : 15;
122} __attribute__ ((packed));
123
110#ifdef CONFIG_X86_IO_APIC 124#ifdef CONFIG_X86_IO_APIC
111 125
112/* 126/*
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 000000000000..78242c6ffa58
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
1#ifndef _ASM_IRQ_REMAPPING_H
2#define _ASM_IRQ_REMAPPING_H
3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7
8#endif
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 324bbca85a26..bf41ffa74705 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -109,6 +109,7 @@ extern int flush_irte(int irq);
109extern int free_irte(int irq); 109extern int free_irte(int irq);
110 110
111extern int irq_remapped(int irq); 111extern int irq_remapped(int irq);
112extern struct intel_iommu *map_ioapic_to_ir(int apic);
112#else 113#else
113#define irq_remapped(irq) (0) 114#define irq_remapped(irq) (0)
114#define enable_intr_remapping(mode) (-1) 115#define enable_intr_remapping(mode) (-1)