aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt37
-rw-r--r--arch/x86/include/asm/hpet.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h13
-rw-r--r--arch/x86/include/asm/hypervisor.h13
-rw-r--r--arch/x86/include/asm/io_apic.h28
-rw-r--r--arch/x86/include/asm/irq_remapping.h40
-rw-r--r--arch/x86/include/asm/kvm_para.h8
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/x86_init.h27
-rw-r--r--arch/x86/kernel/apic/apic.c28
-rw-r--r--arch/x86/kernel/apic/io_apic.c457
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c7
-rw-r--r--arch/x86/kernel/cpu/vmware.c13
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/kvm.c1
-rw-r--r--arch/x86/kernel/x86_init.c24
-rw-r--r--arch/x86/xen/enlighten.c1
-rw-r--r--drivers/ata/ahci.c93
-rw-r--r--drivers/ata/ahci.h6
-rw-r--r--drivers/ata/libahci.c118
-rw-r--r--drivers/iommu/amd_iommu.c8
-rw-r--r--drivers/iommu/dmar.c2
-rw-r--r--drivers/iommu/intel-iommu.c2
-rw-r--r--drivers/iommu/intel_irq_remapping.c48
-rw-r--r--drivers/iommu/irq_remapping.c231
-rw-r--r--drivers/iommu/irq_remapping.h1
-rw-r--r--drivers/pci/msi.c26
-rw-r--r--include/linux/irq.h8
-rw-r--r--include/linux/pci.h7
-rw-r--r--kernel/irq/chip.c30
31 files changed, 918 insertions, 371 deletions
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 53e6fca146d7..a09178086c30 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block()
127returns as soon as it finds any constraint that doesn't allow the 127returns as soon as it finds any constraint that doesn't allow the
128call to succeed. 128call to succeed.
129 129
1304.2.3 pci_disable_msi 1304.2.3 pci_enable_msi_block_auto
131
132int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count)
133
134This variation on pci_enable_msi() call allows a device driver to request
135the maximum possible number of MSIs. The MSI specification only allows
136interrupts to be allocated in powers of two, up to a maximum of 2^5 (32).
137
138If this function returns a positive number, it indicates that it has
139succeeded and the returned value is the number of allocated interrupts. In
140this case, the function enables MSI on this device and updates dev->irq to
141be the lowest of the new interrupts assigned to it. The other interrupts
142assigned to the device are in the range dev->irq to dev->irq + returned
143value - 1.
144
145If this function returns a negative number, it indicates an error and
146the driver should not attempt to request any more MSI interrupts for
147this device.
148
149If the device driver needs to know the number of interrupts the device
150supports it can pass the pointer count where that number is stored. The
151device driver must decide what action to take if pci_enable_msi_block_auto()
152succeeds, but returns a value less than the number of interrupts supported.
153If the device driver does not need to know the number of interrupts
154supported, it can set the pointer count to NULL.
155
1564.2.4 pci_disable_msi
131 157
132void pci_disable_msi(struct pci_dev *dev) 158void pci_disable_msi(struct pci_dev *dev)
133 159
134This function should be used to undo the effect of pci_enable_msi() or 160This function should be used to undo the effect of pci_enable_msi() or
135pci_enable_msi_block(). Calling it restores dev->irq to the pin-based 161pci_enable_msi_block() or pci_enable_msi_block_auto(). Calling it restores
136interrupt number and frees the previously allocated message signaled 162dev->irq to the pin-based interrupt number and frees the previously
137interrupt(s). The interrupt may subsequently be assigned to another 163allocated message signaled interrupt(s). The interrupt may subsequently be
138device, so drivers should not cache the value of dev->irq. 164assigned to another device, so drivers should not cache the value of
165dev->irq.
139 166
140Before calling this function, a device driver must always call free_irq() 167Before calling this function, a device driver must always call free_irq()
141on any interrupt for which it previously called request_irq(). 168on any interrupt for which it previously called request_irq().
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc87..b18df579c0e9 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); 80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
81 81
82#ifdef CONFIG_PCI_MSI 82#ifdef CONFIG_PCI_MSI
83extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); 83extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
84#else 84#else
85static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 85static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
86{ 86{
87 return -EINVAL; 87 return -EINVAL;
88} 88}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
111static inline int hpet_enable(void) { return 0; } 111static inline int hpet_enable(void) { return 0; }
112static inline int is_hpet_enabled(void) { return 0; } 112static inline int is_hpet_enabled(void) { return 0; }
113#define hpet_readl(a) 0 113#define hpet_readl(a) 0
114#define default_setup_hpet_msi NULL
114 115
115#endif 116#endif
116#endif /* _ASM_X86_HPET_H */ 117#endif /* _ASM_X86_HPET_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be7..10a78c3d3d5a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
101 irq_attr->polarity = polarity; 101 irq_attr->polarity = polarity;
102} 102}
103 103
104/* Intel specific interrupt remapping information */
104struct irq_2_iommu { 105struct irq_2_iommu {
105 struct intel_iommu *iommu; 106 struct intel_iommu *iommu;
106 u16 irte_index; 107 u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
108 u8 irte_mask; 109 u8 irte_mask;
109}; 110};
110 111
112/* AMD specific interrupt remapping information */
113struct irq_2_irte {
114 u16 devid; /* Device ID for IRTE table */
115 u16 index; /* Index into IRTE table*/
116};
117
111/* 118/*
112 * This is performance-critical, we want to do it O(1) 119 * This is performance-critical, we want to do it O(1)
113 * 120 *
@@ -120,7 +127,11 @@ struct irq_cfg {
120 u8 vector; 127 u8 vector;
121 u8 move_in_progress : 1; 128 u8 move_in_progress : 1;
122#ifdef CONFIG_IRQ_REMAP 129#ifdef CONFIG_IRQ_REMAP
123 struct irq_2_iommu irq_2_iommu; 130 u8 remapped : 1;
131 union {
132 struct irq_2_iommu irq_2_iommu;
133 struct irq_2_irte irq_2_irte;
134 };
124#endif 135#endif
125}; 136};
126 137
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c7509933..86095ed14135 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
25 25
26extern void init_hypervisor(struct cpuinfo_x86 *c); 26extern void init_hypervisor(struct cpuinfo_x86 *c);
27extern void init_hypervisor_platform(void); 27extern void init_hypervisor_platform(void);
28extern bool hypervisor_x2apic_available(void);
28 29
29/* 30/*
30 * x86 hypervisor information 31 * x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
41 42
42 /* Platform setup (run once per boot) */ 43 /* Platform setup (run once per boot) */
43 void (*init_platform)(void); 44 void (*init_platform)(void);
45
46 /* X2APIC detection (run once per boot) */
47 bool (*x2apic_available)(void);
44}; 48};
45 49
46extern const struct hypervisor_x86 *x86_hyper; 50extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
51extern const struct hypervisor_x86 x86_hyper_xen_hvm; 55extern const struct hypervisor_x86 x86_hyper_xen_hvm;
52extern const struct hypervisor_x86 x86_hyper_kvm; 56extern const struct hypervisor_x86 x86_hyper_kvm;
53 57
54static inline bool hypervisor_x2apic_available(void)
55{
56 if (kvm_para_available())
57 return true;
58 if (xen_x2apic_para_available())
59 return true;
60 return false;
61}
62
63#endif 58#endif
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea9..459e50a424d1 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
145 145
146struct io_apic_irq_attr; 146struct io_apic_irq_attr;
147struct irq_cfg;
147extern int io_apic_set_pci_routing(struct device *dev, int irq, 148extern int io_apic_set_pci_routing(struct device *dev, int irq,
148 struct io_apic_irq_attr *irq_attr); 149 struct io_apic_irq_attr *irq_attr);
149void setup_IO_APIC_irq_extra(u32 gsi); 150void setup_IO_APIC_irq_extra(u32 gsi);
150extern void ioapic_insert_resources(void); 151extern void ioapic_insert_resources(void);
151 152
153extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
154 unsigned int, int,
155 struct io_apic_irq_attr *);
156extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
157 unsigned int, int,
158 struct io_apic_irq_attr *);
159extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
160
161extern void native_compose_msi_msg(struct pci_dev *pdev,
162 unsigned int irq, unsigned int dest,
163 struct msi_msg *msg, u8 hpet_id);
164extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
152int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); 165int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
153 166
154extern int save_ioapic_entries(void); 167extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
179extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); 192extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
180extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); 193extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
181extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); 194extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
195extern void native_disable_io_apic(void);
196extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
197extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
198extern int native_ioapic_set_affinity(struct irq_data *,
199 const struct cpumask *,
200 bool);
182 201
183static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 202static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
184{ 203{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
193{ 212{
194 x86_io_apic_ops.modify(apic, reg, value); 213 x86_io_apic_ops.modify(apic, reg, value);
195} 214}
215
216extern void io_apic_eoi(unsigned int apic, unsigned int vector);
217
196#else /* !CONFIG_X86_IO_APIC */ 218#else /* !CONFIG_X86_IO_APIC */
197 219
198#define io_apic_assign_pci_irqs 0 220#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
223#define native_io_apic_read NULL 245#define native_io_apic_read NULL
224#define native_io_apic_write NULL 246#define native_io_apic_write NULL
225#define native_io_apic_modify NULL 247#define native_io_apic_modify NULL
248#define native_disable_io_apic NULL
249#define native_io_apic_print_entries NULL
250#define native_ioapic_set_affinity NULL
251#define native_setup_ioapic_entry NULL
252#define native_compose_msi_msg NULL
253#define native_eoi_ioapic_pin NULL
226#endif 254#endif
227 255
228#endif /* _ASM_X86_IO_APIC_H */ 256#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..95fd3527f632 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
26 26
27#ifdef CONFIG_IRQ_REMAP 27#ifdef CONFIG_IRQ_REMAP
28 28
29extern int irq_remapping_enabled;
30
31extern void setup_irq_remapping_ops(void); 29extern void setup_irq_remapping_ops(void);
32extern int irq_remapping_supported(void); 30extern int irq_remapping_supported(void);
33extern int irq_remapping_prepare(void); 31extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
40 unsigned int destination, 38 unsigned int destination,
41 int vector, 39 int vector,
42 struct io_apic_irq_attr *attr); 40 struct io_apic_irq_attr *attr);
43extern int set_remapped_irq_affinity(struct irq_data *data,
44 const struct cpumask *mask,
45 bool force);
46extern void free_remapped_irq(int irq); 41extern void free_remapped_irq(int irq);
47extern void compose_remapped_msi_msg(struct pci_dev *pdev, 42extern void compose_remapped_msi_msg(struct pci_dev *pdev,
48 unsigned int irq, unsigned int dest, 43 unsigned int irq, unsigned int dest,
49 struct msi_msg *msg, u8 hpet_id); 44 struct msi_msg *msg, u8 hpet_id);
50extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
51extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
52 int index, int sub_handle);
53extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); 45extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
46extern void panic_if_irq_remap(const char *msg);
47extern bool setup_remapped_irq(int irq,
48 struct irq_cfg *cfg,
49 struct irq_chip *chip);
54 50
55#else /* CONFIG_IRQ_REMAP */ 51void irq_remap_modify_chip_defaults(struct irq_chip *chip);
56 52
57#define irq_remapping_enabled 0 53#else /* CONFIG_IRQ_REMAP */
58 54
59static inline void setup_irq_remapping_ops(void) { } 55static inline void setup_irq_remapping_ops(void) { }
60static inline int irq_remapping_supported(void) { return 0; } 56static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
71{ 67{
72 return -ENODEV; 68 return -ENODEV;
73} 69}
74static inline int set_remapped_irq_affinity(struct irq_data *data,
75 const struct cpumask *mask,
76 bool force)
77{
78 return 0;
79}
80static inline void free_remapped_irq(int irq) { } 70static inline void free_remapped_irq(int irq) { }
81static inline void compose_remapped_msi_msg(struct pci_dev *pdev, 71static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
82 unsigned int irq, unsigned int dest, 72 unsigned int irq, unsigned int dest,
83 struct msi_msg *msg, u8 hpet_id) 73 struct msi_msg *msg, u8 hpet_id)
84{ 74{
85} 75}
86static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) 76static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
87{ 77{
88 return -ENODEV; 78 return -ENODEV;
89} 79}
90static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, 80
91 int index, int sub_handle) 81static inline void panic_if_irq_remap(const char *msg)
82{
83}
84
85static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
92{ 86{
93 return -ENODEV;
94} 87}
95static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) 88
89static inline bool setup_remapped_irq(int irq,
90 struct irq_cfg *cfg,
91 struct irq_chip *chip)
96{ 92{
97 return -ENODEV; 93 return false;
98} 94}
99#endif /* CONFIG_IRQ_REMAP */ 95#endif /* CONFIG_IRQ_REMAP */
100 96
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f16187be..65231e173baf 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
85 return ret; 85 return ret;
86} 86}
87 87
88static inline int kvm_para_available(void) 88static inline bool kvm_para_available(void)
89{ 89{
90 unsigned int eax, ebx, ecx, edx; 90 unsigned int eax, ebx, ecx, edx;
91 char signature[13]; 91 char signature[13];
92 92
93 if (boot_cpu_data.cpuid_level < 0) 93 if (boot_cpu_data.cpuid_level < 0)
94 return 0; /* So we don't blow up on old processors */ 94 return false; /* So we don't blow up on old processors */
95 95
96 if (cpu_has_hypervisor) { 96 if (cpu_has_hypervisor) {
97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); 97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline int kvm_para_available(void)
101 signature[12] = 0; 101 signature[12] = 0;
102 102
103 if (strcmp(signature, "KVMKVMKVM") == 0) 103 if (strcmp(signature, "KVMKVMKVM") == 0)
104 return 1; 104 return true;
105 } 105 }
106 106
107 return 0; 107 return false;
108} 108}
109 109
110static inline unsigned int kvm_arch_para_features(void) 110static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..c28fd02f4bf7 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
121#define arch_teardown_msi_irq x86_teardown_msi_irq 121#define arch_teardown_msi_irq x86_teardown_msi_irq
122#define arch_restore_msi_irqs x86_restore_msi_irqs 122#define arch_restore_msi_irqs x86_restore_msi_irqs
123/* implemented in arch/x86/kernel/apic/io_apic. */ 123/* implemented in arch/x86/kernel/apic/io_apic. */
124struct msi_desc;
124int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); 125int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
125void native_teardown_msi_irq(unsigned int irq); 126void native_teardown_msi_irq(unsigned int irq);
126void native_restore_msi_irqs(struct pci_dev *dev, int irq); 127void native_restore_msi_irqs(struct pci_dev *dev, int irq);
128int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
129 unsigned int irq_base, unsigned int irq_offset);
127/* default to the implementation in drivers/lib/msi.c */ 130/* default to the implementation in drivers/lib/msi.c */
128#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS 131#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
129#define HAVE_DEFAULT_MSI_RESTORE_IRQS 132#define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519c..7669941cc9d2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,19 +181,38 @@ struct x86_platform_ops {
181}; 181};
182 182
183struct pci_dev; 183struct pci_dev;
184struct msi_msg;
184 185
185struct x86_msi_ops { 186struct x86_msi_ops {
186 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); 187 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
188 void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
189 unsigned int dest, struct msi_msg *msg,
190 u8 hpet_id);
187 void (*teardown_msi_irq)(unsigned int irq); 191 void (*teardown_msi_irq)(unsigned int irq);
188 void (*teardown_msi_irqs)(struct pci_dev *dev); 192 void (*teardown_msi_irqs)(struct pci_dev *dev);
189 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 193 void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
194 int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
190}; 195};
191 196
197struct IO_APIC_route_entry;
198struct io_apic_irq_attr;
199struct irq_data;
200struct cpumask;
201
192struct x86_io_apic_ops { 202struct x86_io_apic_ops {
193 void (*init) (void); 203 void (*init) (void);
194 unsigned int (*read) (unsigned int apic, unsigned int reg); 204 unsigned int (*read) (unsigned int apic, unsigned int reg);
195 void (*write) (unsigned int apic, unsigned int reg, unsigned int value); 205 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
196 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); 206 void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
207 void (*disable)(void);
208 void (*print_entries)(unsigned int apic, unsigned int nr_entries);
209 int (*set_affinity)(struct irq_data *data,
210 const struct cpumask *mask,
211 bool force);
212 int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
213 unsigned int destination, int vector,
214 struct io_apic_irq_attr *attr);
215 void (*eoi_ioapic_pin)(int apic, int pin, int vector);
197}; 216};
198 217
199extern struct x86_init_ops x86_init; 218extern struct x86_init_ops x86_init;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7e..a5b4dce1b7ac 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
1477 * Now that local APIC setup is completed for BP, configure the fault 1477 * Now that local APIC setup is completed for BP, configure the fault
1478 * handling for interrupt remapping. 1478 * handling for interrupt remapping.
1479 */ 1479 */
1480 if (irq_remapping_enabled) 1480 irq_remap_enable_fault_handling();
1481 irq_remap_enable_fault_handling();
1482 1481
1483} 1482}
1484 1483
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void)
2251 local_irq_save(flags); 2250 local_irq_save(flags);
2252 disable_local_APIC(); 2251 disable_local_APIC();
2253 2252
2254 if (irq_remapping_enabled) 2253 irq_remapping_disable();
2255 irq_remapping_disable();
2256 2254
2257 local_irq_restore(flags); 2255 local_irq_restore(flags);
2258 return 0; 2256 return 0;
@@ -2268,16 +2266,15 @@ static void lapic_resume(void)
2268 return; 2266 return;
2269 2267
2270 local_irq_save(flags); 2268 local_irq_save(flags);
2271 if (irq_remapping_enabled) { 2269
2272 /* 2270 /*
2273 * IO-APIC and PIC have their own resume routines. 2271 * IO-APIC and PIC have their own resume routines.
2274 * We just mask them here to make sure the interrupt 2272 * We just mask them here to make sure the interrupt
2275 * subsystem is completely quiet while we enable x2apic 2273 * subsystem is completely quiet while we enable x2apic
2276 * and interrupt-remapping. 2274 * and interrupt-remapping.
2277 */ 2275 */
2278 mask_ioapic_entries(); 2276 mask_ioapic_entries();
2279 legacy_pic->mask_all(); 2277 legacy_pic->mask_all();
2280 }
2281 2278
2282 if (x2apic_mode) 2279 if (x2apic_mode)
2283 enable_x2apic(); 2280 enable_x2apic();
@@ -2320,8 +2317,7 @@ static void lapic_resume(void)
2320 apic_write(APIC_ESR, 0); 2317 apic_write(APIC_ESR, 0);
2321 apic_read(APIC_ESR); 2318 apic_read(APIC_ESR);
2322 2319
2323 if (irq_remapping_enabled) 2320 irq_remapping_reenable(x2apic_mode);
2324 irq_remapping_reenable(x2apic_mode);
2325 2321
2326 local_irq_restore(flags); 2322 local_irq_restore(flags);
2327} 2323}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..9ed796ccc32c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
68#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
69 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
70 70
71#ifdef CONFIG_IRQ_REMAP
72static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
73static inline bool irq_remapped(struct irq_cfg *cfg)
74{
75 return cfg->irq_2_iommu.iommu != NULL;
76}
77#else
78static inline bool irq_remapped(struct irq_cfg *cfg)
79{
80 return false;
81}
82static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
83{
84}
85#endif
86
87/* 71/*
88 * Is the SiS APIC rmw bug present ? 72 * Is the SiS APIC rmw bug present ?
89 * -1 = don't know, 0 = no, 1 = yes 73 * -1 = don't know, 0 = no, 1 = yes
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
300 return cfg; 284 return cfg;
301} 285}
302 286
303static int alloc_irq_from(unsigned int from, int node) 287static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
304{ 288{
305 return irq_alloc_desc_from(from, node); 289 return irq_alloc_descs_from(from, count, node);
306} 290}
307 291
308static void free_irq_at(unsigned int at, struct irq_cfg *cfg) 292static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
326 + (mpc_ioapic_addr(idx) & ~PAGE_MASK); 310 + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
327} 311}
328 312
329static inline void io_apic_eoi(unsigned int apic, unsigned int vector) 313void io_apic_eoi(unsigned int apic, unsigned int vector)
330{ 314{
331 struct io_apic __iomem *io_apic = io_apic_base(apic); 315 struct io_apic __iomem *io_apic = io_apic_base(apic);
332 writel(vector, &io_apic->eoi); 316 writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
573 * Otherwise, we simulate the EOI message manually by changing the trigger 557 * Otherwise, we simulate the EOI message manually by changing the trigger
574 * mode to edge and then back to level, with RTE being masked during this. 558 * mode to edge and then back to level, with RTE being masked during this.
575 */ 559 */
576static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) 560void native_eoi_ioapic_pin(int apic, int pin, int vector)
577{ 561{
578 if (mpc_ioapic_ver(apic) >= 0x20) { 562 if (mpc_ioapic_ver(apic) >= 0x20) {
579 /* 563 io_apic_eoi(apic, vector);
580 * Intr-remapping uses pin number as the virtual vector
581 * in the RTE. Actual vector is programmed in
582 * intr-remapping table entry. Hence for the io-apic
583 * EOI we use the pin number.
584 */
585 if (cfg && irq_remapped(cfg))
586 io_apic_eoi(apic, pin);
587 else
588 io_apic_eoi(apic, vector);
589 } else { 564 } else {
590 struct IO_APIC_route_entry entry, entry1; 565 struct IO_APIC_route_entry entry, entry1;
591 566
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
606 } 581 }
607} 582}
608 583
609static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 584void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
610{ 585{
611 struct irq_pin_list *entry; 586 struct irq_pin_list *entry;
612 unsigned long flags; 587 unsigned long flags;
613 588
614 raw_spin_lock_irqsave(&ioapic_lock, flags); 589 raw_spin_lock_irqsave(&ioapic_lock, flags);
615 for_each_irq_pin(entry, cfg->irq_2_pin) 590 for_each_irq_pin(entry, cfg->irq_2_pin)
616 __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); 591 x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
592 cfg->vector);
617 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 593 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
618} 594}
619 595
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
650 } 626 }
651 627
652 raw_spin_lock_irqsave(&ioapic_lock, flags); 628 raw_spin_lock_irqsave(&ioapic_lock, flags);
653 __eoi_ioapic_pin(apic, pin, entry.vector, NULL); 629 x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
654 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 630 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
655 } 631 }
656 632
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1304 fasteoi = false; 1280 fasteoi = false;
1305 } 1281 }
1306 1282
1307 if (irq_remapped(cfg)) { 1283 if (setup_remapped_irq(irq, cfg, chip))
1308 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1309 irq_remap_modify_chip_defaults(chip);
1310 fasteoi = trigger != 0; 1284 fasteoi = trigger != 0;
1311 }
1312 1285
1313 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; 1286 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1314 irq_set_chip_and_handler_name(irq, chip, hdl, 1287 irq_set_chip_and_handler_name(irq, chip, hdl,
1315 fasteoi ? "fasteoi" : "edge"); 1288 fasteoi ? "fasteoi" : "edge");
1316} 1289}
1317 1290
1318static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, 1291int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
1319 unsigned int destination, int vector, 1292 unsigned int destination, int vector,
1320 struct io_apic_irq_attr *attr) 1293 struct io_apic_irq_attr *attr)
1321{ 1294{
1322 if (irq_remapping_enabled)
1323 return setup_ioapic_remapped_entry(irq, entry, destination,
1324 vector, attr);
1325
1326 memset(entry, 0, sizeof(*entry)); 1295 memset(entry, 0, sizeof(*entry));
1327 1296
1328 entry->delivery_mode = apic->irq_delivery_mode; 1297 entry->delivery_mode = apic->irq_delivery_mode;
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
1370 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, 1339 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
1371 cfg->vector, irq, attr->trigger, attr->polarity, dest); 1340 cfg->vector, irq, attr->trigger, attr->polarity, dest);
1372 1341
1373 if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { 1342 if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
1374 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1343 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1375 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); 1344 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
1376 __clear_irq_vector(irq, cfg); 1345 __clear_irq_vector(irq, cfg);
1377 1346
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1479 struct IO_APIC_route_entry entry; 1448 struct IO_APIC_route_entry entry;
1480 unsigned int dest; 1449 unsigned int dest;
1481 1450
1482 if (irq_remapping_enabled)
1483 return;
1484
1485 memset(&entry, 0, sizeof(entry)); 1451 memset(&entry, 0, sizeof(entry));
1486 1452
1487 /* 1453 /*
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1513 ioapic_write_entry(ioapic_idx, pin, entry); 1479 ioapic_write_entry(ioapic_idx, pin, entry);
1514} 1480}
1515 1481
1516__apicdebuginit(void) print_IO_APIC(int ioapic_idx) 1482void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
1517{ 1483{
1518 int i; 1484 int i;
1485
1486 pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
1487
1488 for (i = 0; i <= nr_entries; i++) {
1489 struct IO_APIC_route_entry entry;
1490
1491 entry = ioapic_read_entry(apic, i);
1492
1493 pr_debug(" %02x %02X ", i, entry.dest);
1494 pr_cont("%1d %1d %1d %1d %1d "
1495 "%1d %1d %02X\n",
1496 entry.mask,
1497 entry.trigger,
1498 entry.irr,
1499 entry.polarity,
1500 entry.delivery_status,
1501 entry.dest_mode,
1502 entry.delivery_mode,
1503 entry.vector);
1504 }
1505}
1506
1507void intel_ir_io_apic_print_entries(unsigned int apic,
1508 unsigned int nr_entries)
1509{
1510 int i;
1511
1512 pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
1513
1514 for (i = 0; i <= nr_entries; i++) {
1515 struct IR_IO_APIC_route_entry *ir_entry;
1516 struct IO_APIC_route_entry entry;
1517
1518 entry = ioapic_read_entry(apic, i);
1519
1520 ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
1521
1522 pr_debug(" %02x %04X ", i, ir_entry->index);
1523 pr_cont("%1d %1d %1d %1d %1d "
1524 "%1d %1d %X %02X\n",
1525 ir_entry->format,
1526 ir_entry->mask,
1527 ir_entry->trigger,
1528 ir_entry->irr,
1529 ir_entry->polarity,
1530 ir_entry->delivery_status,
1531 ir_entry->index2,
1532 ir_entry->zero,
1533 ir_entry->vector);
1534 }
1535}
1536
1537__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1538{
1519 union IO_APIC_reg_00 reg_00; 1539 union IO_APIC_reg_00 reg_00;
1520 union IO_APIC_reg_01 reg_01; 1540 union IO_APIC_reg_01 reg_01;
1521 union IO_APIC_reg_02 reg_02; 1541 union IO_APIC_reg_02 reg_02;
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1568 1588
1569 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1589 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1570 1590
1571 if (irq_remapping_enabled) { 1591 x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
1572 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
1573 " Pol Stat Indx2 Zero Vect:\n");
1574 } else {
1575 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1576 " Stat Dmod Deli Vect:\n");
1577 }
1578
1579 for (i = 0; i <= reg_01.bits.entries; i++) {
1580 if (irq_remapping_enabled) {
1581 struct IO_APIC_route_entry entry;
1582 struct IR_IO_APIC_route_entry *ir_entry;
1583
1584 entry = ioapic_read_entry(ioapic_idx, i);
1585 ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
1586 printk(KERN_DEBUG " %02x %04X ",
1587 i,
1588 ir_entry->index
1589 );
1590 pr_cont("%1d %1d %1d %1d %1d "
1591 "%1d %1d %X %02X\n",
1592 ir_entry->format,
1593 ir_entry->mask,
1594 ir_entry->trigger,
1595 ir_entry->irr,
1596 ir_entry->polarity,
1597 ir_entry->delivery_status,
1598 ir_entry->index2,
1599 ir_entry->zero,
1600 ir_entry->vector
1601 );
1602 } else {
1603 struct IO_APIC_route_entry entry;
1604
1605 entry = ioapic_read_entry(ioapic_idx, i);
1606 printk(KERN_DEBUG " %02x %02X ",
1607 i,
1608 entry.dest
1609 );
1610 pr_cont("%1d %1d %1d %1d %1d "
1611 "%1d %1d %02X\n",
1612 entry.mask,
1613 entry.trigger,
1614 entry.irr,
1615 entry.polarity,
1616 entry.delivery_status,
1617 entry.dest_mode,
1618 entry.delivery_mode,
1619 entry.vector
1620 );
1621 }
1622 }
1623} 1592}
1624 1593
1625__apicdebuginit(void) print_IO_APICs(void) 1594__apicdebuginit(void) print_IO_APICs(void)
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void)
1921 clear_IO_APIC(); 1890 clear_IO_APIC();
1922} 1891}
1923 1892
1924/* 1893void native_disable_io_apic(void)
1925 * Not an __init, needed by the reboot code
1926 */
1927void disable_IO_APIC(void)
1928{ 1894{
1929 /* 1895 /*
1930 * Clear the IO-APIC before rebooting:
1931 */
1932 clear_IO_APIC();
1933
1934 if (!legacy_pic->nr_legacy_irqs)
1935 return;
1936
1937 /*
1938 * If the i8259 is routed through an IOAPIC 1896 * If the i8259 is routed through an IOAPIC
1939 * Put that IOAPIC in virtual wire mode 1897 * Put that IOAPIC in virtual wire mode
1940 * so legacy interrupts can be delivered. 1898 * so legacy interrupts can be delivered.
1941 *
1942 * With interrupt-remapping, for now we will use virtual wire A mode,
1943 * as virtual wire B is little complex (need to configure both
1944 * IOAPIC RTE as well as interrupt-remapping table entry).
1945 * As this gets called during crash dump, keep this simple for now.
1946 */ 1899 */
1947 if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { 1900 if (ioapic_i8259.pin != -1) {
1948 struct IO_APIC_route_entry entry; 1901 struct IO_APIC_route_entry entry;
1949 1902
1950 memset(&entry, 0, sizeof(entry)); 1903 memset(&entry, 0, sizeof(entry));
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void)
1964 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 1917 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1965 } 1918 }
1966 1919
1920 if (cpu_has_apic || apic_from_smp_config())
1921 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1922
1923}
1924
1925/*
1926 * Not an __init, needed by the reboot code
1927 */
1928void disable_IO_APIC(void)
1929{
1967 /* 1930 /*
1968 * Use virtual wire A mode when interrupt remapping is enabled. 1931 * Clear the IO-APIC before rebooting:
1969 */ 1932 */
1970 if (cpu_has_apic || apic_from_smp_config()) 1933 clear_IO_APIC();
1971 disconnect_bsp_APIC(!irq_remapping_enabled && 1934
1972 ioapic_i8259.pin != -1); 1935 if (!legacy_pic->nr_legacy_irqs)
1936 return;
1937
1938 x86_io_apic_ops.disable();
1973} 1939}
1974 1940
1975#ifdef CONFIG_X86_32 1941#ifdef CONFIG_X86_32
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322 2288
2323 apic = entry->apic; 2289 apic = entry->apic;
2324 pin = entry->pin; 2290 pin = entry->pin;
2325 /* 2291
2326 * With interrupt-remapping, destination information comes 2292 io_apic_write(apic, 0x11 + pin*2, dest);
2327 * from interrupt-remapping table entry.
2328 */
2329 if (!irq_remapped(cfg))
2330 io_apic_write(apic, 0x11 + pin*2, dest);
2331 reg = io_apic_read(apic, 0x10 + pin*2); 2293 reg = io_apic_read(apic, 0x10 + pin*2);
2332 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2294 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2333 reg |= vector; 2295 reg |= vector;
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2369 return 0; 2331 return 0;
2370} 2332}
2371 2333
2372static int 2334
2373ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, 2335int native_ioapic_set_affinity(struct irq_data *data,
2374 bool force) 2336 const struct cpumask *mask,
2337 bool force)
2375{ 2338{
2376 unsigned int dest, irq = data->irq; 2339 unsigned int dest, irq = data->irq;
2377 unsigned long flags; 2340 unsigned long flags;
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
2548 ioapic_irqd_unmask(data, cfg, masked); 2511 ioapic_irqd_unmask(data, cfg, masked);
2549} 2512}
2550 2513
2551#ifdef CONFIG_IRQ_REMAP
2552static void ir_ack_apic_edge(struct irq_data *data)
2553{
2554 ack_APIC_irq();
2555}
2556
2557static void ir_ack_apic_level(struct irq_data *data)
2558{
2559 ack_APIC_irq();
2560 eoi_ioapic_irq(data->irq, data->chip_data);
2561}
2562
2563static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
2564{
2565 seq_printf(p, " IR-%s", data->chip->name);
2566}
2567
2568static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2569{
2570 chip->irq_print_chip = ir_print_prefix;
2571 chip->irq_ack = ir_ack_apic_edge;
2572 chip->irq_eoi = ir_ack_apic_level;
2573
2574 chip->irq_set_affinity = set_remapped_irq_affinity;
2575}
2576#endif /* CONFIG_IRQ_REMAP */
2577
2578static struct irq_chip ioapic_chip __read_mostly = { 2514static struct irq_chip ioapic_chip __read_mostly = {
2579 .name = "IO-APIC", 2515 .name = "IO-APIC",
2580 .irq_startup = startup_ioapic_irq, 2516 .irq_startup = startup_ioapic_irq,
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
2582 .irq_unmask = unmask_ioapic_irq, 2518 .irq_unmask = unmask_ioapic_irq,
2583 .irq_ack = ack_apic_edge, 2519 .irq_ack = ack_apic_edge,
2584 .irq_eoi = ack_apic_level, 2520 .irq_eoi = ack_apic_level,
2585 .irq_set_affinity = ioapic_set_affinity, 2521 .irq_set_affinity = native_ioapic_set_affinity,
2586 .irq_retrigger = ioapic_retrigger_irq, 2522 .irq_retrigger = ioapic_retrigger_irq,
2587}; 2523};
2588 2524
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void)
2781 * 8259A. 2717 * 8259A.
2782 */ 2718 */
2783 if (pin1 == -1) { 2719 if (pin1 == -1) {
2784 if (irq_remapping_enabled) 2720 panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
2785 panic("BIOS bug: timer not connected to IO-APIC");
2786 pin1 = pin2; 2721 pin1 = pin2;
2787 apic1 = apic2; 2722 apic1 = apic2;
2788 no_pin1 = 1; 2723 no_pin1 = 1;
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void)
2814 clear_IO_APIC_pin(0, pin1); 2749 clear_IO_APIC_pin(0, pin1);
2815 goto out; 2750 goto out;
2816 } 2751 }
2817 if (irq_remapping_enabled) 2752 panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
2818 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2819 local_irq_disable(); 2753 local_irq_disable();
2820 clear_IO_APIC_pin(apic1, pin1); 2754 clear_IO_APIC_pin(apic1, pin1);
2821 if (!no_pin1) 2755 if (!no_pin1)
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops);
2982/* 2916/*
2983 * Dynamic irq allocate and deallocation 2917 * Dynamic irq allocate and deallocation
2984 */ 2918 */
2985unsigned int create_irq_nr(unsigned int from, int node) 2919unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
2986{ 2920{
2987 struct irq_cfg *cfg; 2921 struct irq_cfg **cfg;
2988 unsigned long flags; 2922 unsigned long flags;
2989 unsigned int ret = 0; 2923 int irq, i;
2990 int irq;
2991 2924
2992 if (from < nr_irqs_gsi) 2925 if (from < nr_irqs_gsi)
2993 from = nr_irqs_gsi; 2926 from = nr_irqs_gsi;
2994 2927
2995 irq = alloc_irq_from(from, node); 2928 cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
2996 if (irq < 0) 2929 if (!cfg)
2997 return 0;
2998 cfg = alloc_irq_cfg(irq, node);
2999 if (!cfg) {
3000 free_irq_at(irq, NULL);
3001 return 0; 2930 return 0;
2931
2932 irq = alloc_irqs_from(from, count, node);
2933 if (irq < 0)
2934 goto out_cfgs;
2935
2936 for (i = 0; i < count; i++) {
2937 cfg[i] = alloc_irq_cfg(irq + i, node);
2938 if (!cfg[i])
2939 goto out_irqs;
3002 } 2940 }
3003 2941
3004 raw_spin_lock_irqsave(&vector_lock, flags); 2942 raw_spin_lock_irqsave(&vector_lock, flags);
3005 if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) 2943 for (i = 0; i < count; i++)
3006 ret = irq; 2944 if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
2945 goto out_vecs;
3007 raw_spin_unlock_irqrestore(&vector_lock, flags); 2946 raw_spin_unlock_irqrestore(&vector_lock, flags);
3008 2947
3009 if (ret) { 2948 for (i = 0; i < count; i++) {
3010 irq_set_chip_data(irq, cfg); 2949 irq_set_chip_data(irq + i, cfg[i]);
3011 irq_clear_status_flags(irq, IRQ_NOREQUEST); 2950 irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
3012 } else {
3013 free_irq_at(irq, cfg);
3014 } 2951 }
3015 return ret; 2952
2953 kfree(cfg);
2954 return irq;
2955
2956out_vecs:
2957 for (i--; i >= 0; i--)
2958 __clear_irq_vector(irq + i, cfg[i]);
2959 raw_spin_unlock_irqrestore(&vector_lock, flags);
2960out_irqs:
2961 for (i = 0; i < count; i++)
2962 free_irq_at(irq + i, cfg[i]);
2963out_cfgs:
2964 kfree(cfg);
2965 return 0;
2966}
2967
2968unsigned int create_irq_nr(unsigned int from, int node)
2969{
2970 return __create_irqs(from, 1, node);
3016} 2971}
3017 2972
3018int create_irq(void) 2973int create_irq(void)
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq)
3037 2992
3038 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 2993 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3039 2994
3040 if (irq_remapped(cfg)) 2995 free_remapped_irq(irq);
3041 free_remapped_irq(irq); 2996
3042 raw_spin_lock_irqsave(&vector_lock, flags); 2997 raw_spin_lock_irqsave(&vector_lock, flags);
3043 __clear_irq_vector(irq, cfg); 2998 __clear_irq_vector(irq, cfg);
3044 raw_spin_unlock_irqrestore(&vector_lock, flags); 2999 raw_spin_unlock_irqrestore(&vector_lock, flags);
3045 free_irq_at(irq, cfg); 3000 free_irq_at(irq, cfg);
3046} 3001}
3047 3002
3003void destroy_irqs(unsigned int irq, unsigned int count)
3004{
3005 unsigned int i;
3006
3007 for (i = 0; i < count; i++)
3008 destroy_irq(irq + i);
3009}
3010
3048/* 3011/*
3049 * MSI message composition 3012 * MSI message composition
3050 */ 3013 */
3051#ifdef CONFIG_PCI_MSI 3014void native_compose_msi_msg(struct pci_dev *pdev,
3052static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, 3015 unsigned int irq, unsigned int dest,
3053 struct msi_msg *msg, u8 hpet_id) 3016 struct msi_msg *msg, u8 hpet_id)
3054{ 3017{
3055 struct irq_cfg *cfg; 3018 struct irq_cfg *cfg = irq_cfg(irq);
3056 int err;
3057 unsigned dest;
3058
3059 if (disable_apic)
3060 return -ENXIO;
3061
3062 cfg = irq_cfg(irq);
3063 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3064 if (err)
3065 return err;
3066 3019
3067 err = apic->cpu_mask_to_apicid_and(cfg->domain, 3020 msg->address_hi = MSI_ADDR_BASE_HI;
3068 apic->target_cpus(), &dest);
3069 if (err)
3070 return err;
3071
3072 if (irq_remapped(cfg)) {
3073 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
3074 return err;
3075 }
3076 3021
3077 if (x2apic_enabled()) 3022 if (x2apic_enabled())
3078 msg->address_hi = MSI_ADDR_BASE_HI | 3023 msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
3079 MSI_ADDR_EXT_DEST_ID(dest);
3080 else
3081 msg->address_hi = MSI_ADDR_BASE_HI;
3082 3024
3083 msg->address_lo = 3025 msg->address_lo =
3084 MSI_ADDR_BASE_LO | 3026 MSI_ADDR_BASE_LO |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3097 MSI_DATA_DELIVERY_FIXED: 3039 MSI_DATA_DELIVERY_FIXED:
3098 MSI_DATA_DELIVERY_LOWPRI) | 3040 MSI_DATA_DELIVERY_LOWPRI) |
3099 MSI_DATA_VECTOR(cfg->vector); 3041 MSI_DATA_VECTOR(cfg->vector);
3042}
3100 3043
3101 return err; 3044#ifdef CONFIG_PCI_MSI
3045static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3046 struct msi_msg *msg, u8 hpet_id)
3047{
3048 struct irq_cfg *cfg;
3049 int err;
3050 unsigned dest;
3051
3052 if (disable_apic)
3053 return -ENXIO;
3054
3055 cfg = irq_cfg(irq);
3056 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3057 if (err)
3058 return err;
3059
3060 err = apic->cpu_mask_to_apicid_and(cfg->domain,
3061 apic->target_cpus(), &dest);
3062 if (err)
3063 return err;
3064
3065 x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
3066
3067 return 0;
3102} 3068}
3103 3069
3104static int 3070static int
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = {
3136 .irq_retrigger = ioapic_retrigger_irq, 3102 .irq_retrigger = ioapic_retrigger_irq,
3137}; 3103};
3138 3104
3139static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3105int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
3106 unsigned int irq_base, unsigned int irq_offset)
3140{ 3107{
3141 struct irq_chip *chip = &msi_chip; 3108 struct irq_chip *chip = &msi_chip;
3142 struct msi_msg msg; 3109 struct msi_msg msg;
3110 unsigned int irq = irq_base + irq_offset;
3143 int ret; 3111 int ret;
3144 3112
3145 ret = msi_compose_msg(dev, irq, &msg, -1); 3113 ret = msi_compose_msg(dev, irq, &msg, -1);
3146 if (ret < 0) 3114 if (ret < 0)
3147 return ret; 3115 return ret;
3148 3116
3149 irq_set_msi_desc(irq, msidesc); 3117 irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
3150 write_msi_msg(irq, &msg);
3151 3118
3152 if (irq_remapped(irq_get_chip_data(irq))) { 3119 /*
3153 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3120 * MSI-X message is written per-IRQ, the offset is always 0.
3154 irq_remap_modify_chip_defaults(chip); 3121 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
3155 } 3122 */
3123 if (!irq_offset)
3124 write_msi_msg(irq, &msg);
3125
3126 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3156 3127
3157 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3128 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3158 3129
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3163 3134
3164int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3135int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3165{ 3136{
3166 int node, ret, sub_handle, index = 0;
3167 unsigned int irq, irq_want; 3137 unsigned int irq, irq_want;
3168 struct msi_desc *msidesc; 3138 struct msi_desc *msidesc;
3139 int node, ret;
3169 3140
3170 /* x86 doesn't support multiple MSI yet */ 3141 /* Multiple MSI vectors only supported with interrupt remapping */
3171 if (type == PCI_CAP_ID_MSI && nvec > 1) 3142 if (type == PCI_CAP_ID_MSI && nvec > 1)
3172 return 1; 3143 return 1;
3173 3144
3174 node = dev_to_node(&dev->dev); 3145 node = dev_to_node(&dev->dev);
3175 irq_want = nr_irqs_gsi; 3146 irq_want = nr_irqs_gsi;
3176 sub_handle = 0;
3177 list_for_each_entry(msidesc, &dev->msi_list, list) { 3147 list_for_each_entry(msidesc, &dev->msi_list, list) {
3178 irq = create_irq_nr(irq_want, node); 3148 irq = create_irq_nr(irq_want, node);
3179 if (irq == 0) 3149 if (irq == 0)
3180 return -1; 3150 return -ENOSPC;
3151
3181 irq_want = irq + 1; 3152 irq_want = irq + 1;
3182 if (!irq_remapping_enabled)
3183 goto no_ir;
3184 3153
3185 if (!sub_handle) { 3154 ret = setup_msi_irq(dev, msidesc, irq, 0);
3186 /*
3187 * allocate the consecutive block of IRTE's
3188 * for 'nvec'
3189 */
3190 index = msi_alloc_remapped_irq(dev, irq, nvec);
3191 if (index < 0) {
3192 ret = index;
3193 goto error;
3194 }
3195 } else {
3196 ret = msi_setup_remapped_irq(dev, irq, index,
3197 sub_handle);
3198 if (ret < 0)
3199 goto error;
3200 }
3201no_ir:
3202 ret = setup_msi_irq(dev, msidesc, irq);
3203 if (ret < 0) 3155 if (ret < 0)
3204 goto error; 3156 goto error;
3205 sub_handle++;
3206 } 3157 }
3207 return 0; 3158 return 0;
3208 3159
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = {
3298 .irq_retrigger = ioapic_retrigger_irq, 3249 .irq_retrigger = ioapic_retrigger_irq,
3299}; 3250};
3300 3251
3301int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3252int default_setup_hpet_msi(unsigned int irq, unsigned int id)
3302{ 3253{
3303 struct irq_chip *chip = &hpet_msi_type; 3254 struct irq_chip *chip = &hpet_msi_type;
3304 struct msi_msg msg; 3255 struct msi_msg msg;
3305 int ret; 3256 int ret;
3306 3257
3307 if (irq_remapping_enabled) {
3308 ret = setup_hpet_msi_remapped(irq, id);
3309 if (ret)
3310 return ret;
3311 }
3312
3313 ret = msi_compose_msg(NULL, irq, &msg, id); 3258 ret = msi_compose_msg(NULL, irq, &msg, id);
3314 if (ret < 0) 3259 if (ret < 0)
3315 return ret; 3260 return ret;
3316 3261
3317 hpet_msi_write(irq_get_handler_data(irq), &msg); 3262 hpet_msi_write(irq_get_handler_data(irq), &msg);
3318 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3263 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3319 if (irq_remapped(irq_get_chip_data(irq))) 3264 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3320 irq_remap_modify_chip_defaults(chip);
3321 3265
3322 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3266 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3323 return 0; 3267 return 0;
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void)
3683 else 3627 else
3684 mask = apic->target_cpus(); 3628 mask = apic->target_cpus();
3685 3629
3686 if (irq_remapping_enabled) 3630 x86_io_apic_ops.set_affinity(idata, mask, false);
3687 set_remapped_irq_affinity(idata, mask, false);
3688 else
3689 ioapic_set_affinity(idata, mask, false);
3690 } 3631 }
3691 3632
3692} 3633}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf26676..7434d8556d09 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
106 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
107 unsigned long flags; 107 unsigned long flags;
108 108
109 if (WARN_ONCE(!mask, "empty IPI mask")) 109 if (!mask)
110 return; 110 return;
111 111
112 local_irq_save(flags); 112 local_irq_save(flags);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d6..1e7e84a02eba 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
79 if (x86_hyper->init_platform) 79 if (x86_hyper->init_platform)
80 x86_hyper->init_platform(); 80 x86_hyper->init_platform();
81} 81}
82
83bool __init hypervisor_x2apic_available(void)
84{
85 return x86_hyper &&
86 x86_hyper->x2apic_available &&
87 x86_hyper->x2apic_available();
88}
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcfd..03a36321ec54 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
33 33
34#define VMWARE_PORT_CMD_GETVERSION 10 34#define VMWARE_PORT_CMD_GETVERSION 10
35#define VMWARE_PORT_CMD_GETHZ 45 35#define VMWARE_PORT_CMD_GETHZ 45
36#define VMWARE_PORT_CMD_GETVCPU_INFO 68
37#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
38#define VMWARE_PORT_CMD_VCPU_RESERVED 31
36 39
37#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ 40#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
38 __asm__("inl (%%dx)" : \ 41 __asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
125 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); 128 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
126} 129}
127 130
131/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
132static bool __init vmware_legacy_x2apic_available(void)
133{
134 uint32_t eax, ebx, ecx, edx;
135 VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
136 return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
137 (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
138}
139
128const __refconst struct hypervisor_x86 x86_hyper_vmware = { 140const __refconst struct hypervisor_x86 x86_hyper_vmware = {
129 .name = "VMware", 141 .name = "VMware",
130 .detect = vmware_platform, 142 .detect = vmware_platform,
131 .set_cpu_features = vmware_set_cpu_features, 143 .set_cpu_features = vmware_set_cpu_features,
132 .init_platform = vmware_platform_setup, 144 .init_platform = vmware_platform_setup,
145 .x2apic_available = vmware_legacy_x2apic_available,
133}; 146};
134EXPORT_SYMBOL(x86_hyper_vmware); 147EXPORT_SYMBOL(x86_hyper_vmware);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index e28670f9a589..da85a8e830a1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
478 478
479static int hpet_setup_msi_irq(unsigned int irq) 479static int hpet_setup_msi_irq(unsigned int irq)
480{ 480{
481 if (arch_setup_hpet_msi(irq, hpet_blockid)) { 481 if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
482 destroy_irq(irq); 482 destroy_irq(irq);
483 return -EINVAL; 483 return -EINVAL;
484 } 484 }
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9c2bd8bd4b4c..2b44ea5f269d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -505,6 +505,7 @@ static bool __init kvm_detect(void)
505const struct hypervisor_x86 x86_hyper_kvm __refconst = { 505const struct hypervisor_x86 x86_hyper_kvm __refconst = {
506 .name = "KVM", 506 .name = "KVM",
507 .detect = kvm_detect, 507 .detect = kvm_detect,
508 .x2apic_available = kvm_para_available,
508}; 509};
509EXPORT_SYMBOL_GPL(x86_hyper_kvm); 510EXPORT_SYMBOL_GPL(x86_hyper_kvm);
510 511
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814a..d065d67c2672 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
19#include <asm/time.h> 19#include <asm/time.h>
20#include <asm/irq.h> 20#include <asm/irq.h>
21#include <asm/io_apic.h> 21#include <asm/io_apic.h>
22#include <asm/hpet.h>
22#include <asm/pat.h> 23#include <asm/pat.h>
23#include <asm/tsc.h> 24#include <asm/tsc.h>
24#include <asm/iommu.h> 25#include <asm/iommu.h>
@@ -111,15 +112,22 @@ struct x86_platform_ops x86_platform = {
111 112
112EXPORT_SYMBOL_GPL(x86_platform); 113EXPORT_SYMBOL_GPL(x86_platform);
113struct x86_msi_ops x86_msi = { 114struct x86_msi_ops x86_msi = {
114 .setup_msi_irqs = native_setup_msi_irqs, 115 .setup_msi_irqs = native_setup_msi_irqs,
115 .teardown_msi_irq = native_teardown_msi_irq, 116 .compose_msi_msg = native_compose_msi_msg,
116 .teardown_msi_irqs = default_teardown_msi_irqs, 117 .teardown_msi_irq = native_teardown_msi_irq,
117 .restore_msi_irqs = default_restore_msi_irqs, 118 .teardown_msi_irqs = default_teardown_msi_irqs,
119 .restore_msi_irqs = default_restore_msi_irqs,
120 .setup_hpet_msi = default_setup_hpet_msi,
118}; 121};
119 122
120struct x86_io_apic_ops x86_io_apic_ops = { 123struct x86_io_apic_ops x86_io_apic_ops = {
121 .init = native_io_apic_init_mappings, 124 .init = native_io_apic_init_mappings,
122 .read = native_io_apic_read, 125 .read = native_io_apic_read,
123 .write = native_io_apic_write, 126 .write = native_io_apic_write,
124 .modify = native_io_apic_modify, 127 .modify = native_io_apic_modify,
128 .disable = native_disable_io_apic,
129 .print_entries = native_io_apic_print_entries,
130 .set_affinity = native_ioapic_set_affinity,
131 .setup_entry = native_setup_ioapic_entry,
132 .eoi_ioapic_pin = native_eoi_ioapic_pin,
125}; 133};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e0140923062f..39928d16be3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1637,6 +1637,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
1637 .name = "Xen HVM", 1637 .name = "Xen HVM",
1638 .detect = xen_hvm_platform, 1638 .detect = xen_hvm_platform,
1639 .init_platform = xen_hvm_guest_init, 1639 .init_platform = xen_hvm_guest_init,
1640 .x2apic_available = xen_x2apic_para_available,
1640}; 1641};
1641EXPORT_SYMBOL(x86_hyper_xen_hvm); 1642EXPORT_SYMBOL(x86_hyper_xen_hvm);
1642#endif 1643#endif
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 497912732566..495aeed26779 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1061,6 +1061,86 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
1061{} 1061{}
1062#endif 1062#endif
1063 1063
1064int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv)
1065{
1066 int rc;
1067 unsigned int maxvec;
1068
1069 if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) {
1070 rc = pci_enable_msi_block_auto(pdev, &maxvec);
1071 if (rc > 0) {
1072 if ((rc == maxvec) || (rc == 1))
1073 return rc;
1074 /*
1075 * Assume that advantage of multipe MSIs is negated,
1076 * so fallback to single MSI mode to save resources
1077 */
1078 pci_disable_msi(pdev);
1079 if (!pci_enable_msi(pdev))
1080 return 1;
1081 }
1082 }
1083
1084 pci_intx(pdev, 1);
1085 return 0;
1086}
1087
1088/**
1089 * ahci_host_activate - start AHCI host, request IRQs and register it
1090 * @host: target ATA host
1091 * @irq: base IRQ number to request
1092 * @n_msis: number of MSIs allocated for this host
1093 * @irq_handler: irq_handler used when requesting IRQs
1094 * @irq_flags: irq_flags used when requesting IRQs
1095 *
1096 * Similar to ata_host_activate, but requests IRQs according to AHCI-1.1
1097 * when multiple MSIs were allocated. That is one MSI per port, starting
1098 * from @irq.
1099 *
1100 * LOCKING:
1101 * Inherited from calling layer (may sleep).
1102 *
1103 * RETURNS:
1104 * 0 on success, -errno otherwise.
1105 */
1106int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis)
1107{
1108 int i, rc;
1109
1110 /* Sharing Last Message among several ports is not supported */
1111 if (n_msis < host->n_ports)
1112 return -EINVAL;
1113
1114 rc = ata_host_start(host);
1115 if (rc)
1116 return rc;
1117
1118 for (i = 0; i < host->n_ports; i++) {
1119 rc = devm_request_threaded_irq(host->dev,
1120 irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED,
1121 dev_driver_string(host->dev), host->ports[i]);
1122 if (rc)
1123 goto out_free_irqs;
1124 }
1125
1126 for (i = 0; i < host->n_ports; i++)
1127 ata_port_desc(host->ports[i], "irq %d", irq + i);
1128
1129 rc = ata_host_register(host, &ahci_sht);
1130 if (rc)
1131 goto out_free_all_irqs;
1132
1133 return 0;
1134
1135out_free_all_irqs:
1136 i = host->n_ports;
1137out_free_irqs:
1138 for (i--; i >= 0; i--)
1139 devm_free_irq(host->dev, irq + i, host->ports[i]);
1140
1141 return rc;
1142}
1143
1064static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 1144static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1065{ 1145{
1066 unsigned int board_id = ent->driver_data; 1146 unsigned int board_id = ent->driver_data;
@@ -1069,7 +1149,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1069 struct device *dev = &pdev->dev; 1149 struct device *dev = &pdev->dev;
1070 struct ahci_host_priv *hpriv; 1150 struct ahci_host_priv *hpriv;
1071 struct ata_host *host; 1151 struct ata_host *host;
1072 int n_ports, i, rc; 1152 int n_ports, n_msis, i, rc;
1073 int ahci_pci_bar = AHCI_PCI_BAR_STANDARD; 1153 int ahci_pci_bar = AHCI_PCI_BAR_STANDARD;
1074 1154
1075 VPRINTK("ENTER\n"); 1155 VPRINTK("ENTER\n");
@@ -1156,11 +1236,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1156 if (ahci_sb600_enable_64bit(pdev)) 1236 if (ahci_sb600_enable_64bit(pdev))
1157 hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY; 1237 hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY;
1158 1238
1159 if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev))
1160 pci_intx(pdev, 1);
1161
1162 hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; 1239 hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
1163 1240
1241 n_msis = ahci_init_interrupts(pdev, hpriv);
1242 if (n_msis > 1)
1243 hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
1244
1164 /* save initial config */ 1245 /* save initial config */
1165 ahci_pci_save_initial_config(pdev, hpriv); 1246 ahci_pci_save_initial_config(pdev, hpriv);
1166 1247
@@ -1256,6 +1337,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1256 ahci_pci_print_info(host); 1337 ahci_pci_print_info(host);
1257 1338
1258 pci_set_master(pdev); 1339 pci_set_master(pdev);
1340
1341 if (hpriv->flags & AHCI_HFLAG_MULTI_MSI)
1342 return ahci_host_activate(host, pdev->irq, n_msis);
1343
1259 return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED, 1344 return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED,
1260 &ahci_sht); 1345 &ahci_sht);
1261} 1346}
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 9be471200a07..b830e6c9fe49 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -231,6 +231,7 @@ enum {
231 AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on 231 AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on
232 port start (wait until 232 port start (wait until
233 error-handling stage) */ 233 error-handling stage) */
234 AHCI_HFLAG_MULTI_MSI = (1 << 16), /* multiple PCI MSIs */
234 235
235 /* ap->flags bits */ 236 /* ap->flags bits */
236 237
@@ -297,6 +298,8 @@ struct ahci_port_priv {
297 unsigned int ncq_saw_d2h:1; 298 unsigned int ncq_saw_d2h:1;
298 unsigned int ncq_saw_dmas:1; 299 unsigned int ncq_saw_dmas:1;
299 unsigned int ncq_saw_sdb:1; 300 unsigned int ncq_saw_sdb:1;
301 u32 intr_status; /* interrupts to handle */
302 spinlock_t lock; /* protects parent ata_port */
300 u32 intr_mask; /* interrupts to enable */ 303 u32 intr_mask; /* interrupts to enable */
301 bool fbs_supported; /* set iff FBS is supported */ 304 bool fbs_supported; /* set iff FBS is supported */
302 bool fbs_enabled; /* set iff FBS is enabled */ 305 bool fbs_enabled; /* set iff FBS is enabled */
@@ -359,7 +362,10 @@ void ahci_set_em_messages(struct ahci_host_priv *hpriv,
359 struct ata_port_info *pi); 362 struct ata_port_info *pi);
360int ahci_reset_em(struct ata_host *host); 363int ahci_reset_em(struct ata_host *host);
361irqreturn_t ahci_interrupt(int irq, void *dev_instance); 364irqreturn_t ahci_interrupt(int irq, void *dev_instance);
365irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance);
366irqreturn_t ahci_thread_fn(int irq, void *dev_instance);
362void ahci_print_info(struct ata_host *host, const char *scc_s); 367void ahci_print_info(struct ata_host *host, const char *scc_s);
368int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis);
363 369
364static inline void __iomem *__ahci_port_base(struct ata_host *host, 370static inline void __iomem *__ahci_port_base(struct ata_host *host,
365 unsigned int port_no) 371 unsigned int port_no)
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 6cd7805e47ca..34c82167b962 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1655,19 +1655,16 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat)
1655 ata_port_abort(ap); 1655 ata_port_abort(ap);
1656} 1656}
1657 1657
1658static void ahci_port_intr(struct ata_port *ap) 1658static void ahci_handle_port_interrupt(struct ata_port *ap,
1659 void __iomem *port_mmio, u32 status)
1659{ 1660{
1660 void __iomem *port_mmio = ahci_port_base(ap);
1661 struct ata_eh_info *ehi = &ap->link.eh_info; 1661 struct ata_eh_info *ehi = &ap->link.eh_info;
1662 struct ahci_port_priv *pp = ap->private_data; 1662 struct ahci_port_priv *pp = ap->private_data;
1663 struct ahci_host_priv *hpriv = ap->host->private_data; 1663 struct ahci_host_priv *hpriv = ap->host->private_data;
1664 int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING); 1664 int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING);
1665 u32 status, qc_active = 0; 1665 u32 qc_active = 0;
1666 int rc; 1666 int rc;
1667 1667
1668 status = readl(port_mmio + PORT_IRQ_STAT);
1669 writel(status, port_mmio + PORT_IRQ_STAT);
1670
1671 /* ignore BAD_PMP while resetting */ 1668 /* ignore BAD_PMP while resetting */
1672 if (unlikely(resetting)) 1669 if (unlikely(resetting))
1673 status &= ~PORT_IRQ_BAD_PMP; 1670 status &= ~PORT_IRQ_BAD_PMP;
@@ -1743,6 +1740,107 @@ static void ahci_port_intr(struct ata_port *ap)
1743 } 1740 }
1744} 1741}
1745 1742
1743void ahci_port_intr(struct ata_port *ap)
1744{
1745 void __iomem *port_mmio = ahci_port_base(ap);
1746 u32 status;
1747
1748 status = readl(port_mmio + PORT_IRQ_STAT);
1749 writel(status, port_mmio + PORT_IRQ_STAT);
1750
1751 ahci_handle_port_interrupt(ap, port_mmio, status);
1752}
1753
1754irqreturn_t ahci_thread_fn(int irq, void *dev_instance)
1755{
1756 struct ata_port *ap = dev_instance;
1757 struct ahci_port_priv *pp = ap->private_data;
1758 void __iomem *port_mmio = ahci_port_base(ap);
1759 unsigned long flags;
1760 u32 status;
1761
1762 spin_lock_irqsave(&ap->host->lock, flags);
1763 status = pp->intr_status;
1764 if (status)
1765 pp->intr_status = 0;
1766 spin_unlock_irqrestore(&ap->host->lock, flags);
1767
1768 spin_lock_bh(ap->lock);
1769 ahci_handle_port_interrupt(ap, port_mmio, status);
1770 spin_unlock_bh(ap->lock);
1771
1772 return IRQ_HANDLED;
1773}
1774EXPORT_SYMBOL_GPL(ahci_thread_fn);
1775
1776void ahci_hw_port_interrupt(struct ata_port *ap)
1777{
1778 void __iomem *port_mmio = ahci_port_base(ap);
1779 struct ahci_port_priv *pp = ap->private_data;
1780 u32 status;
1781
1782 status = readl(port_mmio + PORT_IRQ_STAT);
1783 writel(status, port_mmio + PORT_IRQ_STAT);
1784
1785 pp->intr_status |= status;
1786}
1787
1788irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance)
1789{
1790 struct ata_port *ap_this = dev_instance;
1791 struct ahci_port_priv *pp = ap_this->private_data;
1792 struct ata_host *host = ap_this->host;
1793 struct ahci_host_priv *hpriv = host->private_data;
1794 void __iomem *mmio = hpriv->mmio;
1795 unsigned int i;
1796 u32 irq_stat, irq_masked;
1797
1798 VPRINTK("ENTER\n");
1799
1800 spin_lock(&host->lock);
1801
1802 irq_stat = readl(mmio + HOST_IRQ_STAT);
1803
1804 if (!irq_stat) {
1805 u32 status = pp->intr_status;
1806
1807 spin_unlock(&host->lock);
1808
1809 VPRINTK("EXIT\n");
1810
1811 return status ? IRQ_WAKE_THREAD : IRQ_NONE;
1812 }
1813
1814 irq_masked = irq_stat & hpriv->port_map;
1815
1816 for (i = 0; i < host->n_ports; i++) {
1817 struct ata_port *ap;
1818
1819 if (!(irq_masked & (1 << i)))
1820 continue;
1821
1822 ap = host->ports[i];
1823 if (ap) {
1824 ahci_hw_port_interrupt(ap);
1825 VPRINTK("port %u\n", i);
1826 } else {
1827 VPRINTK("port %u (no irq)\n", i);
1828 if (ata_ratelimit())
1829 dev_warn(host->dev,
1830 "interrupt on disabled port %u\n", i);
1831 }
1832 }
1833
1834 writel(irq_stat, mmio + HOST_IRQ_STAT);
1835
1836 spin_unlock(&host->lock);
1837
1838 VPRINTK("EXIT\n");
1839
1840 return IRQ_WAKE_THREAD;
1841}
1842EXPORT_SYMBOL_GPL(ahci_hw_interrupt);
1843
1746irqreturn_t ahci_interrupt(int irq, void *dev_instance) 1844irqreturn_t ahci_interrupt(int irq, void *dev_instance)
1747{ 1845{
1748 struct ata_host *host = dev_instance; 1846 struct ata_host *host = dev_instance;
@@ -2196,6 +2294,14 @@ static int ahci_port_start(struct ata_port *ap)
2196 */ 2294 */
2197 pp->intr_mask = DEF_PORT_IRQ; 2295 pp->intr_mask = DEF_PORT_IRQ;
2198 2296
2297 /*
2298 * Switch to per-port locking in case each port has its own MSI vector.
2299 */
2300 if ((hpriv->flags & AHCI_HFLAG_MULTI_MSI)) {
2301 spin_lock_init(&pp->lock);
2302 ap->lock = &pp->lock;
2303 }
2304
2199 ap->private_data = pp; 2305 ap->private_data = pp;
2200 2306
2201 /* engage engines, captain */ 2307 /* engage engines, captain */
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c1c74e030a58..d33eaaf783ad 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4017,10 +4017,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
4017 4017
4018 index -= count - 1; 4018 index -= count - 1;
4019 4019
4020 cfg->remapped = 1;
4020 irte_info = &cfg->irq_2_iommu; 4021 irte_info = &cfg->irq_2_iommu;
4021 irte_info->sub_handle = devid; 4022 irte_info->sub_handle = devid;
4022 irte_info->irte_index = index; 4023 irte_info->irte_index = index;
4023 irte_info->iommu = (void *)cfg;
4024 4024
4025 goto out; 4025 goto out;
4026 } 4026 }
@@ -4127,9 +4127,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
4127 index = attr->ioapic_pin; 4127 index = attr->ioapic_pin;
4128 4128
4129 /* Setup IRQ remapping info */ 4129 /* Setup IRQ remapping info */
4130 cfg->remapped = 1;
4130 irte_info->sub_handle = devid; 4131 irte_info->sub_handle = devid;
4131 irte_info->irte_index = index; 4132 irte_info->irte_index = index;
4132 irte_info->iommu = (void *)cfg;
4133 4133
4134 /* Setup IRTE for IOMMU */ 4134 /* Setup IRTE for IOMMU */
4135 irte.val = 0; 4135 irte.val = 0;
@@ -4288,9 +4288,9 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
4288 devid = get_device_id(&pdev->dev); 4288 devid = get_device_id(&pdev->dev);
4289 irte_info = &cfg->irq_2_iommu; 4289 irte_info = &cfg->irq_2_iommu;
4290 4290
4291 cfg->remapped = 1;
4291 irte_info->sub_handle = devid; 4292 irte_info->sub_handle = devid;
4292 irte_info->irte_index = index + offset; 4293 irte_info->irte_index = index + offset;
4293 irte_info->iommu = (void *)cfg;
4294 4294
4295 return 0; 4295 return 0;
4296} 4296}
@@ -4314,9 +4314,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id)
4314 if (index < 0) 4314 if (index < 0)
4315 return index; 4315 return index;
4316 4316
4317 cfg->remapped = 1;
4317 irte_info->sub_handle = devid; 4318 irte_info->sub_handle = devid;
4318 irte_info->irte_index = index; 4319 irte_info->irte_index = index;
4319 irte_info->iommu = (void *)cfg;
4320 4320
4321 return 0; 4321 return 0;
4322} 4322}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 86e2f4a62b9a..174bb654453d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -41,6 +41,8 @@
41#include <asm/irq_remapping.h> 41#include <asm/irq_remapping.h>
42#include <asm/iommu_table.h> 42#include <asm/iommu_table.h>
43 43
44#include "irq_remapping.h"
45
44/* No locks are needed as DMA remapping hardware unit 46/* No locks are needed as DMA remapping hardware unit
45 * list is constructed at boot time and hotplug of 47 * list is constructed at boot time and hotplug of
46 * these units are not supported by the architecture. 48 * these units are not supported by the architecture.
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index eca28014ef3e..43d5c8b8e7ad 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -46,6 +46,8 @@
46#include <asm/cacheflush.h> 46#include <asm/cacheflush.h>
47#include <asm/iommu.h> 47#include <asm/iommu.h>
48 48
49#include "irq_remapping.h"
50
49#define ROOT_SIZE VTD_PAGE_SIZE 51#define ROOT_SIZE VTD_PAGE_SIZE
50#define CONTEXT_SIZE VTD_PAGE_SIZE 52#define CONTEXT_SIZE VTD_PAGE_SIZE
51 53
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index af8904de1d44..f3b8f23b5d8f 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -68,6 +68,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
68{ 68{
69 struct ir_table *table = iommu->ir_table; 69 struct ir_table *table = iommu->ir_table;
70 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); 70 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
71 struct irq_cfg *cfg = irq_get_chip_data(irq);
71 u16 index, start_index; 72 u16 index, start_index;
72 unsigned int mask = 0; 73 unsigned int mask = 0;
73 unsigned long flags; 74 unsigned long flags;
@@ -115,6 +116,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
115 for (i = index; i < index + count; i++) 116 for (i = index; i < index + count; i++)
116 table->base[i].present = 1; 117 table->base[i].present = 1;
117 118
119 cfg->remapped = 1;
118 irq_iommu->iommu = iommu; 120 irq_iommu->iommu = iommu;
119 irq_iommu->irte_index = index; 121 irq_iommu->irte_index = index;
120 irq_iommu->sub_handle = 0; 122 irq_iommu->sub_handle = 0;
@@ -155,6 +157,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
155static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) 157static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
156{ 158{
157 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); 159 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
160 struct irq_cfg *cfg = irq_get_chip_data(irq);
158 unsigned long flags; 161 unsigned long flags;
159 162
160 if (!irq_iommu) 163 if (!irq_iommu)
@@ -162,6 +165,7 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subha
162 165
163 raw_spin_lock_irqsave(&irq_2_ir_lock, flags); 166 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
164 167
168 cfg->remapped = 1;
165 irq_iommu->iommu = iommu; 169 irq_iommu->iommu = iommu;
166 irq_iommu->irte_index = index; 170 irq_iommu->irte_index = index;
167 irq_iommu->sub_handle = subhandle; 171 irq_iommu->sub_handle = subhandle;
@@ -425,11 +429,22 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
425 429
426 /* Enable interrupt-remapping */ 430 /* Enable interrupt-remapping */
427 iommu->gcmd |= DMA_GCMD_IRE; 431 iommu->gcmd |= DMA_GCMD_IRE;
432 iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */
428 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 433 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
429 434
430 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 435 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
431 readl, (sts & DMA_GSTS_IRES), sts); 436 readl, (sts & DMA_GSTS_IRES), sts);
432 437
438 /*
439 * With CFI clear in the Global Command register, we should be
440 * protected from dangerous (i.e. compatibility) interrupts
441 * regardless of x2apic status. Check just to be sure.
442 */
443 if (sts & DMA_GSTS_CFIS)
444 WARN(1, KERN_WARNING
445 "Compatibility-format IRQs enabled despite intr remapping;\n"
446 "you are vulnerable to IRQ injection.\n");
447
433 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 448 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
434} 449}
435 450
@@ -526,20 +541,24 @@ static int __init intel_irq_remapping_supported(void)
526static int __init intel_enable_irq_remapping(void) 541static int __init intel_enable_irq_remapping(void)
527{ 542{
528 struct dmar_drhd_unit *drhd; 543 struct dmar_drhd_unit *drhd;
544 bool x2apic_present;
529 int setup = 0; 545 int setup = 0;
530 int eim = 0; 546 int eim = 0;
531 547
548 x2apic_present = x2apic_supported();
549
532 if (parse_ioapics_under_ir() != 1) { 550 if (parse_ioapics_under_ir() != 1) {
533 printk(KERN_INFO "Not enable interrupt remapping\n"); 551 printk(KERN_INFO "Not enable interrupt remapping\n");
534 return -1; 552 goto error;
535 } 553 }
536 554
537 if (x2apic_supported()) { 555 if (x2apic_present) {
538 eim = !dmar_x2apic_optout(); 556 eim = !dmar_x2apic_optout();
539 WARN(!eim, KERN_WARNING 557 if (!eim)
540 "Your BIOS is broken and requested that x2apic be disabled\n" 558 printk(KERN_WARNING
541 "This will leave your machine vulnerable to irq-injection attacks\n" 559 "Your BIOS is broken and requested that x2apic be disabled.\n"
542 "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); 560 "This will slightly decrease performance.\n"
561 "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
543 } 562 }
544 563
545 for_each_drhd_unit(drhd) { 564 for_each_drhd_unit(drhd) {
@@ -578,7 +597,7 @@ static int __init intel_enable_irq_remapping(void)
578 if (eim && !ecap_eim_support(iommu->ecap)) { 597 if (eim && !ecap_eim_support(iommu->ecap)) {
579 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " 598 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
580 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); 599 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
581 return -1; 600 goto error;
582 } 601 }
583 } 602 }
584 603
@@ -594,7 +613,7 @@ static int __init intel_enable_irq_remapping(void)
594 printk(KERN_ERR "DRHD %Lx: failed to enable queued, " 613 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
595 " invalidation, ecap %Lx, ret %d\n", 614 " invalidation, ecap %Lx, ret %d\n",
596 drhd->reg_base_addr, iommu->ecap, ret); 615 drhd->reg_base_addr, iommu->ecap, ret);
597 return -1; 616 goto error;
598 } 617 }
599 } 618 }
600 619
@@ -617,6 +636,14 @@ static int __init intel_enable_irq_remapping(void)
617 goto error; 636 goto error;
618 637
619 irq_remapping_enabled = 1; 638 irq_remapping_enabled = 1;
639
640 /*
641 * VT-d has a different layout for IO-APIC entries when
642 * interrupt remapping is enabled. So it needs a special routine
643 * to print IO-APIC entries for debugging purposes too.
644 */
645 x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
646
620 pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); 647 pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
621 648
622 return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; 649 return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
@@ -625,6 +652,11 @@ error:
625 /* 652 /*
626 * handle error condition gracefully here! 653 * handle error condition gracefully here!
627 */ 654 */
655
656 if (x2apic_present)
657 WARN(1, KERN_WARNING
658 "Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
659
628 return -1; 660 return -1;
629} 661}
630 662
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index faf85d6e33fe..d56f8c17c5fe 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -1,11 +1,18 @@
1#include <linux/seq_file.h>
2#include <linux/cpumask.h>
1#include <linux/kernel.h> 3#include <linux/kernel.h>
2#include <linux/string.h> 4#include <linux/string.h>
3#include <linux/cpumask.h> 5#include <linux/cpumask.h>
4#include <linux/errno.h> 6#include <linux/errno.h>
5#include <linux/msi.h> 7#include <linux/msi.h>
8#include <linux/irq.h>
9#include <linux/pci.h>
6 10
7#include <asm/hw_irq.h> 11#include <asm/hw_irq.h>
8#include <asm/irq_remapping.h> 12#include <asm/irq_remapping.h>
13#include <asm/processor.h>
14#include <asm/x86_init.h>
15#include <asm/apic.h>
9 16
10#include "irq_remapping.h" 17#include "irq_remapping.h"
11 18
@@ -17,6 +24,152 @@ int no_x2apic_optout;
17 24
18static struct irq_remap_ops *remap_ops; 25static struct irq_remap_ops *remap_ops;
19 26
27static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
28static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
29 int index, int sub_handle);
30static int set_remapped_irq_affinity(struct irq_data *data,
31 const struct cpumask *mask,
32 bool force);
33
34static bool irq_remapped(struct irq_cfg *cfg)
35{
36 return (cfg->remapped == 1);
37}
38
39static void irq_remapping_disable_io_apic(void)
40{
41 /*
42 * With interrupt-remapping, for now we will use virtual wire A
43 * mode, as virtual wire B is little complex (need to configure
44 * both IOAPIC RTE as well as interrupt-remapping table entry).
45 * As this gets called during crash dump, keep this simple for
46 * now.
47 */
48 if (cpu_has_apic || apic_from_smp_config())
49 disconnect_bsp_APIC(0);
50}
51
52static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
53{
54 int node, ret, sub_handle, index = 0;
55 unsigned int irq;
56 struct msi_desc *msidesc;
57
58 nvec = __roundup_pow_of_two(nvec);
59
60 WARN_ON(!list_is_singular(&dev->msi_list));
61 msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
62 WARN_ON(msidesc->irq);
63 WARN_ON(msidesc->msi_attrib.multiple);
64
65 node = dev_to_node(&dev->dev);
66 irq = __create_irqs(get_nr_irqs_gsi(), nvec, node);
67 if (irq == 0)
68 return -ENOSPC;
69
70 msidesc->msi_attrib.multiple = ilog2(nvec);
71 for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
72 if (!sub_handle) {
73 index = msi_alloc_remapped_irq(dev, irq, nvec);
74 if (index < 0) {
75 ret = index;
76 goto error;
77 }
78 } else {
79 ret = msi_setup_remapped_irq(dev, irq + sub_handle,
80 index, sub_handle);
81 if (ret < 0)
82 goto error;
83 }
84 ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
85 if (ret < 0)
86 goto error;
87 }
88 return 0;
89
90error:
91 destroy_irqs(irq, nvec);
92
93 /*
94 * Restore altered MSI descriptor fields and prevent just destroyed
95 * IRQs from tearing down again in default_teardown_msi_irqs()
96 */
97 msidesc->irq = 0;
98 msidesc->msi_attrib.multiple = 0;
99
100 return ret;
101}
102
103static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
104{
105 int node, ret, sub_handle, index = 0;
106 struct msi_desc *msidesc;
107 unsigned int irq;
108
109 node = dev_to_node(&dev->dev);
110 irq = get_nr_irqs_gsi();
111 sub_handle = 0;
112
113 list_for_each_entry(msidesc, &dev->msi_list, list) {
114
115 irq = create_irq_nr(irq, node);
116 if (irq == 0)
117 return -1;
118
119 if (sub_handle == 0)
120 ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
121 else
122 ret = msi_setup_remapped_irq(dev, irq, index, sub_handle);
123
124 if (ret < 0)
125 goto error;
126
127 ret = setup_msi_irq(dev, msidesc, irq, 0);
128 if (ret < 0)
129 goto error;
130
131 sub_handle += 1;
132 irq += 1;
133 }
134
135 return 0;
136
137error:
138 destroy_irq(irq);
139 return ret;
140}
141
142static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
143 int nvec, int type)
144{
145 if (type == PCI_CAP_ID_MSI)
146 return do_setup_msi_irqs(dev, nvec);
147 else
148 return do_setup_msix_irqs(dev, nvec);
149}
150
151void eoi_ioapic_pin_remapped(int apic, int pin, int vector)
152{
153 /*
154 * Intr-remapping uses pin number as the virtual vector
155 * in the RTE. Actual vector is programmed in
156 * intr-remapping table entry. Hence for the io-apic
157 * EOI we use the pin number.
158 */
159 io_apic_eoi(apic, pin);
160}
161
162static void __init irq_remapping_modify_x86_ops(void)
163{
164 x86_io_apic_ops.disable = irq_remapping_disable_io_apic;
165 x86_io_apic_ops.set_affinity = set_remapped_irq_affinity;
166 x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry;
167 x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped;
168 x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs;
169 x86_msi.setup_hpet_msi = setup_hpet_msi_remapped;
170 x86_msi.compose_msi_msg = compose_remapped_msi_msg;
171}
172
20static __init int setup_nointremap(char *str) 173static __init int setup_nointremap(char *str)
21{ 174{
22 disable_irq_remap = 1; 175 disable_irq_remap = 1;
@@ -79,15 +232,24 @@ int __init irq_remapping_prepare(void)
79 232
80int __init irq_remapping_enable(void) 233int __init irq_remapping_enable(void)
81{ 234{
235 int ret;
236
82 if (!remap_ops || !remap_ops->enable) 237 if (!remap_ops || !remap_ops->enable)
83 return -ENODEV; 238 return -ENODEV;
84 239
85 return remap_ops->enable(); 240 ret = remap_ops->enable();
241
242 if (irq_remapping_enabled)
243 irq_remapping_modify_x86_ops();
244
245 return ret;
86} 246}
87 247
88void irq_remapping_disable(void) 248void irq_remapping_disable(void)
89{ 249{
90 if (!remap_ops || !remap_ops->disable) 250 if (!irq_remapping_enabled ||
251 !remap_ops ||
252 !remap_ops->disable)
91 return; 253 return;
92 254
93 remap_ops->disable(); 255 remap_ops->disable();
@@ -95,7 +257,9 @@ void irq_remapping_disable(void)
95 257
96int irq_remapping_reenable(int mode) 258int irq_remapping_reenable(int mode)
97{ 259{
98 if (!remap_ops || !remap_ops->reenable) 260 if (!irq_remapping_enabled ||
261 !remap_ops ||
262 !remap_ops->reenable)
99 return 0; 263 return 0;
100 264
101 return remap_ops->reenable(mode); 265 return remap_ops->reenable(mode);
@@ -103,6 +267,9 @@ int irq_remapping_reenable(int mode)
103 267
104int __init irq_remap_enable_fault_handling(void) 268int __init irq_remap_enable_fault_handling(void)
105{ 269{
270 if (!irq_remapping_enabled)
271 return 0;
272
106 if (!remap_ops || !remap_ops->enable_faulting) 273 if (!remap_ops || !remap_ops->enable_faulting)
107 return -ENODEV; 274 return -ENODEV;
108 275
@@ -133,23 +300,28 @@ int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask,
133 300
134void free_remapped_irq(int irq) 301void free_remapped_irq(int irq)
135{ 302{
303 struct irq_cfg *cfg = irq_get_chip_data(irq);
304
136 if (!remap_ops || !remap_ops->free_irq) 305 if (!remap_ops || !remap_ops->free_irq)
137 return; 306 return;
138 307
139 remap_ops->free_irq(irq); 308 if (irq_remapped(cfg))
309 remap_ops->free_irq(irq);
140} 310}
141 311
142void compose_remapped_msi_msg(struct pci_dev *pdev, 312void compose_remapped_msi_msg(struct pci_dev *pdev,
143 unsigned int irq, unsigned int dest, 313 unsigned int irq, unsigned int dest,
144 struct msi_msg *msg, u8 hpet_id) 314 struct msi_msg *msg, u8 hpet_id)
145{ 315{
146 if (!remap_ops || !remap_ops->compose_msi_msg) 316 struct irq_cfg *cfg = irq_get_chip_data(irq);
147 return;
148 317
149 remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); 318 if (!irq_remapped(cfg))
319 native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
320 else if (remap_ops && remap_ops->compose_msi_msg)
321 remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
150} 322}
151 323
152int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) 324static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
153{ 325{
154 if (!remap_ops || !remap_ops->msi_alloc_irq) 326 if (!remap_ops || !remap_ops->msi_alloc_irq)
155 return -ENODEV; 327 return -ENODEV;
@@ -157,8 +329,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
157 return remap_ops->msi_alloc_irq(pdev, irq, nvec); 329 return remap_ops->msi_alloc_irq(pdev, irq, nvec);
158} 330}
159 331
160int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, 332static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
161 int index, int sub_handle) 333 int index, int sub_handle)
162{ 334{
163 if (!remap_ops || !remap_ops->msi_setup_irq) 335 if (!remap_ops || !remap_ops->msi_setup_irq)
164 return -ENODEV; 336 return -ENODEV;
@@ -173,3 +345,42 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
173 345
174 return remap_ops->setup_hpet_msi(irq, id); 346 return remap_ops->setup_hpet_msi(irq, id);
175} 347}
348
349void panic_if_irq_remap(const char *msg)
350{
351 if (irq_remapping_enabled)
352 panic(msg);
353}
354
355static void ir_ack_apic_edge(struct irq_data *data)
356{
357 ack_APIC_irq();
358}
359
360static void ir_ack_apic_level(struct irq_data *data)
361{
362 ack_APIC_irq();
363 eoi_ioapic_irq(data->irq, data->chip_data);
364}
365
366static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
367{
368 seq_printf(p, " IR-%s", data->chip->name);
369}
370
371void irq_remap_modify_chip_defaults(struct irq_chip *chip)
372{
373 chip->irq_print_chip = ir_print_prefix;
374 chip->irq_ack = ir_ack_apic_edge;
375 chip->irq_eoi = ir_ack_apic_level;
376 chip->irq_set_affinity = x86_io_apic_ops.set_affinity;
377}
378
379bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip)
380{
381 if (!irq_remapped(cfg))
382 return false;
383 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
384 irq_remap_modify_chip_defaults(chip);
385 return true;
386}
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 95363acb583f..ecb637670405 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -34,6 +34,7 @@ struct msi_msg;
34extern int disable_irq_remap; 34extern int disable_irq_remap;
35extern int disable_sourceid_checking; 35extern int disable_sourceid_checking;
36extern int no_x2apic_optout; 36extern int no_x2apic_optout;
37extern int irq_remapping_enabled;
37 38
38struct irq_remap_ops { 39struct irq_remap_ops {
39 /* Check whether Interrupt Remapping is supported */ 40 /* Check whether Interrupt Remapping is supported */
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5099636a6e5f..00cc78c7aa04 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
845} 845}
846EXPORT_SYMBOL(pci_enable_msi_block); 846EXPORT_SYMBOL(pci_enable_msi_block);
847 847
848int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
849{
850 int ret, pos, nvec;
851 u16 msgctl;
852
853 pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
854 if (!pos)
855 return -EINVAL;
856
857 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
858 ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
859
860 if (maxvec)
861 *maxvec = ret;
862
863 do {
864 nvec = ret;
865 ret = pci_enable_msi_block(dev, nvec);
866 } while (ret > 0);
867
868 if (ret < 0)
869 return ret;
870 return nvec;
871}
872EXPORT_SYMBOL(pci_enable_msi_block_auto);
873
848void pci_msi_shutdown(struct pci_dev *dev) 874void pci_msi_shutdown(struct pci_dev *dev)
849{ 875{
850 struct msi_desc *desc; 876 struct msi_desc *desc;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fdf2c4a238cc..bc4e06611958 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
509 509
510/* Handle dynamic irq creation and destruction */ 510/* Handle dynamic irq creation and destruction */
511extern unsigned int create_irq_nr(unsigned int irq_want, int node); 511extern unsigned int create_irq_nr(unsigned int irq_want, int node);
512extern unsigned int __create_irqs(unsigned int from, unsigned int count,
513 int node);
512extern int create_irq(void); 514extern int create_irq(void);
513extern void destroy_irq(unsigned int irq); 515extern void destroy_irq(unsigned int irq);
516extern void destroy_irqs(unsigned int irq, unsigned int count);
514 517
515/* 518/*
516 * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and 519 * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
@@ -528,6 +531,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data);
528extern int irq_set_chip_data(unsigned int irq, void *data); 531extern int irq_set_chip_data(unsigned int irq, void *data);
529extern int irq_set_irq_type(unsigned int irq, unsigned int type); 532extern int irq_set_irq_type(unsigned int irq, unsigned int type);
530extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); 533extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
534extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
535 struct msi_desc *entry);
531extern struct irq_data *irq_get_irq_data(unsigned int irq); 536extern struct irq_data *irq_get_irq_data(unsigned int irq);
532 537
533static inline struct irq_chip *irq_get_chip(unsigned int irq) 538static inline struct irq_chip *irq_get_chip(unsigned int irq)
@@ -590,6 +595,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
590#define irq_alloc_desc_from(from, node) \ 595#define irq_alloc_desc_from(from, node) \
591 irq_alloc_descs(-1, from, 1, node) 596 irq_alloc_descs(-1, from, 1, node)
592 597
598#define irq_alloc_descs_from(from, cnt, node) \
599 irq_alloc_descs(-1, from, cnt, node)
600
593void irq_free_descs(unsigned int irq, unsigned int cnt); 601void irq_free_descs(unsigned int irq, unsigned int cnt);
594int irq_reserve_irqs(unsigned int from, unsigned int cnt); 602int irq_reserve_irqs(unsigned int from, unsigned int cnt);
595 603
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..6fa4dd2a3b9e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
1101 return -1; 1101 return -1;
1102} 1102}
1103 1103
1104static inline int
1105pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
1106{
1107 return -1;
1108}
1109
1104static inline void pci_msi_shutdown(struct pci_dev *dev) 1110static inline void pci_msi_shutdown(struct pci_dev *dev)
1105{ } 1111{ }
1106static inline void pci_disable_msi(struct pci_dev *dev) 1112static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void)
1132} 1138}
1133#else 1139#else
1134extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); 1140extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
1141extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
1135extern void pci_msi_shutdown(struct pci_dev *dev); 1142extern void pci_msi_shutdown(struct pci_dev *dev);
1136extern void pci_disable_msi(struct pci_dev *dev); 1143extern void pci_disable_msi(struct pci_dev *dev);
1137extern int pci_msix_table_size(struct pci_dev *dev); 1144extern int pci_msix_table_size(struct pci_dev *dev);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3aca9f29d30e..cbd97ce0b000 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -90,27 +90,41 @@ int irq_set_handler_data(unsigned int irq, void *data)
90EXPORT_SYMBOL(irq_set_handler_data); 90EXPORT_SYMBOL(irq_set_handler_data);
91 91
92/** 92/**
93 * irq_set_msi_desc - set MSI descriptor data for an irq 93 * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
94 * @irq: Interrupt number 94 * @irq_base: Interrupt number base
95 * @entry: Pointer to MSI descriptor data 95 * @irq_offset: Interrupt number offset
96 * @entry: Pointer to MSI descriptor data
96 * 97 *
97 * Set the MSI descriptor entry for an irq 98 * Set the MSI descriptor entry for an irq at offset
98 */ 99 */
99int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) 100int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
101 struct msi_desc *entry)
100{ 102{
101 unsigned long flags; 103 unsigned long flags;
102 struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); 104 struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
103 105
104 if (!desc) 106 if (!desc)
105 return -EINVAL; 107 return -EINVAL;
106 desc->irq_data.msi_desc = entry; 108 desc->irq_data.msi_desc = entry;
107 if (entry) 109 if (entry && !irq_offset)
108 entry->irq = irq; 110 entry->irq = irq_base;
109 irq_put_desc_unlock(desc, flags); 111 irq_put_desc_unlock(desc, flags);
110 return 0; 112 return 0;
111} 113}
112 114
113/** 115/**
116 * irq_set_msi_desc - set MSI descriptor data for an irq
117 * @irq: Interrupt number
118 * @entry: Pointer to MSI descriptor data
119 *
120 * Set the MSI descriptor entry for an irq
121 */
122int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
123{
124 return irq_set_msi_desc_off(irq, 0, entry);
125}
126
127/**
114 * irq_set_chip_data - set irq chip data for an irq 128 * irq_set_chip_data - set irq chip data for an irq
115 * @irq: Interrupt number 129 * @irq: Interrupt number
116 * @data: Pointer to chip specific data 130 * @data: Pointer to chip specific data