Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                      |  24
-rw-r--r--  arch/x86/include/asm/dma-mapping.h    |   2
-rw-r--r--  arch/x86/include/asm/io_apic.h        |   9
-rw-r--r--  arch/x86/include/asm/iommu.h          |   2
-rw-r--r--  arch/x86/include/asm/irq_vectors.h    |  11
-rw-r--r--  arch/x86/include/asm/pci.h            |   2
-rw-r--r--  arch/x86/include/asm/pci_64.h         |   1
-rw-r--r--  arch/x86/include/asm/uaccess.h        |   2
-rw-r--r--  arch/x86/include/asm/uaccess_32.h     |   8
-rw-r--r--  arch/x86/include/asm/uaccess_64.h     |   6
-rw-r--r--  arch/x86/kernel/Makefile              |   3
-rw-r--r--  arch/x86/kernel/hpet.c                |   7
-rw-r--r--  arch/x86/kernel/io_apic.c             | 774
-rw-r--r--  arch/x86/kernel/irq.c                 |   3
-rw-r--r--  arch/x86/kernel/irq_32.c              |   2
-rw-r--r--  arch/x86/kernel/irq_64.c              |   2
-rw-r--r--  arch/x86/kernel/irqinit_32.c          |   3
-rw-r--r--  arch/x86/kernel/irqinit_64.c          |   3
-rw-r--r--  arch/x86/kernel/pci-dma.c             |  13
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c      |  29
-rw-r--r--  arch/x86/kernel/quirks.c              |   2
-rw-r--r--  arch/x86/kernel/setup.c               |   2
-rw-r--r--  arch/x86/lguest/i386_head.S           |  15
-rw-r--r--  arch/x86/lib/usercopy_32.c            |   8
-rw-r--r--  arch/x86/lib/usercopy_64.c            |   4
-rw-r--r--  arch/x86/mm/init_32.c                 |   3
-rw-r--r--  arch/x86/oprofile/op_model_amd.c      |  89
27 files changed, 682 insertions(+), 347 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 98a0ed52b5c3..0f44add3e0b7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -247,6 +247,28 @@ config X86_HAS_BOOT_CPU_ID
 	def_bool y
 	depends on X86_VOYAGER
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on PCI_MSI || HT_IRQ
+	help
+	  This enables support for sparse irqs. This is useful for distro
+	  kernels that want to define a high CONFIG_NR_CPUS value but still
+	  want to have low kernel memory footprint on smaller machines.
+
+	  ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
+	    out the irq_desc[] array in a more NUMA-friendly way. )
+
+	  If you don't know what to do here, say N.
+
+config NUMA_MIGRATE_IRQ_DESC
+	bool "Move irq desc when changing irq smp_affinity"
+	depends on SPARSE_IRQ && NUMA
+	default n
+	help
+	  This enables moving the irq_desc to the cpu/node that the irq will be handled on.
+
+	  If you don't know what to do here, say N.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
@@ -479,7 +501,7 @@ config HPET_TIMER
 	  The HPET provides a stable time base on SMP
 	  systems, unlike the TSC, but it is more expensive to access,
 	  as it is off-chip. You can find the HPET spec at
-	  <http://www.intel.com/hardwaredesign/hpetspec.htm>.
+	  <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
 
 	  You can safely choose Y here. However, HPET will only be
 	  activated if the platform and the BIOS support this feature.
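
The SPARSE_IRQ option added above replaces the static irq_desc[NR_IRQS] array with descriptors allocated the first time an irq number is used. A minimal sketch of the resulting lookup contract, under the assumption that irq_to_desc() and irq_to_desc_alloc_cpu() behave as this series defines them in the core kernel (the caller below is hypothetical, for illustration only):

/* Sketch only: the real implementations live in kernel/irq/. Under
 * CONFIG_SPARSE_IRQ, irq_to_desc() can return NULL for an irq number
 * that was never set up, so every caller must check. */
struct irq_desc *irq_to_desc(unsigned int irq);
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);

static int hypothetical_user(unsigned int irq, int cpu)
{
	struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu);

	if (!desc)
		return -ENOMEM;	/* allocation failed on that cpu's node */
	/* ... program desc->chip_data, pins, vectors ... */
	return 0;
}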
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index dc22c0733282..4035357f5b9d 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -65,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 		return dma_ops;
 	else
 		return dev->archdata.dma_ops;
-#endif /* _ASM_X86_DMA_MAPPING_H */
+#endif
 }
 
 /* Make sure we keep the same behaviour */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index e475e009ae5d..7a1f44ac1f17 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -198,17 +198,14 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
-extern int probe_nr_irqs(void);
+extern void probe_nr_irqs_gsi(void);
 
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void)	{ }
 
-static inline int probe_nr_irqs(void)
-{
-	return NR_IRQS;
-}
+static inline void probe_nr_irqs_gsi(void)	{ }
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 295b13193f4d..a6ee9e6f530f 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -7,8 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
-extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
-
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..f7ff65032b9d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,12 +101,23 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
+#define NR_IRQS_LEGACY		16
+
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+#  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
 
 #elif defined(CONFIG_X86_VOYAGER)
 
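
The effect of the new NR_IRQS sizing is easiest to see with numbers plugged in. A self-contained sketch, assuming NR_VECTORS = 256 and MAX_IO_APICS = 128 (common values of this era; check your tree's headers before relying on the exact figures):

#include <stdio.h>

#define NR_VECTORS   256	/* assumed, see lead-in */
#define MAX_IO_APICS 128	/* assumed, see lead-in */

/* !CONFIG_SPARSE_IRQ: dense irq_desc[] array, so the bound is kept tight */
static int nr_irqs_dense(int nr_cpus)
{
	if (nr_cpus < MAX_IO_APICS)
		return NR_VECTORS + 32 * nr_cpus;
	return NR_VECTORS + 32 * MAX_IO_APICS;
}

/* CONFIG_SPARSE_IRQ: NR_IRQS only bounds the irq *number space*;
 * descriptors are allocated on demand, so it may grow with NR_CPUS */
static int nr_irqs_sparse(int nr_cpus)
{
	if (8 * nr_cpus > 32 * MAX_IO_APICS)
		return NR_VECTORS + 8 * nr_cpus;
	return NR_VECTORS + 32 * MAX_IO_APICS;
}

int main(void)
{
	int cpus[] = { 8, 512, 4096 };
	int i;

	for (i = 0; i < 3; i++)
		printf("NR_CPUS=%4d  dense=%5d  sparse=%5d\n",
		       cpus[i], nr_irqs_dense(cpus[i]), nr_irqs_sparse(cpus[i]));
	return 0;
}

The sparse bound can safely scale with NR_CPUS (33024 for 4096 CPUs here) precisely because no irq_desc[NR_IRQS] array is ever instantiated; a small machine only pays for the descriptors it actually allocates.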
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f8959c7a985f..a977de23cb4d 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -84,6 +84,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 static inline void early_quirks(void) { }
 #endif
 
+extern void pci_iommu_alloc(void);
+
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index d02d936840a3..4da207982777 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
 				 int reg, int len, u32 value);
 
 extern void dma32_reserve_bootmem(void);
-extern void pci_iommu_alloc(void);
 
 /* The PCI address space does equal the physical memory
  * address space. The networking and block device layers use
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 580c3ee6c58c..4340055b7559 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
 	int __ret_gu;						\
 	unsigned long __val_gu;					\
 	__chk_user_ptr(ptr);					\
+	might_fault();						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
 		__get_user_x(1, __ret_gu, __val_gu, ptr);	\
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
 	int __ret_pu;						\
 	__typeof__(*(ptr)) __pu_val;				\
 	__chk_user_ptr(ptr);					\
+	might_fault();						\
 	__pu_val = x;						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
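
might_sleep() only documents that the caller may schedule; might_fault() additionally ties user accesses into the locking annotations for the fault path. A rough sketch of the distinction, assuming a definition along the lines of the generic one from this period (the real body lives in the core kernel, not in this patch):

/* Sketch, not the authoritative definition */
#ifdef CONFIG_PROVE_LOCKING
void might_fault(void);		/* out of line: also annotates mmap_sem usage */
#else
static inline void might_fault(void)
{
	might_sleep();		/* a user access may fault, and hence sleep */
}
#endif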
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	return __copy_to_user_inatomic(to, from, n);
 }
 
 static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f8cfd00db450..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -71,6 +73,8 @@ static __always_inline __must_check
 int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
 int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
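
Hoisting might_fault() to the top of these helpers means the debug check fires on every call, not only when a fault actually happens to occur. A hypothetical driver bug of the kind this now catches deterministically (some_lock and bad_ioctl_handler are made-up names for illustration):

static DEFINE_SPINLOCK(some_lock);

static long bad_ioctl_handler(unsigned long __user *arg)
{
	unsigned long val;
	long ret = 0;

	spin_lock(&some_lock);		/* atomic context from here on */
	/* user copy in atomic context: might_fault() warns even if the
	 * page happens to be resident and no fault is taken */
	if (copy_from_user(&val, arg, sizeof(val)))
		ret = -EFAULT;
	spin_unlock(&some_lock);
	return ret;
}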
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88dd768eab6d..d364df03c1d6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -109,6 +109,8 @@ obj-$(CONFIG_MICROCODE)			+= microcode.o
 
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)	+= check.o
 
+obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb_64.o # NB rename without _64
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -122,7 +124,6 @@ ifeq ($(CONFIG_X86_64),y)
         obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
         obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
-        obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
 
         obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b5310ff1259e..cd759ad90690 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -813,7 +813,7 @@ int __init hpet_enable(void)
 
 out_nohpet:
 	hpet_clear_mapping();
-	boot_hpet_disable = 1;
+	hpet_address = 0;
 	return 0;
 }
 
@@ -836,10 +836,11 @@ static __init int hpet_late_init(void)
 
 		hpet_address = force_hpet_address;
 		hpet_enable();
-		if (!hpet_virt_address)
-			return -ENODEV;
 	}
 
+	if (!hpet_virt_address)
+		return -ENODEV;
+
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
 	for_each_online_cpu(cpu) {
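
The two hpet.c hunks work together: on failure, hpet_enable() now just drops the address instead of disabling HPET outright, and hpet_late_init() validates the mapping on every path rather than only after a forced enable. An abridged sketch of the resulting control flow (not the full function, and surrounding details are elided):

static __init int hpet_late_init(void)
{
	if (boot_hpet_disable)
		return -ENODEV;

	if (!hpet_address) {
		if (!force_hpet_address)
			return -ENODEV;
		hpet_address = force_hpet_address;
		hpet_enable();
	}

	/* now covers both the normal and the forced path */
	if (!hpet_virt_address)
		return -ENODEV;

	/* ... reserve platform timers, register per-cpu clockevents ... */
	return 0;
}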
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6dbf427175ff..e7745961ed31 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,94 +108,253 @@ static int __init parse_noapic(char *str)
 early_param("noapic", parse_noapic);
 
 struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+	struct irq_pin_list *pin;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+	return pin;
+}
+
 struct irq_cfg {
-	unsigned int irq;
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	u8 move_desc_pending : 1;
+#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-#define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+void __init arch_early_irq_init(void)
+{
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	int count;
+	int i;
+
+	cfg = irq_cfgx;
+	count = ARRAY_SIZE(irq_cfgx);
 
+	for (i = 0; i < count; i++) {
+		desc = irq_to_desc(i);
+		desc->chip_data = &cfg[i];
+	}
+}
+
+#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
+	struct irq_cfg *cfg = NULL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	if (desc)
+		cfg = desc->chip_data;
+
+	return cfg;
 }
 
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 {
-	return irq_cfg(irq);
+	struct irq_cfg *cfg;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
+
+	return cfg;
 }
 
-/*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+	struct irq_cfg *cfg;
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+	cfg = desc->chip_data;
+	if (!cfg) {
+		desc->chip_data = get_one_free_irq_cfg(cpu);
+		if (!desc->chip_data) {
+			printk(KERN_ERR "can not alloc irq_cfg\n");
+			BUG_ON(1);
+		}
+	}
+}
 
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+	struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+	cfg->irq_2_pin = NULL;
+	old_entry = old_cfg->irq_2_pin;
+	if (!old_entry)
+		return;
+
+	entry = get_one_free_irq_2_pin(cpu);
+	if (!entry)
+		return;
+
+	entry->apic = old_entry->apic;
+	entry->pin = old_entry->pin;
+	head = entry;
+	tail = entry;
+	old_entry = old_entry->next;
+	while (old_entry) {
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			entry = head;
+			while (entry) {
+				head = entry->next;
+				kfree(entry);
+				entry = head;
+			}
+			/* still use the old one */
+			return;
+		}
+		entry->apic = old_entry->apic;
+		entry->pin = old_entry->pin;
+		tail->next = entry;
+		tail = entry;
+		old_entry = old_entry->next;
+	}
 
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
-static struct irq_pin_list *irq_2_pin_ptr;
+	tail->next = NULL;
+	cfg->irq_2_pin = head;
+}
 
-static void __init irq_2_pin_init(void)
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 {
-	struct irq_pin_list *pin = irq_2_pin_head;
-	int i;
+	struct irq_pin_list *entry, *next;
+
+	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+		return;
 
-	for (i = 1; i < PIN_MAP_SIZE; i++)
-		pin[i-1].next = &pin[i];
+	entry = old_cfg->irq_2_pin;
 
-	irq_2_pin_ptr = &pin[0];
+	while (entry) {
+		next = entry->next;
+		kfree(entry);
+		entry = next;
+	}
+	old_cfg->irq_2_pin = NULL;
 }
 
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+				 struct irq_desc *desc, int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_ptr;
+	struct irq_cfg *cfg;
+	struct irq_cfg *old_cfg;
 
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
+	cfg = get_one_free_irq_cfg(cpu);
 
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-	return pin;
+	if (!cfg)
+		return;
+
+	desc->chip_data = cfg;
+
+	old_cfg = old_desc->chip_data;
+
+	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+	kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	struct irq_cfg *old_cfg, *cfg;
+
+	old_cfg = old_desc->chip_data;
+	cfg = desc->chip_data;
+
+	if (old_cfg == cfg)
+		return;
+
+	if (old_cfg) {
+		free_irq_2_pin(old_cfg, cfg);
+		free_irq_cfg(old_cfg);
+		old_desc->chip_data = NULL;
+	}
 }
 
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+	struct irq_cfg *cfg = desc->chip_data;
+
+	if (!cfg->move_in_progress) {
+		/* it means that domain is not changed */
+		if (!cpus_intersects(desc->affinity, mask))
+			cfg->move_desc_pending = 1;
+	}
+}
+#endif
+
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+#endif
+
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+}
+#endif
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
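
After this hunk, the x86 per-irq state (struct irq_cfg) hangs off the generic descriptor's chip_data pointer instead of being indexed directly by irq number, and it is allocated node-local to the CPU that first touches the irq. The lookup chain the rest of the file now relies on, shown in isolation (cfg_of is an illustrative name; the patch's own irq_cfg() is the real accessor):

static struct irq_cfg *cfg_of(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	/* NULL if this irq number was never set up (sparse case) */
	return desc ? desc->chip_data : NULL;
}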
@@ -237,11 +396,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	entry = cfg->irq_2_pin;
@@ -323,13 +481,12 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
 
-	cfg = irq_cfg(irq);
 	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
@@ -359,37 +516,49 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	}
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq,
-				    const struct cpumask *mask)
+static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
+	unsigned int irq;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
 		return;
 
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, *mask))
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cpumask_and(&tmp, &cfg->domain, mask);
+	set_extra_move_desc(desc, mask);
+
+	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	cpumask_copy(&desc->affinity, mask);
+	__target_IO_APIC_irq(irq, dest, cfg);
+	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
+
+static void set_ioapic_affinity_irq(unsigned int irq,
+				    const struct cpumask *mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	set_ioapic_affinity_irq_desc(desc, *mask);
+}
 #endif /* CONFIG_SMP */
 
 /*
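
set_ioapic_affinity_irq() above is the first instance of a pattern repeated throughout this patch: the irq-number entry point survives as a thin wrapper (it is still what gets installed as the struct irq_chip callback), while the work moves into a *_desc variant that takes the descriptor directly and so avoids redundant irq-to-desc lookups on hot paths. Schematically, with frob as a placeholder name:

static void frob_irq_desc(struct irq_desc *desc)	/* new core */
{
	struct irq_cfg *cfg = desc->chip_data;
	/* ... real work on desc and cfg ... */
}

static void frob_irq(unsigned int irq)			/* irq_chip callback */
{
	frob_irq_desc(irq_to_desc(irq));
}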
@@ -397,16 +566,18 @@ static void set_ioapic_affinity_irq(unsigned int irq,
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 {
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin();
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+					apic, pin);
+			return;
+		}
 		cfg->irq_2_pin = entry;
 		entry->apic = apic;
 		entry->pin = pin;
@@ -421,7 +592,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin();
+	entry->next = get_one_free_irq_2_pin(cpu);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -430,11 +601,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+					  int oldapic, int oldpin,
+					  int newapic, int newpin)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
 	struct irq_pin_list *entry = cfg->irq_2_pin;
 	int replaced = 0;
 
@@ -451,18 +621,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
+		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(unsigned int irq,
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
 				int mask_and, int mask_or,
 				void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	cfg = irq_cfg(irq);
 	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
 		unsigned int reg;
 		pin = entry->pin;
@@ -475,9 +643,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
 	}
 }
 
-static void __unmask_IO_APIC_irq(unsigned int irq)
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
 #ifdef CONFIG_X86_64
@@ -492,47 +660,64 @@ void io_apic_sync(struct irq_pin_list *entry)
 	readl(&io_apic->data);
 }
 
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 #else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
 			IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
 			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 #endif /* CONFIG_X86_32 */
 
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
+	BUG_ON(!cfg);
+
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
+	__mask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	unmask_IO_APIC_irq_desc(desc);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -809,7 +994,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
  */
 static int EISA_ELCR(unsigned int irq)
 {
-	if (irq < 16) {
+	if (irq < NR_IRQS_LEGACY) {
 		unsigned int port = 0x4d0 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
@@ -1034,7 +1219,7 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1050,16 +1235,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
 	int cpu;
-	struct irq_cfg *cfg;
 
-	cfg = irq_cfg(irq);
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
 
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_t tmp;
@@ -1113,24 +1295,22 @@ next:
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	int err;
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
+	err = __assign_irq_vector(irq, cfg, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
 	return err;
 }
 
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	struct irq_cfg *cfg;
 	cpumask_t mask;
 	int cpu, vector;
 
-	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -1162,9 +1342,13 @@ void __setup_vector_irq(int cpu)
 	/* This function must be called with vector_lock held */
 	int irq, vector;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
 	/* Mark the inuse vectors */
-	for_each_irq_cfg(irq, cfg) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
@@ -1215,11 +1399,8 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 #endif
 
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
@@ -1311,7 +1492,7 @@ static int setup_ioapic_entry(int apic, int irq,
 	return 0;
 }
 
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
 			      int trigger, int polarity)
 {
 	struct irq_cfg *cfg;
@@ -1321,10 +1502,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 	if (!IO_APIC_IRQ(irq))
 		return;
 
-	cfg = irq_cfg(irq);
+	cfg = desc->chip_data;
 
 	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
 	cpus_and(mask, cfg->domain, mask);
@@ -1341,12 +1522,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
+		__clear_irq_vector(irq, cfg);
 		return;
 	}
 
-	ioapic_register_intr(irq, trigger);
-	if (irq < 16)
+	ioapic_register_intr(irq, desc, trigger);
+	if (irq < NR_IRQS_LEGACY)
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic, pin, entry);
@@ -1356,6 +1537,9 @@ static void __init setup_IO_APIC_irqs(void)
 {
 	int apic, pin, idx, irq;
 	int notcon = 0;
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1387,9 +1571,15 @@ static void __init setup_IO_APIC_irqs(void)
 			if (multi_timer_check(apic, irq))
 				continue;
 #endif
-			add_pin_to_irq(irq, apic, pin);
+			desc = irq_to_desc_alloc_cpu(irq, cpu);
+			if (!desc) {
+				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+				continue;
+			}
+			cfg = desc->chip_data;
+			add_pin_to_irq_cpu(cfg, cpu, apic, pin);
 
-			setup_IO_APIC_irq(apic, pin, irq,
+			setup_IO_APIC_irq(apic, pin, irq, desc,
 					irq_trigger(idx), irq_polarity(idx));
 		}
 	}
@@ -1448,6 +1638,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1728,13 @@ __apicdebuginit(void) print_IO_APIC(void)
 		}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(irq, cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
+	for_each_irq_desc(irq, desc) {
+		struct irq_pin_list *entry;
+
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
+		entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,14 +2218,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 	int was_pending = 0;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < 16) {
+	if (irq < NR_IRQS_LEGACY) {
 		disable_8259A_irq(irq);
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
 	}
-	__unmask_IO_APIC_irq(irq);
+	cfg = irq_cfg(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -2092,35 +2290,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	irq = desc->irq;
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	desc = irq_to_desc(irq);
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		__target_IO_APIC_irq(irq, dest, cfg);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 
@@ -2142,14 +2342,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	desc->affinity = mask;
 }
 
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 {
 	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
 
-	mask_IO_APIC_irq(irq);
+	mask_IO_APIC_irq_desc(desc);
 
-	if (io_apic_level_ack_pending(irq)) {
+	if (io_apic_level_ack_pending(cfg)) {
 		/*
 		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
@@ -2161,14 +2361,15 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
 	cpus_clear(desc->pending_mask);
 
 unmask:
-	unmask_IO_APIC_irq(irq);
+	unmask_IO_APIC_irq_desc(desc);
+
 	return ret;
 }
 
@@ -2178,6 +2379,9 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2198,19 +2402,23 @@
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
-				       const struct cpumask *mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
-		cpumask_copy(&desc->pending_mask, mask);
-		migrate_irq_remapped_level(irq);
+		desc->pending_mask = mask;
+		migrate_irq_remapped_level_desc(desc);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, *mask);
+	migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+				       const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	set_ir_ioapic_affinity_irq_desc(desc, *mask);
 }
 #endif
 
@@ -2229,6 +2437,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
 
+		if (irq == -1)
+			continue;
+
 		desc = irq_to_desc(irq);
 		if (!desc)
 			continue;
@@ -2250,19 +2461,40 @@ unlock:
 	irq_exit();
 }
 
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_desc *desc = *descp;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress))
+	if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		if (likely(!cfg->move_desc_pending))
+			return;
+
+		/* domain has not changed, but affinity did */
+		me = smp_processor_id();
+		if (cpu_isset(me, desc->affinity)) {
+			*descp = desc = move_irq_desc(desc, me);
+			/* get the new one */
+			cfg = desc->chip_data;
+			cfg->move_desc_pending = 0;
+		}
+#endif
 		return;
+	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
 		cpumask_t cleanup_mask;
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		*descp = desc = move_irq_desc(desc, me);
+		/* get the new one */
+		cfg = desc->chip_data;
+#endif
+
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
@@ -2270,8 +2502,9 @@ static void irq_complete_move(unsigned int irq)
 	}
 }
 #else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
+
 #ifdef CONFIG_INTR_REMAP
 static void ack_x2apic_level(unsigned int irq)
 {
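
Why irq_complete_move() now takes struct irq_desc **: with CONFIG_NUMA_MIGRATE_IRQ_DESC the descriptor itself can be reallocated on another node mid-flight via move_irq_desc(), and the caller must keep using the new copy rather than a stale pointer. A minimal sketch of the calling convention (maybe_move and need_move are illustrative names; move_irq_desc() is the real helper this series adds):

static void maybe_move(struct irq_desc **descp)
{
	struct irq_desc *desc = *descp;

	if (need_move(desc))			/* hypothetical predicate */
		*descp = move_irq_desc(desc, smp_processor_id());
}

static void caller(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	maybe_move(&desc);	/* desc may now point at a node-local copy */
	/* keep using 'desc'; do not reuse a pointer cached before the call */
}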
@@ -2282,11 +2515,14 @@ static void ack_x2apic_edge(unsigned int irq)
 {
 	ack_x2APIC_irq();
 }
+
 #endif
 
 static void ack_apic_edge(unsigned int irq)
 {
-	irq_complete_move(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	irq_complete_move(&desc);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -2295,18 +2531,21 @@ atomic_t irq_mis_count;
 
 static void ack_apic_level(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
+
 #ifdef CONFIG_X86_32
 	unsigned long v;
 	int i;
 #endif
+	struct irq_cfg *cfg;
 	int do_unmask_irq = 0;
 
-	irq_complete_move(irq);
+	irq_complete_move(&desc);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
+		mask_IO_APIC_irq_desc(desc);
 	}
 #endif
 
@@ -2330,7 +2569,8 @@ static void ack_apic_level(unsigned int irq)
 	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
 	 * The idea is from Manfred Spraul.  --macro
 	 */
-	i = irq_cfg(irq)->vector;
+	cfg = desc->chip_data;
+	i = cfg->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 #endif
@@ -2369,17 +2609,18 @@ static void ack_apic_level(unsigned int irq)
 	 * accurate and is causing problems then it is a hardware bug
 	 * and you can go talk to the chipset vendor about it.
 	 */
-	if (!io_apic_level_ack_pending(irq))
+	cfg = desc->chip_data;
+	if (!io_apic_level_ack_pending(cfg))
 		move_masked_irq(irq);
-	unmask_IO_APIC_irq(irq);
+	unmask_IO_APIC_irq_desc(desc);
 	}
 
 #ifdef CONFIG_X86_32
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
+		__mask_and_edge_IO_APIC_irq(cfg);
+		__unmask_and_level_IO_APIC_irq(cfg);
 		spin_unlock(&ioapic_lock);
 	}
 #endif
@@ -2430,20 +2671,22 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_cfg(irq, cfg) {
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
+		cfg = desc->chip_data;
+		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
 			 * interrupt if we can..
 			 */
-			if (irq < 16)
+			if (irq < NR_IRQS_LEGACY)
 				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
+			else
 				/* Strange. Oh, well.. */
 				desc->chip = &no_irq_chip;
-			}
 		}
 	}
 }
@@ -2468,7 +2711,7 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2480,11 +2723,8 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.ack		= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
@@ -2588,7 +2828,9 @@ int timer_through_8259 __initdata;
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg(0);
+	struct irq_desc *desc = irq_to_desc(0);
+	struct irq_cfg *cfg = desc->chip_data;
+	int cpu = boot_cpu_id;
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	unsigned int ver;
@@ -2603,7 +2845,7 @@ static inline void __init check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
+	assign_irq_vector(0, cfg, TARGET_CPUS);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2654,10 +2896,10 @@ static inline void __init check_timer(void)
 	 * Ok, does IRQ0 through the IOAPIC work?
 	 */
 	if (no_pin1) {
-		add_pin_to_irq(0, apic1, pin1);
+		add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
 		setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 	}
-	unmask_IO_APIC_irq(0);
+	unmask_IO_APIC_irq_desc(desc);
 	if (timer_irq_works()) {
 		if (nmi_watchdog == NMI_IO_APIC) {
 			setup_nmi();
@@ -2683,9 +2925,9 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2717,7 +2959,7 @@ static inline void __init check_timer(void)
2717 apic_printk(APIC_QUIET, KERN_INFO 2959 apic_printk(APIC_QUIET, KERN_INFO
2718 "...trying to set up timer as Virtual Wire IRQ...\n"); 2960 "...trying to set up timer as Virtual Wire IRQ...\n");
2719 2961
2720 lapic_register_intr(0); 2962 lapic_register_intr(0, desc);
2721 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2963 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2722 enable_8259A_irq(0); 2964 enable_8259A_irq(0);
2723 2965
@@ -2902,22 +3144,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
2902 unsigned int irq; 3144 unsigned int irq;
2903 unsigned int new; 3145 unsigned int new;
2904 unsigned long flags; 3146 unsigned long flags;
2905 struct irq_cfg *cfg_new; 3147 struct irq_cfg *cfg_new = NULL;
2906 3148 int cpu = boot_cpu_id;
2907 irq_want = nr_irqs - 1; 3149 struct irq_desc *desc_new = NULL;
2908 3150
2909 irq = 0; 3151 irq = 0;
2910 spin_lock_irqsave(&vector_lock, flags); 3152 spin_lock_irqsave(&vector_lock, flags);
2911 for (new = irq_want; new > 0; new--) { 3153 for (new = irq_want; new < NR_IRQS; new++) {
2912 if (platform_legacy_irq(new)) 3154 if (platform_legacy_irq(new))
2913 continue; 3155 continue;
2914 cfg_new = irq_cfg(new); 3156
2915 if (cfg_new && cfg_new->vector != 0) 3157 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3158 if (!desc_new) {
3159 printk(KERN_INFO "cannot get irq_desc for %d\n", new);
2916 continue; 3160 continue;
2917 /* check if need to create one */ 3161 }
2918 if (!cfg_new) 3162 cfg_new = desc_new->chip_data;
2919 cfg_new = irq_cfg_alloc(new); 3163
2920 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 3164 if (cfg_new->vector != 0)
3165 continue;
3166 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
2921 irq = new; 3167 irq = new;
2922 break; 3168 break;
2923 } 3169 }
@@ -2925,15 +3171,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
2925 3171
2926 if (irq > 0) { 3172 if (irq > 0) {
2927 dynamic_irq_init(irq); 3173 dynamic_irq_init(irq);
3174 /* restore it, in case dynamic_irq_init() cleared it */
3175 if (desc_new)
3176 desc_new->chip_data = cfg_new;
2928 } 3177 }
2929 return irq; 3178 return irq;
2930} 3179}
2931 3180
3181static int nr_irqs_gsi = NR_IRQS_LEGACY;
2932int create_irq(void) 3182int create_irq(void)
2933{ 3183{
3184 unsigned int irq_want;
2934 int irq; 3185 int irq;
2935 3186
2936 irq = create_irq_nr(nr_irqs - 1); 3187 irq_want = nr_irqs_gsi;
3188 irq = create_irq_nr(irq_want);
2937 3189
2938 if (irq == 0) 3190 if (irq == 0)
2939 irq = -1; 3191 irq = -1;
@@ -2944,14 +3196,22 @@ int create_irq(void)
2944void destroy_irq(unsigned int irq) 3196void destroy_irq(unsigned int irq)
2945{ 3197{
2946 unsigned long flags; 3198 unsigned long flags;
3199 struct irq_cfg *cfg;
3200 struct irq_desc *desc;
2947 3201
3202 /* store it, in case dynamic_irq_cleanup() clears it */
3203 desc = irq_to_desc(irq);
3204 cfg = desc->chip_data;
2948 dynamic_irq_cleanup(irq); 3205 dynamic_irq_cleanup(irq);
3206 /* reconnect the saved irq_cfg */
3207 if (desc)
3208 desc->chip_data = cfg;
2949 3209
2950#ifdef CONFIG_INTR_REMAP 3210#ifdef CONFIG_INTR_REMAP
2951 free_irte(irq); 3211 free_irte(irq);
2952#endif 3212#endif
2953 spin_lock_irqsave(&vector_lock, flags); 3213 spin_lock_irqsave(&vector_lock, flags);
2954 __clear_irq_vector(irq); 3214 __clear_irq_vector(irq, cfg);
2955 spin_unlock_irqrestore(&vector_lock, flags); 3215 spin_unlock_irqrestore(&vector_lock, flags);
2956} 3216}
2957 3217
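
With SPARSE_IRQ, create_irq_nr() searches upward from nr_irqs_gsi instead of downward from nr_irqs, allocating each candidate descriptor on the boot CPU before probing its vector. A minimal sketch of that allocation loop, using the irq_to_desc_alloc_cpu()/__assign_irq_vector() interfaces from the hunks above (a condensed illustration, not the exact kernel code):

	static unsigned int find_free_dynamic_irq(unsigned int from, int cpu)
	{
		unsigned int new;

		for (new = from; new < NR_IRQS; new++) {
			struct irq_desc *desc;
			struct irq_cfg *cfg;

			if (platform_legacy_irq(new))
				continue;	/* never hand out ISA irqs */

			desc = irq_to_desc_alloc_cpu(new, cpu);
			if (!desc)
				continue;	/* descriptor allocation failed */

			cfg = desc->chip_data;
			if (cfg->vector)
				continue;	/* irq already in use */

			if (__assign_irq_vector(new, cfg, TARGET_CPUS) == 0)
				return new;	/* vector assigned, irq is ours */
		}
		return 0;			/* 0 signals failure to the caller */
	}

The save/restore of desc->chip_data around dynamic_irq_init() and dynamic_irq_cleanup() is needed because both helpers reset the descriptor's fields, chip_data included, which would otherwise orphan the attached irq_cfg.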
@@ -2966,12 +3226,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2966 unsigned dest; 3226 unsigned dest;
2967 cpumask_t tmp; 3227 cpumask_t tmp;
2968 3228
3229 cfg = irq_cfg(irq);
2969 tmp = TARGET_CPUS; 3230 tmp = TARGET_CPUS;
2970 err = assign_irq_vector(irq, tmp); 3231 err = assign_irq_vector(irq, cfg, tmp);
2971 if (err) 3232 if (err)
2972 return err; 3233 return err;
2973 3234
2974 cfg = irq_cfg(irq);
2975 cpus_and(tmp, cfg->domain, tmp); 3235 cpus_and(tmp, cfg->domain, tmp);
2976 dest = cpu_mask_to_apicid(tmp); 3236 dest = cpu_mask_to_apicid(tmp);
2977 3237
@@ -3029,34 +3289,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3029#ifdef CONFIG_SMP 3289#ifdef CONFIG_SMP
3030static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3290static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3031{ 3291{
3292 struct irq_desc *desc = irq_to_desc(irq);
3032 struct irq_cfg *cfg; 3293 struct irq_cfg *cfg;
3033 struct msi_msg msg; 3294 struct msi_msg msg;
3034 unsigned int dest; 3295 unsigned int dest;
3035 cpumask_t tmp; 3296 cpumask_t tmp;
3036 struct irq_desc *desc;
3037 3297
3038 if (!cpumask_intersects(mask, cpu_online_mask)) 3298 if (!cpumask_intersects(mask, cpu_online_mask))
3039 return; 3299 return;
3040 3300
3041 if (assign_irq_vector(irq, *mask)) 3301 cfg = desc->chip_data;
3302 if (assign_irq_vector(irq, cfg, *mask))
3042 return; 3303 return;
3043 3304
3044 cfg = irq_cfg(irq); 3305 set_extra_move_desc(desc, *mask);
3306
3045 cpumask_and(&tmp, &cfg->domain, mask); 3307 cpumask_and(&tmp, &cfg->domain, mask);
3046 dest = cpu_mask_to_apicid(tmp); 3308 dest = cpu_mask_to_apicid(tmp);
3047 3309
3048 read_msi_msg(irq, &msg); 3310 read_msi_msg_desc(desc, &msg);
3049 3311
3050 msg.data &= ~MSI_DATA_VECTOR_MASK; 3312 msg.data &= ~MSI_DATA_VECTOR_MASK;
3051 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3313 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3052 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3314 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3053 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3315 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3054 3316
3055 write_msi_msg(irq, &msg); 3317 write_msi_msg_desc(desc, &msg);
3056 desc = irq_to_desc(irq);
3057 cpumask_copy(&desc->affinity, mask); 3318 cpumask_copy(&desc->affinity, mask);
3058} 3319}
3059
3060#ifdef CONFIG_INTR_REMAP 3320#ifdef CONFIG_INTR_REMAP
3061/* 3321/*
3062 * Migrate the MSI irq to another cpumask. This migration is 3322 * Migrate the MSI irq to another cpumask. This migration is
@@ -3065,11 +3325,11 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3065static void ir_set_msi_irq_affinity(unsigned int irq, 3325static void ir_set_msi_irq_affinity(unsigned int irq,
3066 const struct cpumask *mask) 3326 const struct cpumask *mask)
3067{ 3327{
3328 struct irq_desc *desc = irq_to_desc(irq);
3068 struct irq_cfg *cfg; 3329 struct irq_cfg *cfg;
3069 unsigned int dest; 3330 unsigned int dest;
3070 cpumask_t tmp, cleanup_mask; 3331 cpumask_t tmp, cleanup_mask;
3071 struct irte irte; 3332 struct irte irte;
3072 struct irq_desc *desc;
3073 3333
3074 if (!cpumask_intersects(mask, cpu_online_mask)) 3334 if (!cpumask_intersects(mask, cpu_online_mask))
3075 return; 3335 return;
@@ -3077,10 +3337,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
3077 if (get_irte(irq, &irte)) 3337 if (get_irte(irq, &irte))
3078 return; 3338 return;
3079 3339
3080 if (assign_irq_vector(irq, *mask)) 3340 cfg = desc->chip_data;
3341 if (assign_irq_vector(irq, cfg, *mask))
3081 return; 3342 return;
3082 3343
3083 cfg = irq_cfg(irq); 3344 set_extra_move_desc(desc, *mask);
3345
3084 cpumask_and(&tmp, &cfg->domain, mask); 3346 cpumask_and(&tmp, &cfg->domain, mask);
3085 dest = cpu_mask_to_apicid(tmp); 3347 dest = cpu_mask_to_apicid(tmp);
3086 3348
@@ -3104,9 +3366,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
3104 cfg->move_in_progress = 0; 3366 cfg->move_in_progress = 0;
3105 } 3367 }
3106 3368
3107 desc = irq_to_desc(irq);
3108 cpumask_copy(&desc->affinity, mask); 3369 cpumask_copy(&desc->affinity, mask);
3109} 3370}
3371
3110#endif 3372#endif
3111#endif /* CONFIG_SMP */ 3373#endif /* CONFIG_SMP */
3112 3374
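
The four CONFIG_SMP affinity setters in this file (plain MSI, remapped MSI, DMAR and HPET) are all converted to the same shape: fetch the descriptor once at entry, reuse its cached chip_data instead of a second irq_cfg() lookup, and call set_extra_move_desc() so the descriptor itself can follow the irq under NUMA_MIGRATE_IRQ_DESC. Condensed into one sketch (names as used in the hunks above; error paths trimmed):

	static void retarget_msi_like_irq(unsigned int irq, const struct cpumask *mask)
	{
		struct irq_desc *desc = irq_to_desc(irq);
		struct irq_cfg *cfg = desc->chip_data;
		struct msi_msg msg;
		unsigned int dest;
		cpumask_t tmp;

		if (!cpumask_intersects(mask, cpu_online_mask))
			return;			/* no online target cpu */
		if (assign_irq_vector(irq, cfg, *mask))
			return;			/* no free vector on the target */

		set_extra_move_desc(desc, *mask);	/* let irq_desc migrate too */

		cpumask_and(&tmp, &cfg->domain, mask);
		dest = cpu_mask_to_apicid(tmp);

		read_msi_msg_desc(desc, &msg);		/* rewrite vector and dest */
		msg.data &= ~MSI_DATA_VECTOR_MASK;
		msg.data |= MSI_DATA_VECTOR(cfg->vector);
		msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
		msg.address_lo |= MSI_ADDR_DEST_ID(dest);
		write_msi_msg_desc(desc, &msg);

		cpumask_copy(&desc->affinity, mask);
	}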
@@ -3165,7 +3427,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3165} 3427}
3166#endif 3428#endif
3167 3429
3168static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) 3430static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3169{ 3431{
3170 int ret; 3432 int ret;
3171 struct msi_msg msg; 3433 struct msi_msg msg;
@@ -3174,7 +3436,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3174 if (ret < 0) 3436 if (ret < 0)
3175 return ret; 3437 return ret;
3176 3438
3177 set_irq_msi(irq, desc); 3439 set_irq_msi(irq, msidesc);
3178 write_msi_msg(irq, &msg); 3440 write_msi_msg(irq, &msg);
3179 3441
3180#ifdef CONFIG_INTR_REMAP 3442#ifdef CONFIG_INTR_REMAP
@@ -3194,26 +3456,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3194 return 0; 3456 return 0;
3195} 3457}
3196 3458
3197static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) 3459int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3198{
3199 unsigned int irq;
3200
3201 irq = dev->bus->number;
3202 irq <<= 8;
3203 irq |= dev->devfn;
3204 irq <<= 12;
3205
3206 return irq;
3207}
3208
3209int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3210{ 3460{
3211 unsigned int irq; 3461 unsigned int irq;
3212 int ret; 3462 int ret;
3213 unsigned int irq_want; 3463 unsigned int irq_want;
3214 3464
3215 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3465 irq_want = nr_irqs_gsi;
3216
3217 irq = create_irq_nr(irq_want); 3466 irq = create_irq_nr(irq_want);
3218 if (irq == 0) 3467 if (irq == 0)
3219 return -1; 3468 return -1;
@@ -3227,7 +3476,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3227 goto error; 3476 goto error;
3228no_ir: 3477no_ir:
3229#endif 3478#endif
3230 ret = setup_msi_irq(dev, desc, irq); 3479 ret = setup_msi_irq(dev, msidesc, irq);
3231 if (ret < 0) { 3480 if (ret < 0) {
3232 destroy_irq(irq); 3481 destroy_irq(irq);
3233 return ret; 3482 return ret;
@@ -3245,7 +3494,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3245{ 3494{
3246 unsigned int irq; 3495 unsigned int irq;
3247 int ret, sub_handle; 3496 int ret, sub_handle;
3248 struct msi_desc *desc; 3497 struct msi_desc *msidesc;
3249 unsigned int irq_want; 3498 unsigned int irq_want;
3250 3499
3251#ifdef CONFIG_INTR_REMAP 3500#ifdef CONFIG_INTR_REMAP
@@ -3253,10 +3502,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3253 int index = 0; 3502 int index = 0;
3254#endif 3503#endif
3255 3504
3256 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3505 irq_want = nr_irqs_gsi;
3257 sub_handle = 0; 3506 sub_handle = 0;
3258 list_for_each_entry(desc, &dev->msi_list, list) { 3507 list_for_each_entry(msidesc, &dev->msi_list, list) {
3259 irq = create_irq_nr(irq_want--); 3508 irq = create_irq_nr(irq_want);
3509 irq_want++;
3260 if (irq == 0) 3510 if (irq == 0)
3261 return -1; 3511 return -1;
3262#ifdef CONFIG_INTR_REMAP 3512#ifdef CONFIG_INTR_REMAP
@@ -3288,7 +3538,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3288 } 3538 }
3289no_ir: 3539no_ir:
3290#endif 3540#endif
3291 ret = setup_msi_irq(dev, desc, irq); 3541 ret = setup_msi_irq(dev, msidesc, irq);
3292 if (ret < 0) 3542 if (ret < 0)
3293 goto error; 3543 goto error;
3294 sub_handle++; 3544 sub_handle++;
@@ -3309,19 +3559,21 @@ void arch_teardown_msi_irq(unsigned int irq)
3309#ifdef CONFIG_SMP 3559#ifdef CONFIG_SMP
3310static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3560static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3311{ 3561{
3562 struct irq_desc *desc = irq_to_desc(irq);
3312 struct irq_cfg *cfg; 3563 struct irq_cfg *cfg;
3313 struct msi_msg msg; 3564 struct msi_msg msg;
3314 unsigned int dest; 3565 unsigned int dest;
3315 cpumask_t tmp; 3566 cpumask_t tmp;
3316 struct irq_desc *desc;
3317 3567
3318 if (!cpumask_intersects(mask, cpu_online_mask)) 3568 if (!cpumask_intersects(mask, cpu_online_mask))
3319 return; 3569 return;
3320 3570
3321 if (assign_irq_vector(irq, *mask)) 3571 cfg = desc->chip_data;
3572 if (assign_irq_vector(irq, cfg, *mask))
3322 return; 3573 return;
3323 3574
3324 cfg = irq_cfg(irq); 3575 set_extra_move_desc(desc, *mask);
3576
3325 cpumask_and(&tmp, &cfg->domain, mask); 3577 cpumask_and(&tmp, &cfg->domain, mask);
3326 dest = cpu_mask_to_apicid(tmp); 3578 dest = cpu_mask_to_apicid(tmp);
3327 3579
@@ -3333,9 +3585,9 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3333 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3585 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3334 3586
3335 dmar_msi_write(irq, &msg); 3587 dmar_msi_write(irq, &msg);
3336 desc = irq_to_desc(irq);
3337 cpumask_copy(&desc->affinity, mask); 3588 cpumask_copy(&desc->affinity, mask);
3338} 3589}
3590
3339#endif /* CONFIG_SMP */ 3591#endif /* CONFIG_SMP */
3340 3592
3341struct irq_chip dmar_msi_type = { 3593struct irq_chip dmar_msi_type = {
@@ -3369,8 +3621,8 @@ int arch_setup_dmar_msi(unsigned int irq)
3369#ifdef CONFIG_SMP 3621#ifdef CONFIG_SMP
3370static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3622static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3371{ 3623{
3624 struct irq_desc *desc = irq_to_desc(irq);
3372 struct irq_cfg *cfg; 3625 struct irq_cfg *cfg;
3373 struct irq_desc *desc;
3374 struct msi_msg msg; 3626 struct msi_msg msg;
3375 unsigned int dest; 3627 unsigned int dest;
3376 cpumask_t tmp; 3628 cpumask_t tmp;
@@ -3378,10 +3630,12 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3378 if (!cpumask_intersects(mask, cpu_online_mask)) 3630 if (!cpumask_intersects(mask, cpu_online_mask))
3379 return; 3631 return;
3380 3632
3381 if (assign_irq_vector(irq, *mask)) 3633 cfg = desc->chip_data;
3634 if (assign_irq_vector(irq, cfg, *mask))
3382 return; 3635 return;
3383 3636
3384 cfg = irq_cfg(irq); 3637 set_extra_move_desc(desc, *mask);
3638
3385 cpumask_and(&tmp, &cfg->domain, mask); 3639 cpumask_and(&tmp, &cfg->domain, mask);
3386 dest = cpu_mask_to_apicid(tmp); 3640 dest = cpu_mask_to_apicid(tmp);
3387 3641
@@ -3393,9 +3647,9 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3393 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3647 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3394 3648
3395 hpet_msi_write(irq, &msg); 3649 hpet_msi_write(irq, &msg);
3396 desc = irq_to_desc(irq);
3397 cpumask_copy(&desc->affinity, mask); 3650 cpumask_copy(&desc->affinity, mask);
3398} 3651}
3652
3399#endif /* CONFIG_SMP */ 3653#endif /* CONFIG_SMP */
3400 3654
3401struct irq_chip hpet_msi_type = { 3655struct irq_chip hpet_msi_type = {
@@ -3450,25 +3704,27 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3450 3704
3451static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) 3705static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3452{ 3706{
3707 struct irq_desc *desc = irq_to_desc(irq);
3453 struct irq_cfg *cfg; 3708 struct irq_cfg *cfg;
3454 unsigned int dest; 3709 unsigned int dest;
3455 cpumask_t tmp; 3710 cpumask_t tmp;
3456 struct irq_desc *desc;
3457 3711
3458 if (!cpumask_intersects(mask, cpu_online_mask)) 3712 if (!cpumask_intersects(mask, cpu_online_mask))
3459 return; 3713 return;
3460 3714
3461 if (assign_irq_vector(irq, *mask)) 3715 cfg = desc->chip_data;
3716 if (assign_irq_vector(irq, cfg, *mask))
3462 return; 3717 return;
3463 3718
3464 cfg = irq_cfg(irq); 3719 set_extra_move_desc(desc, *mask);
3720
3465 cpumask_and(&tmp, &cfg->domain, mask); 3721 cpumask_and(&tmp, &cfg->domain, mask);
3466 dest = cpu_mask_to_apicid(tmp); 3722 dest = cpu_mask_to_apicid(tmp);
3467 3723
3468 target_ht_irq(irq, dest, cfg->vector); 3724 target_ht_irq(irq, dest, cfg->vector);
3469 desc = irq_to_desc(irq);
3470 cpumask_copy(&desc->affinity, mask); 3725 cpumask_copy(&desc->affinity, mask);
3471} 3726}
3727
3472#endif 3728#endif
3473 3729
3474static struct irq_chip ht_irq_chip = { 3730static struct irq_chip ht_irq_chip = {
@@ -3488,13 +3744,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3488 int err; 3744 int err;
3489 cpumask_t tmp; 3745 cpumask_t tmp;
3490 3746
3747 cfg = irq_cfg(irq);
3491 tmp = TARGET_CPUS; 3748 tmp = TARGET_CPUS;
3492 err = assign_irq_vector(irq, tmp); 3749 err = assign_irq_vector(irq, cfg, tmp);
3493 if (!err) { 3750 if (!err) {
3494 struct ht_irq_msg msg; 3751 struct ht_irq_msg msg;
3495 unsigned dest; 3752 unsigned dest;
3496 3753
3497 cfg = irq_cfg(irq);
3498 cpus_and(tmp, cfg->domain, tmp); 3754 cpus_and(tmp, cfg->domain, tmp);
3499 dest = cpu_mask_to_apicid(tmp); 3755 dest = cpu_mask_to_apicid(tmp);
3500 3756
@@ -3540,7 +3796,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3540 unsigned long flags; 3796 unsigned long flags;
3541 int err; 3797 int err;
3542 3798
3543 err = assign_irq_vector(irq, *eligible_cpu); 3799 cfg = irq_cfg(irq);
3800
3801 err = assign_irq_vector(irq, cfg, *eligible_cpu);
3544 if (err != 0) 3802 if (err != 0)
3545 return err; 3803 return err;
3546 3804
@@ -3549,8 +3807,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3549 irq_name); 3807 irq_name);
3550 spin_unlock_irqrestore(&vector_lock, flags); 3808 spin_unlock_irqrestore(&vector_lock, flags);
3551 3809
3552 cfg = irq_cfg(irq);
3553
3554 mmr_value = 0; 3810 mmr_value = 0;
3555 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3811 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3556 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3812 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3602,9 +3858,16 @@ int __init io_apic_get_redir_entries (int ioapic)
3602 return reg_01.bits.entries; 3858 return reg_01.bits.entries;
3603} 3859}
3604 3860
3605int __init probe_nr_irqs(void) 3861void __init probe_nr_irqs_gsi(void)
3606{ 3862{
3607 return NR_IRQS; 3863 int idx;
3864 int nr = 0;
3865
3866 for (idx = 0; idx < nr_ioapics; idx++)
3867 nr += io_apic_get_redir_entries(idx) + 1;
3868
3869 if (nr > nr_irqs_gsi)
3870 nr_irqs_gsi = nr;
3608} 3871}
3609 3872
3610/* -------------------------------------------------------------------------- 3873/* --------------------------------------------------------------------------
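
io_apic_get_redir_entries() returns the highest redirection-table index, so each IO-APIC contributes (entries + 1) GSIs to the total. As a worked example: two IO-APICs that each report 23 entries yield nr_irqs_gsi = 48, so create_irq_nr() and arch_setup_msi_irq() above start their search for dynamic irq numbers at 48, leaving 0-47 for the wired GSIs.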
@@ -3703,19 +3966,31 @@ int __init io_apic_get_version(int ioapic)
3703 3966
3704int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3967int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3705{ 3968{
3969 struct irq_desc *desc;
3970 struct irq_cfg *cfg;
3971 int cpu = boot_cpu_id;
3972
3706 if (!IO_APIC_IRQ(irq)) { 3973 if (!IO_APIC_IRQ(irq)) {
3707 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3974 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3708 ioapic); 3975 ioapic);
3709 return -EINVAL; 3976 return -EINVAL;
3710 } 3977 }
3711 3978
3979 desc = irq_to_desc_alloc_cpu(irq, cpu);
3980 if (!desc) {
3981 printk(KERN_INFO "cannot get irq_desc %d\n", irq);
3982 return 0;
3983 }
3984
3712 /* 3985 /*
3713 * IRQs < 16 are already in the irq_2_pin[] map 3986 * IRQs < 16 are already in the irq_2_pin[] map
3714 */ 3987 */
3715 if (irq >= 16) 3988 if (irq >= NR_IRQS_LEGACY) {
3716 add_pin_to_irq(irq, ioapic, pin); 3989 cfg = desc->chip_data;
3990 add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
3991 }
3717 3992
3718 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); 3993 setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
3719 3994
3720 return 0; 3995 return 0;
3721} 3996}
@@ -3769,9 +4044,10 @@ void __init setup_ioapic_dest(void)
3769 * when you have too many devices, because at that time only boot 4044 * when you have too many devices, because at that time only boot
3770 * cpu is online. 4045 * cpu is online.
3771 */ 4046 */
3772 cfg = irq_cfg(irq); 4047 desc = irq_to_desc(irq);
4048 cfg = desc->chip_data;
3773 if (!cfg->vector) { 4049 if (!cfg->vector) {
3774 setup_IO_APIC_irq(ioapic, pin, irq, 4050 setup_IO_APIC_irq(ioapic, pin, irq, desc,
3775 irq_trigger(irq_entry), 4051 irq_trigger(irq_entry),
3776 irq_polarity(irq_entry)); 4052 irq_polarity(irq_entry));
3777 continue; 4053 continue;
@@ -3781,7 +4057,6 @@ void __init setup_ioapic_dest(void)
3781 /* 4057 /*
3782 * Honour affinities which have been set in early boot 4058 * Honour affinities which have been set in early boot
3783 */ 4059 */
3784 desc = irq_to_desc(irq);
3785 if (desc->status & 4060 if (desc->status &
3786 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4061 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
3787 mask = desc->affinity; 4062 mask = desc->affinity;
@@ -3790,10 +4065,10 @@ void __init setup_ioapic_dest(void)
3790 4065
3791#ifdef CONFIG_INTR_REMAP 4066#ifdef CONFIG_INTR_REMAP
3792 if (intr_remapping_enabled) 4067 if (intr_remapping_enabled)
3793 set_ir_ioapic_affinity_irq(irq, &mask); 4068 set_ir_ioapic_affinity_irq_desc(desc, mask);
3794 else 4069 else
3795#endif 4070#endif
3796 set_ioapic_affinity_irq(irq, &mask); 4071 set_ioapic_affinity_irq_desc(desc, mask);
3797 } 4072 }
3798 4073
3799 } 4074 }
@@ -3842,7 +4117,6 @@ void __init ioapic_init_mappings(void)
3842 struct resource *ioapic_res; 4117 struct resource *ioapic_res;
3843 int i; 4118 int i;
3844 4119
3845 irq_2_pin_init();
3846 ioapic_res = ioapic_setup_resources(); 4120 ioapic_res = ioapic_setup_resources();
3847 for (i = 0; i < nr_ioapics; i++) { 4121 for (i = 0; i < nr_ioapics; i++) {
3848 if (smp_found_config) { 4122 if (smp_found_config) {
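
Dropping irq_2_pin_init() here matches the rest of the file: the old statically sized global irq_2_pin[] table is gone, and each irq_cfg now carries its own chain of (apic, pin) entries that add_pin_to_irq_cpu() and replace_pin_at_irq_cpu() manipulate. A minimal sketch of what such a per-cfg list could look like (field names and the kzalloc_node() allocation are assumptions for illustration, not the exact kernel layout):

	struct irq_pin_list {
		int apic, pin;
		struct irq_pin_list *next;
	};

	static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu,
				       int apic, int pin)
	{
		struct irq_pin_list *entry, **last = &cfg->irq_2_pin;

		for (entry = cfg->irq_2_pin; entry; entry = entry->next) {
			if (entry->apic == apic && entry->pin == pin)
				return;		/* pin already recorded */
			last = &entry->next;
		}

		/* allocate near the cpu that handles the irq (NUMA-friendly) */
		entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC,
				     cpu_to_node(cpu));
		if (!entry)
			return;
		entry->apic = apic;
		entry->pin  = pin;
		*last = entry;
	}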
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
118 } 118 }
119 119
120 desc = irq_to_desc(i); 120 desc = irq_to_desc(i);
121 if (!desc)
122 return 0;
123
121 spin_lock_irqsave(&desc->lock, flags); 124 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP 125#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i); 126 any_count = kstat_irqs(i);
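
With SPARSE_IRQ the descriptor space has holes, so irq_to_desc() can legitimately return NULL for a number that was never allocated; every generic walker now has to check before dereferencing, as this hunk and the two fixup_irqs() hunks below do. The defensive pattern, in sketch form:

	for (i = 0; i < nr_irqs; i++) {
		struct irq_desc *desc = irq_to_desc(i);

		if (!desc)
			continue;	/* hole in the sparse irq space */
		/* ... safe to inspect desc->status, desc->chip, ... */
	}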
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 87870a49be4e..9cf9cbbf7a02 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
242 for_each_irq_desc(irq, desc) { 242 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 243 cpumask_t mask;
244 244
245 if (!desc)
246 continue;
245 if (irq == 2) 247 if (irq == 2)
246 continue; 248 continue;
247 249
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 8cbd069e5b41..54c69d47a771 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -91,6 +91,8 @@ void fixup_irqs(cpumask_t map)
91 int break_affinity = 0; 91 int break_affinity = 0;
92 int set_affinity = 1; 92 int set_affinity = 1;
93 93
94 if (!desc)
95 continue;
94 if (irq == 2) 96 if (irq == 2)
95 continue; 97 continue;
96 98
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 607db63044a5..203384ed2b5d 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
68 /* 68 /*
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < NR_IRQS_LEGACY; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i); 72 struct irq_desc *desc = irq_to_desc(i);
74 73
75 desc->status = IRQ_DISABLED; 74 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8670b3ce626e..6190e6ef546c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -76,8 +76,7 @@ void __init init_ISA_irqs(void)
76 init_bsp_APIC(); 76 init_bsp_APIC();
77 init_8259A(0); 77 init_8259A(0);
78 78
79 for (i = 0; i < 16; i++) { 79 for (i = 0; i < NR_IRQS_LEGACY; i++) {
80 /* first time call this irq_desc */
81 struct irq_desc *desc = irq_to_desc(i); 80 struct irq_desc *desc = irq_to_desc(i);
82 81
83 desc->status = IRQ_DISABLED; 82 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 7a3dfceb90e4..19a1044a0cd9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -101,11 +101,15 @@ static void __init dma32_free_bootmem(void)
101 dma32_bootmem_ptr = NULL; 101 dma32_bootmem_ptr = NULL;
102 dma32_bootmem_size = 0; 102 dma32_bootmem_size = 0;
103} 103}
104#endif
104 105
105void __init pci_iommu_alloc(void) 106void __init pci_iommu_alloc(void)
106{ 107{
108#ifdef CONFIG_X86_64
107 /* free the range so iommu could get some range less than 4G */ 109 /* free the range so iommu could get some range less than 4G */
108 dma32_free_bootmem(); 110 dma32_free_bootmem();
111#endif
112
109 /* 113 /*
110 * The order of these functions is important for 114 * The order of these functions is important for
111 * fall-back/fail-over reasons 115 * fall-back/fail-over reasons
@@ -121,15 +125,6 @@ void __init pci_iommu_alloc(void)
121 pci_swiotlb_init(); 125 pci_swiotlb_init();
122} 126}
123 127
124unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
125{
126 unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
127
128 return size >> PAGE_SHIFT;
129}
130EXPORT_SYMBOL(iommu_nr_pages);
131#endif
132
133void *dma_generic_alloc_coherent(struct device *dev, size_t size, 128void *dma_generic_alloc_coherent(struct device *dev, size_t size,
134 dma_addr_t *dma_addr, gfp_t flag) 129 dma_addr_t *dma_addr, gfp_t flag)
135{ 130{
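
The deleted iommu_nr_pages() counted how many whole pages a DMA buffer touches: the offset of addr within its first page is added to len before rounding up. Worked example: with 4 KiB pages, addr = 0x1ffc and len = 8 give roundup(0xffc + 8, 4096) = 8192, i.e. two pages, even though len itself is far below PAGE_SIZE.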
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 3c539d111abb..242c3440687f 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -3,6 +3,8 @@
3#include <linux/pci.h> 3#include <linux/pci.h>
4#include <linux/cache.h> 4#include <linux/cache.h>
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/swiotlb.h>
7#include <linux/bootmem.h>
6#include <linux/dma-mapping.h> 8#include <linux/dma-mapping.h>
7 9
8#include <asm/iommu.h> 10#include <asm/iommu.h>
@@ -11,6 +13,31 @@
11 13
12int swiotlb __read_mostly; 14int swiotlb __read_mostly;
13 15
16void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
17{
18 return alloc_bootmem_low_pages(size);
19}
20
21void *swiotlb_alloc(unsigned order, unsigned long nslabs)
22{
23 return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
24}
25
26dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
27{
28 return paddr;
29}
30
31phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
32{
33 return baddr;
34}
35
36int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
37{
38 return 0;
39}
40
14static dma_addr_t 41static dma_addr_t
15swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, 42swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
16 int direction) 43 int direction)
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = {
50void __init pci_swiotlb_init(void) 77void __init pci_swiotlb_init(void)
51{ 78{
52 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 79 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
80#ifdef CONFIG_X86_64
53 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) 81 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
54 swiotlb = 1; 82 swiotlb = 1;
83#endif
55 if (swiotlb_force) 84 if (swiotlb_force)
56 swiotlb = 1; 85 swiotlb = 1;
57 if (swiotlb) { 86 if (swiotlb) {
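
The five new helpers above are the hooks the generic swiotlb code calls out to: where the bounce pool comes from at boot (alloc_bootmem_low_pages()) or later (__get_free_pages()), and how physical addresses translate to bus addresses, which on bare-metal x86 is the identity. A platform with a fixed DMA offset could plug in something like the following instead (the offset and the platform are hypothetical, purely for illustration):

	/* hypothetical platform: bus address = physical address + fixed offset */
	#define BUS_DMA_OFFSET	0x80000000UL

	dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
	{
		return paddr + BUS_DMA_OFFSET;
	}

	phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
	{
		return baddr - BUS_DMA_OFFSET;
	}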
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 67465ed89310..309949e9e1c1 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
168 ich_force_enable_hpet); 168 ich_force_enable_hpet);
169DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, 169DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
170 ich_force_enable_hpet); 170 ich_force_enable_hpet);
171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
172 ich_force_enable_hpet);
171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, 173DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
172 ich_force_enable_hpet); 174 ich_force_enable_hpet);
173 175
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 08e02e8453c9..ae0d8042cf69 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -953,7 +953,7 @@ void __init setup_arch(char **cmdline_p)
953 ioapic_init_mappings(); 953 ioapic_init_mappings();
954 954
955 /* need to wait until io_apic is mapped */ 955 /* need to wait until io_apic is mapped */
956 nr_irqs = probe_nr_irqs(); 956 probe_nr_irqs_gsi();
957 957
958 kvm_guest_init(); 958 kvm_guest_init();
959 959
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 5c7cef34c9e7..10b9bd35a8ff 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -30,21 +30,6 @@ ENTRY(lguest_entry)
30 movl $lguest_data - __PAGE_OFFSET, %edx 30 movl $lguest_data - __PAGE_OFFSET, %edx
31 int $LGUEST_TRAP_ENTRY 31 int $LGUEST_TRAP_ENTRY
32 32
33 /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl
34 * instruction uses %esi implicitly as the source for the copy we're
35 * about to do. */
36 movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
37
38 /* Copy first 32 entries of page directory to __PAGE_OFFSET entries.
39 * This means the first 128M of kernel memory will be mapped at
40 * PAGE_OFFSET where the kernel expects to run. This will get it far
41 * enough through boot to switch to its own pagetables. */
42 movl $32, %ecx
43 movl %esi, %edi
44 addl $((__PAGE_OFFSET >> 22) * 4), %edi
45 rep
46 movsl
47
48 /* Set up the initial stack so we can run C code. */ 33 /* Set up the initial stack so we can run C code. */
49 movl $(init_thread_union+THREAD_SIZE),%esp 34 movl $(init_thread_union+THREAD_SIZE),%esp
50 35
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
39#define __do_strncpy_from_user(dst, src, count, res) \ 39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \ 40do { \
41 int __d0, __d1, __d2; \ 41 int __d0, __d1, __d2; \
42 might_sleep(); \ 42 might_fault(); \
43 __asm__ __volatile__( \ 43 __asm__ __volatile__( \
44 " testl %1,%1\n" \ 44 " testl %1,%1\n" \
45 " jz 2f\n" \ 45 " jz 2f\n" \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
126#define __do_clear_user(addr,size) \ 126#define __do_clear_user(addr,size) \
127do { \ 127do { \
128 int __d0; \ 128 int __d0; \
129 might_sleep(); \ 129 might_fault(); \
130 __asm__ __volatile__( \ 130 __asm__ __volatile__( \
131 "0: rep; stosl\n" \ 131 "0: rep; stosl\n" \
132 " movl %2,%0\n" \ 132 " movl %2,%0\n" \
@@ -155,7 +155,7 @@ do { \
155unsigned long 155unsigned long
156clear_user(void __user *to, unsigned long n) 156clear_user(void __user *to, unsigned long n)
157{ 157{
158 might_sleep(); 158 might_fault();
159 if (access_ok(VERIFY_WRITE, to, n)) 159 if (access_ok(VERIFY_WRITE, to, n))
160 __do_clear_user(to, n); 160 __do_clear_user(to, n);
161 return n; 161 return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
197 unsigned long mask = -__addr_ok(s); 197 unsigned long mask = -__addr_ok(s);
198 unsigned long res, tmp; 198 unsigned long res, tmp;
199 199
200 might_sleep(); 200 might_fault();
201 201
202 __asm__ __volatile__( 202 __asm__ __volatile__(
203 " testl %0, %0\n" 203 " testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
15#define __do_strncpy_from_user(dst,src,count,res) \ 15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \ 16do { \
17 long __d0, __d1, __d2; \ 17 long __d0, __d1, __d2; \
18 might_sleep(); \ 18 might_fault(); \
19 __asm__ __volatile__( \ 19 __asm__ __volatile__( \
20 " testq %1,%1\n" \ 20 " testq %1,%1\n" \
21 " jz 2f\n" \ 21 " jz 2f\n" \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
64unsigned long __clear_user(void __user *addr, unsigned long size) 64unsigned long __clear_user(void __user *addr, unsigned long size)
65{ 65{
66 long __d0; 66 long __d0;
67 might_sleep(); 67 might_fault();
68 /* no memory constraint because it doesn't change any memory gcc knows 68 /* no memory constraint because it doesn't change any memory gcc knows
69 about */ 69 about */
70 asm volatile( 70 asm volatile(
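
The might_sleep() -> might_fault() conversion in both usercopy files is more than a rename: might_fault() documents that the sleep, if any, happens through a page fault on the user access, which lets lockdep-style checking hook the user-access paths specifically. On configurations without such checking it can simply degrade to might_sleep(); a plausible fallback definition (an assumption for illustration, not necessarily the kernel's exact one):

	#ifndef might_fault
	/* assumed fallback: a faulting user access may also sleep */
	# define might_fault()	might_sleep()
	#endif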
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 800e1d94c1b5..8655b5bb0963 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -21,6 +21,7 @@
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/pci.h>
24#include <linux/pfn.h> 25#include <linux/pfn.h>
25#include <linux/poison.h> 26#include <linux/poison.h>
26#include <linux/bootmem.h> 27#include <linux/bootmem.h>
@@ -967,6 +968,8 @@ void __init mem_init(void)
967 int codesize, reservedpages, datasize, initsize; 968 int codesize, reservedpages, datasize, initsize;
968 int tmp; 969 int tmp;
969 970
971 pci_iommu_alloc();
972
970#ifdef CONFIG_FLATMEM 973#ifdef CONFIG_FLATMEM
971 BUG_ON(!mem_map); 974 BUG_ON(!mem_map);
972#endif 975#endif
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 509513760a6e..98658f25f542 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -65,11 +65,13 @@ static unsigned long reset_value[NUM_COUNTERS];
65#define IBS_FETCH_BEGIN 3 65#define IBS_FETCH_BEGIN 3
66#define IBS_OP_BEGIN 4 66#define IBS_OP_BEGIN 4
67 67
68/* The function interface needs to be fixed, something like add 68/*
69 data. Should then be added to linux/oprofile.h. */ 69 * The function interface needs to be fixed, something like add
70 * data. Should then be added to linux/oprofile.h.
71 */
70extern void 72extern void
71oprofile_add_ibs_sample(struct pt_regs *const regs, 73oprofile_add_ibs_sample(struct pt_regs * const regs,
72 unsigned int *const ibs_sample, int ibs_code); 74 unsigned int * const ibs_sample, int ibs_code);
73 75
74struct ibs_fetch_sample { 76struct ibs_fetch_sample {
75 /* MSRC001_1031 IBS Fetch Linear Address Register */ 77 /* MSRC001_1031 IBS Fetch Linear Address Register */
@@ -104,11 +106,6 @@ struct ibs_op_sample {
104 unsigned int ibs_dc_phys_high; 106 unsigned int ibs_dc_phys_high;
105}; 107};
106 108
107/*
108 * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+
109*/
110static void clear_ibs_nmi(void);
111
112static int ibs_allowed; /* AMD Family10h and later */ 109static int ibs_allowed; /* AMD Family10h and later */
113 110
114struct op_ibs_config { 111struct op_ibs_config {
@@ -223,7 +220,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
223 (unsigned int *)&ibs_fetch, 220 (unsigned int *)&ibs_fetch,
224 IBS_FETCH_BEGIN); 221 IBS_FETCH_BEGIN);
225 222
226 /*reenable the IRQ */ 223 /* reenable the IRQ */
227 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 224 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
228 high &= ~IBS_FETCH_HIGH_VALID_BIT; 225 high &= ~IBS_FETCH_HIGH_VALID_BIT;
229 high |= IBS_FETCH_HIGH_ENABLE; 226 high |= IBS_FETCH_HIGH_ENABLE;
@@ -331,8 +328,10 @@ static void op_amd_stop(struct op_msrs const * const msrs)
331 unsigned int low, high; 328 unsigned int low, high;
332 int i; 329 int i;
333 330
334 /* Subtle: stop on all counters to avoid race with 331 /*
335 * setting our pm callback */ 332 * Subtle: stop on all counters to avoid race with setting our
333 * pm callback
334 */
336 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 335 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
337 if (!reset_value[i]) 336 if (!reset_value[i])
338 continue; 337 continue;
@@ -343,13 +342,15 @@ static void op_amd_stop(struct op_msrs const * const msrs)
343 342
344#ifdef CONFIG_OPROFILE_IBS 343#ifdef CONFIG_OPROFILE_IBS
345 if (ibs_allowed && ibs_config.fetch_enabled) { 344 if (ibs_allowed && ibs_config.fetch_enabled) {
346 low = 0; /* clear max count and enable */ 345 /* clear max count and enable */
346 low = 0;
347 high = 0; 347 high = 0;
348 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 348 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
349 } 349 }
350 350
351 if (ibs_allowed && ibs_config.op_enabled) { 351 if (ibs_allowed && ibs_config.op_enabled) {
352 low = 0; /* clear max count and enable */ 352 /* clear max count and enable */
353 low = 0;
353 high = 0; 354 high = 0;
354 wrmsr(MSR_AMD64_IBSOPCTL, low, high); 355 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
355 } 356 }
@@ -370,18 +371,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
370 } 371 }
371} 372}
372 373
373#ifndef CONFIG_OPROFILE_IBS 374#ifdef CONFIG_OPROFILE_IBS
374
375/* no IBS support */
376
377static int op_amd_init(struct oprofile_operations *ops)
378{
379 return 0;
380}
381
382static void op_amd_exit(void) {}
383
384#else
385 375
386static u8 ibs_eilvt_off; 376static u8 ibs_eilvt_off;
387 377
@@ -395,7 +385,7 @@ static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
395 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); 385 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
396} 386}
397 387
398static int pfm_amd64_setup_eilvt(void) 388static int init_ibs_nmi(void)
399{ 389{
400#define IBSCTL_LVTOFFSETVAL (1 << 8) 390#define IBSCTL_LVTOFFSETVAL (1 << 8)
401#define IBSCTL 0x1cc 391#define IBSCTL 0x1cc
@@ -443,18 +433,22 @@ static int pfm_amd64_setup_eilvt(void)
443 return 0; 433 return 0;
444} 434}
445 435
446/* 436/* uninitialize the APIC for the IBS interrupts if needed */
447 * initialize the APIC for the IBS interrupts 437static void clear_ibs_nmi(void)
448 * if available (AMD Family10h rev B0 and later) 438{
449 */ 439 if (ibs_allowed)
450static void setup_ibs(void) 440 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
441}
442
443/* initialize the APIC for the IBS interrupts if available */
444static void ibs_init(void)
451{ 445{
452 ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); 446 ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
453 447
454 if (!ibs_allowed) 448 if (!ibs_allowed)
455 return; 449 return;
456 450
457 if (pfm_amd64_setup_eilvt()) { 451 if (init_ibs_nmi()) {
458 ibs_allowed = 0; 452 ibs_allowed = 0;
459 return; 453 return;
460 } 454 }
@@ -462,14 +456,12 @@ static void setup_ibs(void)
462 printk(KERN_INFO "oprofile: AMD IBS detected\n"); 456 printk(KERN_INFO "oprofile: AMD IBS detected\n");
463} 457}
464 458
465 459static void ibs_exit(void)
466/*
467 * unitialize the APIC for the IBS interrupts if needed on AMD Family10h
468 * rev B0 and later */
469static void clear_ibs_nmi(void)
470{ 460{
471 if (ibs_allowed) 461 if (!ibs_allowed)
472 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 462 return;
463
464 clear_ibs_nmi();
473} 465}
474 466
475static int (*create_arch_files)(struct super_block *sb, struct dentry *root); 467static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -519,7 +511,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
519 511
520static int op_amd_init(struct oprofile_operations *ops) 512static int op_amd_init(struct oprofile_operations *ops)
521{ 513{
522 setup_ibs(); 514 ibs_init();
523 create_arch_files = ops->create_files; 515 create_arch_files = ops->create_files;
524 ops->create_files = setup_ibs_files; 516 ops->create_files = setup_ibs_files;
525 return 0; 517 return 0;
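
op_amd_init() interposes on oprofile's file-creation callback: it saves the original ops->create_files pointer in create_arch_files and substitutes setup_ibs_files, which can add the IBS control files and then chain to the saved pointer. The body of setup_ibs_files is not shown in this hunk, so the chaining call below is an assumption sketched for illustration:

	static int (*create_arch_files)(struct super_block *sb, struct dentry *root);

	static int setup_ibs_files(struct super_block *sb, struct dentry *root)
	{
		/* let the generic layer create its files first ... */
		int ret = create_arch_files(sb, root);

		if (ret)
			return ret;

		/* ... then add the IBS fetch/op control files here */
		return 0;
	}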
@@ -527,10 +519,21 @@ static int op_amd_init(struct oprofile_operations *ops)
527 519
528static void op_amd_exit(void) 520static void op_amd_exit(void)
529{ 521{
530 clear_ibs_nmi(); 522 ibs_exit();
531} 523}
532 524
533#endif 525#else
526
527/* no IBS support */
528
529static int op_amd_init(struct oprofile_operations *ops)
530{
531 return 0;
532}
533
534static void op_amd_exit(void) {}
535
536#endif /* CONFIG_OPROFILE_IBS */
534 537
535struct op_x86_model_spec const op_amd_spec = { 538struct op_x86_model_spec const op_amd_spec = {
536 .init = op_amd_init, 539 .init = op_amd_init,