diff options
Diffstat (limited to 'arch/x86')
99 files changed, 2911 insertions, 1310 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 98a0ed52b5c3..249d1e0824b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -247,6 +247,28 @@ config X86_HAS_BOOT_CPU_ID | |||
247 | def_bool y | 247 | def_bool y |
248 | depends on X86_VOYAGER | 248 | depends on X86_VOYAGER |
249 | 249 | ||
250 | config SPARSE_IRQ | ||
251 | bool "Support sparse irq numbering" | ||
252 | depends on PCI_MSI || HT_IRQ | ||
253 | help | ||
254 | This enables support for sparse irqs. This is useful for distro | ||
255 | kernels that want to define a high CONFIG_NR_CPUS value but still | ||
256 | want to have low kernel memory footprint on smaller machines. | ||
257 | |||
258 | ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread | ||
259 | out the irq_desc[] array in a more NUMA-friendly way. ) | ||
260 | |||
261 | If you don't know what to do here, say N. | ||
262 | |||
263 | config NUMA_MIGRATE_IRQ_DESC | ||
264 | bool "Move irq desc when changing irq smp_affinity" | ||
265 | depends on SPARSE_IRQ && NUMA | ||
266 | default n | ||
267 | help | ||
268 | This enables moving irq_desc to cpu/node that irq will use handled. | ||
269 | |||
270 | If you don't know what to do here, say N. | ||
271 | |||
250 | config X86_FIND_SMP_CONFIG | 272 | config X86_FIND_SMP_CONFIG |
251 | def_bool y | 273 | def_bool y |
252 | depends on X86_MPPARSE || X86_VOYAGER | 274 | depends on X86_MPPARSE || X86_VOYAGER |
@@ -479,7 +501,7 @@ config HPET_TIMER | |||
479 | The HPET provides a stable time base on SMP | 501 | The HPET provides a stable time base on SMP |
480 | systems, unlike the TSC, but it is more expensive to access, | 502 | systems, unlike the TSC, but it is more expensive to access, |
481 | as it is off-chip. You can find the HPET spec at | 503 | as it is off-chip. You can find the HPET spec at |
482 | <http://www.intel.com/hardwaredesign/hpetspec.htm>. | 504 | <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>. |
483 | 505 | ||
484 | You can safely choose Y here. However, HPET will only be | 506 | You can safely choose Y here. However, HPET will only be |
485 | activated if the platform and the BIOS support this feature. | 507 | activated if the platform and the BIOS support this feature. |
@@ -579,19 +601,20 @@ config IOMMU_HELPER | |||
579 | 601 | ||
580 | config MAXSMP | 602 | config MAXSMP |
581 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 603 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
582 | depends on X86_64 && SMP && BROKEN | 604 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
605 | select CPUMASK_OFFSTACK | ||
583 | default n | 606 | default n |
584 | help | 607 | help |
585 | Configure maximum number of CPUS and NUMA Nodes for this architecture. | 608 | Configure maximum number of CPUS and NUMA Nodes for this architecture. |
586 | If unsure, say N. | 609 | If unsure, say N. |
587 | 610 | ||
588 | config NR_CPUS | 611 | config NR_CPUS |
589 | int "Maximum number of CPUs (2-512)" if !MAXSMP | 612 | int "Maximum number of CPUs" if SMP && !MAXSMP |
590 | range 2 512 | 613 | range 2 512 if SMP && !MAXSMP |
591 | depends on SMP | 614 | default "1" if !SMP |
592 | default "4096" if MAXSMP | 615 | default "4096" if MAXSMP |
593 | default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 | 616 | default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) |
594 | default "8" | 617 | default "8" if SMP |
595 | help | 618 | help |
596 | This allows you to specify the maximum number of CPUs which this | 619 | This allows you to specify the maximum number of CPUs which this |
597 | kernel will support. The maximum supported value is 512 and the | 620 | kernel will support. The maximum supported value is 512 and the |
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index ce547f24a1cd..d8dd9f537911 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h | |||
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void) | |||
9 | return (1); | 9 | return (1); |
10 | } | 10 | } |
11 | 11 | ||
12 | static inline cpumask_t target_cpus(void) | 12 | static inline const cpumask_t *target_cpus(void) |
13 | { | 13 | { |
14 | #ifdef CONFIG_SMP | 14 | #ifdef CONFIG_SMP |
15 | return cpu_online_map; | 15 | return &cpu_online_map; |
16 | #else | 16 | #else |
17 | return cpumask_of_cpu(0); | 17 | return &cpumask_of_cpu(0); |
18 | #endif | 18 | #endif |
19 | } | 19 | } |
20 | 20 | ||
@@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) | |||
79 | 79 | ||
80 | static inline int cpu_present_to_apicid(int mps_cpu) | 80 | static inline int cpu_present_to_apicid(int mps_cpu) |
81 | { | 81 | { |
82 | if (mps_cpu < NR_CPUS) | 82 | if (mps_cpu < nr_cpu_ids) |
83 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | 83 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); |
84 | 84 | ||
85 | return BAD_APICID; | 85 | return BAD_APICID; |
@@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; | |||
94 | /* Mapping from cpu number to logical apicid */ | 94 | /* Mapping from cpu number to logical apicid */ |
95 | static inline int cpu_to_logical_apicid(int cpu) | 95 | static inline int cpu_to_logical_apicid(int cpu) |
96 | { | 96 | { |
97 | if (cpu >= NR_CPUS) | 97 | if (cpu >= nr_cpu_ids) |
98 | return BAD_APICID; | 98 | return BAD_APICID; |
99 | return cpu_physical_id(cpu); | 99 | return cpu_physical_id(cpu); |
100 | } | 100 | } |
@@ -119,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) | |||
119 | } | 119 | } |
120 | 120 | ||
121 | /* As we are using single CPU as destination, pick only one CPU here */ | 121 | /* As we are using single CPU as destination, pick only one CPU here */ |
122 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 122 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
123 | { | 123 | { |
124 | int cpu; | 124 | int cpu; |
125 | int apicid; | 125 | int apicid; |
126 | 126 | ||
127 | cpu = first_cpu(cpumask); | 127 | cpu = first_cpu(*cpumask); |
128 | apicid = cpu_to_logical_apicid(cpu); | 128 | apicid = cpu_to_logical_apicid(cpu); |
129 | return apicid; | 129 | return apicid; |
130 | } | 130 | } |
131 | 131 | ||
132 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
133 | const struct cpumask *andmask) | ||
134 | { | ||
135 | int cpu; | ||
136 | |||
137 | /* | ||
138 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
139 | * May as well be the first. | ||
140 | */ | ||
141 | for_each_cpu_and(cpu, cpumask, andmask) | ||
142 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
143 | break; | ||
144 | if (cpu < nr_cpu_ids) | ||
145 | return cpu_to_logical_apicid(cpu); | ||
146 | |||
147 | return BAD_APICID; | ||
148 | } | ||
149 | |||
132 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) | 150 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) |
133 | { | 151 | { |
134 | return cpuid_apic >> index_msb; | 152 | return cpuid_apic >> index_msb; |
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 9404c535b7ec..27fcd01b3ae6 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h | |||
@@ -1,25 +1,22 @@ | |||
1 | #ifndef __ASM_MACH_IPI_H | 1 | #ifndef __ASM_MACH_IPI_H |
2 | #define __ASM_MACH_IPI_H | 2 | #define __ASM_MACH_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t mask, int vector); | 4 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector); |
5 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
10 | 11 | ||
11 | static inline void send_IPI_allbutself(int vector) | 12 | static inline void send_IPI_allbutself(int vector) |
12 | { | 13 | { |
13 | cpumask_t mask = cpu_online_map; | 14 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
14 | cpu_clear(smp_processor_id(), mask); | ||
15 | |||
16 | if (!cpus_empty(mask)) | ||
17 | send_IPI_mask(mask, vector); | ||
18 | } | 15 | } |
19 | 16 | ||
20 | static inline void send_IPI_all(int vector) | 17 | static inline void send_IPI_all(int vector) |
21 | { | 18 | { |
22 | send_IPI_mask(cpu_online_map, vector); | 19 | send_IPI_mask(cpu_online_mask, vector); |
23 | } | 20 | } |
24 | 21 | ||
25 | #endif /* __ASM_MACH_IPI_H */ | 22 | #endif /* __ASM_MACH_IPI_H */ |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e6b82b17b072..dc27705f5443 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr) | |||
320 | _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); | 320 | _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); |
321 | } | 321 | } |
322 | 322 | ||
323 | #define SYS_VECTOR_FREE 0 | ||
324 | #define SYS_VECTOR_ALLOCED 1 | ||
325 | |||
326 | extern int first_system_vector; | 323 | extern int first_system_vector; |
327 | extern char system_vectors[]; | 324 | /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ |
325 | extern unsigned long used_vectors[]; | ||
328 | 326 | ||
329 | static inline void alloc_system_vector(int vector) | 327 | static inline void alloc_system_vector(int vector) |
330 | { | 328 | { |
331 | if (system_vectors[vector] == SYS_VECTOR_FREE) { | 329 | if (!test_bit(vector, used_vectors)) { |
332 | system_vectors[vector] = SYS_VECTOR_ALLOCED; | 330 | set_bit(vector, used_vectors); |
333 | if (first_system_vector > vector) | 331 | if (first_system_vector > vector) |
334 | first_system_vector = vector; | 332 | first_system_vector = vector; |
335 | } else | 333 | } else |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index dc22c0733282..4035357f5b9d 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -65,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) | |||
65 | return dma_ops; | 65 | return dma_ops; |
66 | else | 66 | else |
67 | return dev->archdata.dma_ops; | 67 | return dev->archdata.dma_ops; |
68 | #endif /* _ASM_X86_DMA_MAPPING_H */ | 68 | #endif |
69 | } | 69 | } |
70 | 70 | ||
71 | /* Make sure we keep the same behaviour */ | 71 | /* Make sure we keep the same behaviour */ |
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index e24ef876915f..51ac1230294e 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h | |||
@@ -9,14 +9,14 @@ static inline int apic_id_registered(void) | |||
9 | return (1); | 9 | return (1); |
10 | } | 10 | } |
11 | 11 | ||
12 | static inline cpumask_t target_cpus_cluster(void) | 12 | static inline const cpumask_t *target_cpus_cluster(void) |
13 | { | 13 | { |
14 | return CPU_MASK_ALL; | 14 | return &CPU_MASK_ALL; |
15 | } | 15 | } |
16 | 16 | ||
17 | static inline cpumask_t target_cpus(void) | 17 | static inline const cpumask_t *target_cpus(void) |
18 | { | 18 | { |
19 | return cpumask_of_cpu(smp_processor_id()); | 19 | return &cpumask_of_cpu(smp_processor_id()); |
20 | } | 20 | } |
21 | 21 | ||
22 | #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) | 22 | #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) |
@@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS]; | |||
80 | static inline void setup_apic_routing(void) | 80 | static inline void setup_apic_routing(void) |
81 | { | 81 | { |
82 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | 82 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); |
83 | printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | 83 | printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", |
84 | (apic_version[apic] == 0x14) ? | 84 | (apic_version[apic] == 0x14) ? |
85 | "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); | 85 | "Physical Cluster" : "Logical Cluster", |
86 | nr_ioapics, cpus_addr(*target_cpus())[0]); | ||
86 | } | 87 | } |
87 | 88 | ||
88 | static inline int multi_timer_check(int apic, int irq) | 89 | static inline int multi_timer_check(int apic, int irq) |
@@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) | |||
100 | { | 101 | { |
101 | if (!mps_cpu) | 102 | if (!mps_cpu) |
102 | return boot_cpu_physical_apicid; | 103 | return boot_cpu_physical_apicid; |
103 | else if (mps_cpu < NR_CPUS) | 104 | else if (mps_cpu < nr_cpu_ids) |
104 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | 105 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); |
105 | else | 106 | else |
106 | return BAD_APICID; | 107 | return BAD_APICID; |
@@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[]; | |||
120 | static inline int cpu_to_logical_apicid(int cpu) | 121 | static inline int cpu_to_logical_apicid(int cpu) |
121 | { | 122 | { |
122 | #ifdef CONFIG_SMP | 123 | #ifdef CONFIG_SMP |
123 | if (cpu >= NR_CPUS) | 124 | if (cpu >= nr_cpu_ids) |
124 | return BAD_APICID; | 125 | return BAD_APICID; |
125 | return (int)cpu_2_logical_apicid[cpu]; | 126 | return (int)cpu_2_logical_apicid[cpu]; |
126 | #else | 127 | #else |
127 | return logical_smp_processor_id(); | 128 | return logical_smp_processor_id(); |
128 | #endif | 129 | #endif |
@@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) | |||
146 | return (1); | 147 | return (1); |
147 | } | 148 | } |
148 | 149 | ||
149 | static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) | 150 | static inline unsigned int |
151 | cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) | ||
150 | { | 152 | { |
151 | int num_bits_set; | 153 | int num_bits_set; |
152 | int cpus_found = 0; | 154 | int cpus_found = 0; |
153 | int cpu; | 155 | int cpu; |
154 | int apicid; | 156 | int apicid; |
155 | 157 | ||
156 | num_bits_set = cpus_weight(cpumask); | 158 | num_bits_set = cpumask_weight(cpumask); |
157 | /* Return id to all */ | 159 | /* Return id to all */ |
158 | if (num_bits_set == NR_CPUS) | 160 | if (num_bits_set == NR_CPUS) |
159 | return 0xFF; | 161 | return 0xFF; |
@@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) | |||
161 | * The cpus in the mask must all be on the apic cluster. If are not | 163 | * The cpus in the mask must all be on the apic cluster. If are not |
162 | * on the same apicid cluster return default value of TARGET_CPUS. | 164 | * on the same apicid cluster return default value of TARGET_CPUS. |
163 | */ | 165 | */ |
164 | cpu = first_cpu(cpumask); | 166 | cpu = cpumask_first(cpumask); |
165 | apicid = cpu_to_logical_apicid(cpu); | 167 | apicid = cpu_to_logical_apicid(cpu); |
166 | while (cpus_found < num_bits_set) { | 168 | while (cpus_found < num_bits_set) { |
167 | if (cpu_isset(cpu, cpumask)) { | 169 | if (cpumask_test_cpu(cpu, cpumask)) { |
168 | int new_apicid = cpu_to_logical_apicid(cpu); | 170 | int new_apicid = cpu_to_logical_apicid(cpu); |
169 | if (apicid_cluster(apicid) != | 171 | if (apicid_cluster(apicid) != |
170 | apicid_cluster(new_apicid)){ | 172 | apicid_cluster(new_apicid)){ |
@@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) | |||
179 | return apicid; | 181 | return apicid; |
180 | } | 182 | } |
181 | 183 | ||
182 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 184 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
183 | { | 185 | { |
184 | int num_bits_set; | 186 | int num_bits_set; |
185 | int cpus_found = 0; | 187 | int cpus_found = 0; |
186 | int cpu; | 188 | int cpu; |
187 | int apicid; | 189 | int apicid; |
188 | 190 | ||
189 | num_bits_set = cpus_weight(cpumask); | 191 | num_bits_set = cpus_weight(*cpumask); |
190 | /* Return id to all */ | 192 | /* Return id to all */ |
191 | if (num_bits_set == NR_CPUS) | 193 | if (num_bits_set == NR_CPUS) |
192 | return cpu_to_logical_apicid(0); | 194 | return cpu_to_logical_apicid(0); |
@@ -194,10 +196,52 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
194 | * The cpus in the mask must all be on the apic cluster. If are not | 196 | * The cpus in the mask must all be on the apic cluster. If are not |
195 | * on the same apicid cluster return default value of TARGET_CPUS. | 197 | * on the same apicid cluster return default value of TARGET_CPUS. |
196 | */ | 198 | */ |
197 | cpu = first_cpu(cpumask); | 199 | cpu = first_cpu(*cpumask); |
200 | apicid = cpu_to_logical_apicid(cpu); | ||
201 | while (cpus_found < num_bits_set) { | ||
202 | if (cpu_isset(cpu, *cpumask)) { | ||
203 | int new_apicid = cpu_to_logical_apicid(cpu); | ||
204 | if (apicid_cluster(apicid) != | ||
205 | apicid_cluster(new_apicid)){ | ||
206 | printk ("%s: Not a valid mask!\n", __func__); | ||
207 | return cpu_to_logical_apicid(0); | ||
208 | } | ||
209 | apicid = new_apicid; | ||
210 | cpus_found++; | ||
211 | } | ||
212 | cpu++; | ||
213 | } | ||
214 | return apicid; | ||
215 | } | ||
216 | |||
217 | |||
218 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
219 | const struct cpumask *andmask) | ||
220 | { | ||
221 | int num_bits_set; | ||
222 | int cpus_found = 0; | ||
223 | int cpu; | ||
224 | int apicid = cpu_to_logical_apicid(0); | ||
225 | cpumask_var_t cpumask; | ||
226 | |||
227 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
228 | return apicid; | ||
229 | |||
230 | cpumask_and(cpumask, inmask, andmask); | ||
231 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
232 | |||
233 | num_bits_set = cpumask_weight(cpumask); | ||
234 | /* Return id to all */ | ||
235 | if (num_bits_set == NR_CPUS) | ||
236 | goto exit; | ||
237 | /* | ||
238 | * The cpus in the mask must all be on the apic cluster. If are not | ||
239 | * on the same apicid cluster return default value of TARGET_CPUS. | ||
240 | */ | ||
241 | cpu = cpumask_first(cpumask); | ||
198 | apicid = cpu_to_logical_apicid(cpu); | 242 | apicid = cpu_to_logical_apicid(cpu); |
199 | while (cpus_found < num_bits_set) { | 243 | while (cpus_found < num_bits_set) { |
200 | if (cpu_isset(cpu, cpumask)) { | 244 | if (cpumask_test_cpu(cpu, cpumask)) { |
201 | int new_apicid = cpu_to_logical_apicid(cpu); | 245 | int new_apicid = cpu_to_logical_apicid(cpu); |
202 | if (apicid_cluster(apicid) != | 246 | if (apicid_cluster(apicid) != |
203 | apicid_cluster(new_apicid)){ | 247 | apicid_cluster(new_apicid)){ |
@@ -209,6 +253,8 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
209 | } | 253 | } |
210 | cpu++; | 254 | cpu++; |
211 | } | 255 | } |
256 | exit: | ||
257 | free_cpumask_var(cpumask); | ||
212 | return apicid; | 258 | return apicid; |
213 | } | 259 | } |
214 | 260 | ||
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 632a955fcc0a..7e8ed24d4b8a 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h | |||
@@ -1,24 +1,22 @@ | |||
1 | #ifndef __ASM_ES7000_IPI_H | 1 | #ifndef __ASM_ES7000_IPI_H |
2 | #define __ASM_ES7000_IPI_H | 2 | #define __ASM_ES7000_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t mask, int vector); | 4 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector); |
5 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
10 | 11 | ||
11 | static inline void send_IPI_allbutself(int vector) | 12 | static inline void send_IPI_allbutself(int vector) |
12 | { | 13 | { |
13 | cpumask_t mask = cpu_online_map; | 14 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
14 | cpu_clear(smp_processor_id(), mask); | ||
15 | if (!cpus_empty(mask)) | ||
16 | send_IPI_mask(mask, vector); | ||
17 | } | 15 | } |
18 | 16 | ||
19 | static inline void send_IPI_all(int vector) | 17 | static inline void send_IPI_all(int vector) |
20 | { | 18 | { |
21 | send_IPI_mask(cpu_online_map, vector); | 19 | send_IPI_mask(cpu_online_mask, vector); |
22 | } | 20 | } |
23 | 21 | ||
24 | #endif /* __ASM_ES7000_IPI_H */ | 22 | #endif /* __ASM_ES7000_IPI_H */ |
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 0ac17d33a8c7..746f37a7963a 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h | |||
@@ -24,7 +24,7 @@ struct genapic { | |||
24 | int (*probe)(void); | 24 | int (*probe)(void); |
25 | 25 | ||
26 | int (*apic_id_registered)(void); | 26 | int (*apic_id_registered)(void); |
27 | cpumask_t (*target_cpus)(void); | 27 | const struct cpumask *(*target_cpus)(void); |
28 | int int_delivery_mode; | 28 | int int_delivery_mode; |
29 | int int_dest_mode; | 29 | int int_dest_mode; |
30 | int ESR_DISABLE; | 30 | int ESR_DISABLE; |
@@ -57,12 +57,16 @@ struct genapic { | |||
57 | 57 | ||
58 | unsigned (*get_apic_id)(unsigned long x); | 58 | unsigned (*get_apic_id)(unsigned long x); |
59 | unsigned long apic_id_mask; | 59 | unsigned long apic_id_mask; |
60 | unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); | 60 | unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); |
61 | cpumask_t (*vector_allocation_domain)(int cpu); | 61 | unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, |
62 | const struct cpumask *andmask); | ||
63 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); | ||
62 | 64 | ||
63 | #ifdef CONFIG_SMP | 65 | #ifdef CONFIG_SMP |
64 | /* ipi */ | 66 | /* ipi */ |
65 | void (*send_IPI_mask)(cpumask_t mask, int vector); | 67 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); |
68 | void (*send_IPI_mask_allbutself)(const struct cpumask *mask, | ||
69 | int vector); | ||
66 | void (*send_IPI_allbutself)(int vector); | 70 | void (*send_IPI_allbutself)(int vector); |
67 | void (*send_IPI_all)(int vector); | 71 | void (*send_IPI_all)(int vector); |
68 | #endif | 72 | #endif |
@@ -114,6 +118,7 @@ struct genapic { | |||
114 | APICFUNC(get_apic_id) \ | 118 | APICFUNC(get_apic_id) \ |
115 | .apic_id_mask = APIC_ID_MASK, \ | 119 | .apic_id_mask = APIC_ID_MASK, \ |
116 | APICFUNC(cpu_mask_to_apicid) \ | 120 | APICFUNC(cpu_mask_to_apicid) \ |
121 | APICFUNC(cpu_mask_to_apicid_and) \ | ||
117 | APICFUNC(vector_allocation_domain) \ | 122 | APICFUNC(vector_allocation_domain) \ |
118 | APICFUNC(acpi_madt_oem_check) \ | 123 | APICFUNC(acpi_madt_oem_check) \ |
119 | IPIFUNC(send_IPI_mask) \ | 124 | IPIFUNC(send_IPI_mask) \ |
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 2cae011668b7..adf32fb56aa6 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_GENAPIC_64_H | 1 | #ifndef _ASM_X86_GENAPIC_64_H |
2 | #define _ASM_X86_GENAPIC_64_H | 2 | #define _ASM_X86_GENAPIC_64_H |
3 | 3 | ||
4 | #include <linux/cpumask.h> | ||
5 | |||
4 | /* | 6 | /* |
5 | * Copyright 2004 James Cleverdon, IBM. | 7 | * Copyright 2004 James Cleverdon, IBM. |
6 | * Subject to the GNU Public License, v.2 | 8 | * Subject to the GNU Public License, v.2 |
@@ -18,16 +20,20 @@ struct genapic { | |||
18 | u32 int_delivery_mode; | 20 | u32 int_delivery_mode; |
19 | u32 int_dest_mode; | 21 | u32 int_dest_mode; |
20 | int (*apic_id_registered)(void); | 22 | int (*apic_id_registered)(void); |
21 | cpumask_t (*target_cpus)(void); | 23 | const struct cpumask *(*target_cpus)(void); |
22 | cpumask_t (*vector_allocation_domain)(int cpu); | 24 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); |
23 | void (*init_apic_ldr)(void); | 25 | void (*init_apic_ldr)(void); |
24 | /* ipi */ | 26 | /* ipi */ |
25 | void (*send_IPI_mask)(cpumask_t mask, int vector); | 27 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); |
28 | void (*send_IPI_mask_allbutself)(const struct cpumask *mask, | ||
29 | int vector); | ||
26 | void (*send_IPI_allbutself)(int vector); | 30 | void (*send_IPI_allbutself)(int vector); |
27 | void (*send_IPI_all)(int vector); | 31 | void (*send_IPI_all)(int vector); |
28 | void (*send_IPI_self)(int vector); | 32 | void (*send_IPI_self)(int vector); |
29 | /* */ | 33 | /* */ |
30 | unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); | 34 | unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); |
35 | unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, | ||
36 | const struct cpumask *andmask); | ||
31 | unsigned int (*phys_pkg_id)(int index_msb); | 37 | unsigned int (*phys_pkg_id)(int index_msb); |
32 | unsigned int (*get_apic_id)(unsigned long x); | 38 | unsigned int (*get_apic_id)(unsigned long x); |
33 | unsigned long (*set_apic_id)(unsigned int id); | 39 | unsigned long (*set_apic_id)(unsigned int id); |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index e475e009ae5d..7a1f44ac1f17 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -198,17 +198,14 @@ extern void restore_IO_APIC_setup(void); | |||
198 | extern void reinit_intr_remapped_IO_APIC(int); | 198 | extern void reinit_intr_remapped_IO_APIC(int); |
199 | #endif | 199 | #endif |
200 | 200 | ||
201 | extern int probe_nr_irqs(void); | 201 | extern void probe_nr_irqs_gsi(void); |
202 | 202 | ||
203 | #else /* !CONFIG_X86_IO_APIC */ | 203 | #else /* !CONFIG_X86_IO_APIC */ |
204 | #define io_apic_assign_pci_irqs 0 | 204 | #define io_apic_assign_pci_irqs 0 |
205 | static const int timer_through_8259 = 0; | 205 | static const int timer_through_8259 = 0; |
206 | static inline void ioapic_init_mappings(void) { } | 206 | static inline void ioapic_init_mappings(void) { } |
207 | 207 | ||
208 | static inline int probe_nr_irqs(void) | 208 | static inline void probe_nr_irqs_gsi(void) { } |
209 | { | ||
210 | return NR_IRQS; | ||
211 | } | ||
212 | #endif | 209 | #endif |
213 | 210 | ||
214 | #endif /* _ASM_X86_IO_APIC_H */ | 211 | #endif /* _ASM_X86_IO_APIC_H */ |
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 295b13193f4d..a6ee9e6f530f 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -7,8 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops; | |||
7 | extern int force_iommu, no_iommu; | 7 | extern int force_iommu, no_iommu; |
8 | extern int iommu_detected; | 8 | extern int iommu_detected; |
9 | 9 | ||
10 | extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); | ||
11 | |||
12 | /* 10 seconds */ | 10 | /* 10 seconds */ |
13 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
14 | 12 | ||
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f89dffb28aa9..c745a306f7d3 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h | |||
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, | |||
117 | native_apic_mem_write(APIC_ICR, cfg); | 117 | native_apic_mem_write(APIC_ICR, cfg); |
118 | } | 118 | } |
119 | 119 | ||
120 | static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) | 120 | static inline void send_IPI_mask_sequence(const struct cpumask *mask, |
121 | int vector) | ||
121 | { | 122 | { |
122 | unsigned long flags; | 123 | unsigned long flags; |
123 | unsigned long query_cpu; | 124 | unsigned long query_cpu; |
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
128 | * - mbligh | 129 | * - mbligh |
129 | */ | 130 | */ |
130 | local_irq_save(flags); | 131 | local_irq_save(flags); |
131 | for_each_cpu_mask_nr(query_cpu, mask) { | 132 | for_each_cpu(query_cpu, mask) { |
132 | __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), | 133 | __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), |
133 | vector, APIC_DEST_PHYSICAL); | 134 | vector, APIC_DEST_PHYSICAL); |
134 | } | 135 | } |
135 | local_irq_restore(flags); | 136 | local_irq_restore(flags); |
136 | } | 137 | } |
137 | 138 | ||
139 | static inline void send_IPI_mask_allbutself(const struct cpumask *mask, | ||
140 | int vector) | ||
141 | { | ||
142 | unsigned long flags; | ||
143 | unsigned int query_cpu; | ||
144 | unsigned int this_cpu = smp_processor_id(); | ||
145 | |||
146 | /* See Hack comment above */ | ||
147 | |||
148 | local_irq_save(flags); | ||
149 | for_each_cpu(query_cpu, mask) | ||
150 | if (query_cpu != this_cpu) | ||
151 | __send_IPI_dest_field( | ||
152 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
153 | vector, APIC_DEST_PHYSICAL); | ||
154 | local_irq_restore(flags); | ||
155 | } | ||
156 | |||
138 | #endif /* _ASM_X86_IPI_H */ | 157 | #endif /* _ASM_X86_IPI_H */ |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 28e409fc73f3..592688ed04d3 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq) | |||
33 | 33 | ||
34 | #ifdef CONFIG_HOTPLUG_CPU | 34 | #ifdef CONFIG_HOTPLUG_CPU |
35 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
36 | extern void fixup_irqs(cpumask_t map); | 36 | extern void fixup_irqs(void); |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | extern unsigned int do_IRQ(struct pt_regs *regs); | 39 | extern unsigned int do_IRQ(struct pt_regs *regs); |
@@ -42,5 +42,6 @@ extern void native_init_IRQ(void); | |||
42 | 42 | ||
43 | /* Interrupt vector management */ | 43 | /* Interrupt vector management */ |
44 | extern DECLARE_BITMAP(used_vectors, NR_VECTORS); | 44 | extern DECLARE_BITMAP(used_vectors, NR_VECTORS); |
45 | extern int vector_used_by_percpu_irq(unsigned int vector); | ||
45 | 46 | ||
46 | #endif /* _ASM_X86_IRQ_H */ | 47 | #endif /* _ASM_X86_IRQ_H */ |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f941..f7ff65032b9d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -101,12 +101,23 @@ | |||
101 | #define LAST_VM86_IRQ 15 | 101 | #define LAST_VM86_IRQ 15 |
102 | #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) | 102 | #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) |
103 | 103 | ||
104 | #define NR_IRQS_LEGACY 16 | ||
105 | |||
104 | #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) | 106 | #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) |
107 | |||
108 | #ifndef CONFIG_SPARSE_IRQ | ||
105 | # if NR_CPUS < MAX_IO_APICS | 109 | # if NR_CPUS < MAX_IO_APICS |
106 | # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) | 110 | # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) |
107 | # else | 111 | # else |
108 | # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) | 112 | # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) |
109 | # endif | 113 | # endif |
114 | #else | ||
115 | # if (8 * NR_CPUS) > (32 * MAX_IO_APICS) | ||
116 | # define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) | ||
117 | # else | ||
118 | # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) | ||
119 | # endif | ||
120 | #endif | ||
110 | 121 | ||
111 | #elif defined(CONFIG_X86_VOYAGER) | 122 | #elif defined(CONFIG_X86_VOYAGER) |
112 | 123 | ||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8346be87cfa1..97215a458e5f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include <asm/pvclock-abi.h> | 22 | #include <asm/pvclock-abi.h> |
23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
24 | #include <asm/mtrr.h> | ||
24 | 25 | ||
25 | #define KVM_MAX_VCPUS 16 | 26 | #define KVM_MAX_VCPUS 16 |
26 | #define KVM_MEMORY_SLOTS 32 | 27 | #define KVM_MEMORY_SLOTS 32 |
@@ -86,6 +87,7 @@ | |||
86 | #define KVM_MIN_FREE_MMU_PAGES 5 | 87 | #define KVM_MIN_FREE_MMU_PAGES 5 |
87 | #define KVM_REFILL_PAGES 25 | 88 | #define KVM_REFILL_PAGES 25 |
88 | #define KVM_MAX_CPUID_ENTRIES 40 | 89 | #define KVM_MAX_CPUID_ENTRIES 40 |
90 | #define KVM_NR_FIXED_MTRR_REGION 88 | ||
89 | #define KVM_NR_VAR_MTRR 8 | 91 | #define KVM_NR_VAR_MTRR 8 |
90 | 92 | ||
91 | extern spinlock_t kvm_lock; | 93 | extern spinlock_t kvm_lock; |
@@ -180,6 +182,8 @@ struct kvm_mmu_page { | |||
180 | struct list_head link; | 182 | struct list_head link; |
181 | struct hlist_node hash_link; | 183 | struct hlist_node hash_link; |
182 | 184 | ||
185 | struct list_head oos_link; | ||
186 | |||
183 | /* | 187 | /* |
184 | * The following two entries are used to key the shadow page in the | 188 | * The following two entries are used to key the shadow page in the |
185 | * hash table. | 189 | * hash table. |
@@ -190,13 +194,16 @@ struct kvm_mmu_page { | |||
190 | u64 *spt; | 194 | u64 *spt; |
191 | /* hold the gfn of each spte inside spt */ | 195 | /* hold the gfn of each spte inside spt */ |
192 | gfn_t *gfns; | 196 | gfn_t *gfns; |
193 | unsigned long slot_bitmap; /* One bit set per slot which has memory | 197 | /* |
194 | * in this shadow page. | 198 | * One bit set per slot which has memory |
195 | */ | 199 | * in this shadow page. |
200 | */ | ||
201 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | ||
196 | int multimapped; /* More than one parent_pte? */ | 202 | int multimapped; /* More than one parent_pte? */ |
197 | int root_count; /* Currently serving as active root */ | 203 | int root_count; /* Currently serving as active root */ |
198 | bool unsync; | 204 | bool unsync; |
199 | bool unsync_children; | 205 | bool global; |
206 | unsigned int unsync_children; | ||
200 | union { | 207 | union { |
201 | u64 *parent_pte; /* !multimapped */ | 208 | u64 *parent_pte; /* !multimapped */ |
202 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ | 209 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ |
@@ -327,8 +334,10 @@ struct kvm_vcpu_arch { | |||
327 | 334 | ||
328 | bool nmi_pending; | 335 | bool nmi_pending; |
329 | bool nmi_injected; | 336 | bool nmi_injected; |
337 | bool nmi_window_open; | ||
330 | 338 | ||
331 | u64 mtrr[0x100]; | 339 | struct mtrr_state_type mtrr_state; |
340 | u32 pat; | ||
332 | }; | 341 | }; |
333 | 342 | ||
334 | struct kvm_mem_alias { | 343 | struct kvm_mem_alias { |
@@ -350,11 +359,13 @@ struct kvm_arch{ | |||
350 | */ | 359 | */ |
351 | struct list_head active_mmu_pages; | 360 | struct list_head active_mmu_pages; |
352 | struct list_head assigned_dev_head; | 361 | struct list_head assigned_dev_head; |
362 | struct list_head oos_global_pages; | ||
353 | struct dmar_domain *intel_iommu_domain; | 363 | struct dmar_domain *intel_iommu_domain; |
354 | struct kvm_pic *vpic; | 364 | struct kvm_pic *vpic; |
355 | struct kvm_ioapic *vioapic; | 365 | struct kvm_ioapic *vioapic; |
356 | struct kvm_pit *vpit; | 366 | struct kvm_pit *vpit; |
357 | struct hlist_head irq_ack_notifier_list; | 367 | struct hlist_head irq_ack_notifier_list; |
368 | int vapics_in_nmi_mode; | ||
358 | 369 | ||
359 | int round_robin_prev_vcpu; | 370 | int round_robin_prev_vcpu; |
360 | unsigned int tss_addr; | 371 | unsigned int tss_addr; |
@@ -378,6 +389,7 @@ struct kvm_vm_stat { | |||
378 | u32 mmu_recycled; | 389 | u32 mmu_recycled; |
379 | u32 mmu_cache_miss; | 390 | u32 mmu_cache_miss; |
380 | u32 mmu_unsync; | 391 | u32 mmu_unsync; |
392 | u32 mmu_unsync_global; | ||
381 | u32 remote_tlb_flush; | 393 | u32 remote_tlb_flush; |
382 | u32 lpages; | 394 | u32 lpages; |
383 | }; | 395 | }; |
@@ -397,6 +409,7 @@ struct kvm_vcpu_stat { | |||
397 | u32 halt_exits; | 409 | u32 halt_exits; |
398 | u32 halt_wakeup; | 410 | u32 halt_wakeup; |
399 | u32 request_irq_exits; | 411 | u32 request_irq_exits; |
412 | u32 request_nmi_exits; | ||
400 | u32 irq_exits; | 413 | u32 irq_exits; |
401 | u32 host_state_reload; | 414 | u32 host_state_reload; |
402 | u32 efer_reload; | 415 | u32 efer_reload; |
@@ -405,6 +418,7 @@ struct kvm_vcpu_stat { | |||
405 | u32 insn_emulation_fail; | 418 | u32 insn_emulation_fail; |
406 | u32 hypercalls; | 419 | u32 hypercalls; |
407 | u32 irq_injections; | 420 | u32 irq_injections; |
421 | u32 nmi_injections; | ||
408 | }; | 422 | }; |
409 | 423 | ||
410 | struct descriptor_table { | 424 | struct descriptor_table { |
@@ -477,6 +491,7 @@ struct kvm_x86_ops { | |||
477 | 491 | ||
478 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 492 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
479 | int (*get_tdp_level)(void); | 493 | int (*get_tdp_level)(void); |
494 | int (*get_mt_mask_shift)(void); | ||
480 | }; | 495 | }; |
481 | 496 | ||
482 | extern struct kvm_x86_ops *kvm_x86_ops; | 497 | extern struct kvm_x86_ops *kvm_x86_ops; |
@@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); | |||
490 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); | 505 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); |
491 | void kvm_mmu_set_base_ptes(u64 base_pte); | 506 | void kvm_mmu_set_base_ptes(u64 base_pte); |
492 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 507 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
493 | u64 dirty_mask, u64 nx_mask, u64 x_mask); | 508 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); |
494 | 509 | ||
495 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 510 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
496 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 511 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
@@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector); | |||
587 | 602 | ||
588 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 603 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
589 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 604 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
590 | const u8 *new, int bytes); | 605 | const u8 *new, int bytes, |
606 | bool guest_initiated); | ||
591 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 607 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
592 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 608 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
593 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 609 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
594 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 610 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
595 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 611 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
612 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); | ||
596 | 613 | ||
597 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 614 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
598 | 615 | ||
@@ -607,6 +624,8 @@ void kvm_disable_tdp(void); | |||
607 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | 624 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
608 | int complete_pio(struct kvm_vcpu *vcpu); | 625 | int complete_pio(struct kvm_vcpu *vcpu); |
609 | 626 | ||
627 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | ||
628 | |||
610 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 629 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
611 | { | 630 | { |
612 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 631 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
@@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |||
702 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | 721 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); |
703 | } | 722 | } |
704 | 723 | ||
705 | #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" | ||
706 | #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" | ||
707 | #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" | ||
708 | #define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" | ||
709 | #define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" | ||
710 | #define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" | ||
711 | #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" | ||
712 | #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" | ||
713 | #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" | ||
714 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | ||
715 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | ||
716 | |||
717 | #define MSR_IA32_TIME_STAMP_COUNTER 0x010 | 724 | #define MSR_IA32_TIME_STAMP_COUNTER 0x010 |
718 | 725 | ||
719 | #define TSS_IOPB_BASE_OFFSET 0x66 | 726 | #define TSS_IOPB_BASE_OFFSET 0x66 |
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 25179a29f208..6a159732881a 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h | |||
@@ -123,6 +123,7 @@ struct decode_cache { | |||
123 | u8 ad_bytes; | 123 | u8 ad_bytes; |
124 | u8 rex_prefix; | 124 | u8 rex_prefix; |
125 | struct operand src; | 125 | struct operand src; |
126 | struct operand src2; | ||
126 | struct operand dst; | 127 | struct operand dst; |
127 | bool has_seg_override; | 128 | bool has_seg_override; |
128 | u8 seg_override; | 129 | u8 seg_override; |
@@ -146,22 +147,18 @@ struct x86_emulate_ctxt { | |||
146 | /* Register state before/after emulation. */ | 147 | /* Register state before/after emulation. */ |
147 | struct kvm_vcpu *vcpu; | 148 | struct kvm_vcpu *vcpu; |
148 | 149 | ||
149 | /* Linear faulting address (if emulating a page-faulting instruction) */ | ||
150 | unsigned long eflags; | 150 | unsigned long eflags; |
151 | |||
152 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 151 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
153 | int mode; | 152 | int mode; |
154 | |||
155 | u32 cs_base; | 153 | u32 cs_base; |
156 | 154 | ||
157 | /* decode cache */ | 155 | /* decode cache */ |
158 | |||
159 | struct decode_cache decode; | 156 | struct decode_cache decode; |
160 | }; | 157 | }; |
161 | 158 | ||
162 | /* Repeat String Operation Prefix */ | 159 | /* Repeat String Operation Prefix */ |
163 | #define REPE_PREFIX 1 | 160 | #define REPE_PREFIX 1 |
164 | #define REPNE_PREFIX 2 | 161 | #define REPNE_PREFIX 2 |
165 | 162 | ||
166 | /* Execution mode, passed to the emulator. */ | 163 | /* Execution mode, passed to the emulator. */ |
167 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | 164 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ |
@@ -170,7 +167,7 @@ struct x86_emulate_ctxt { | |||
170 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | 167 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ |
171 | 168 | ||
172 | /* Host execution mode. */ | 169 | /* Host execution mode. */ |
173 | #if defined(__i386__) | 170 | #if defined(CONFIG_X86_32) |
174 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 | 171 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 |
175 | #elif defined(CONFIG_X86_64) | 172 | #elif defined(CONFIG_X86_64) |
176 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 | 173 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 |
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 6cb3a467e067..cc09cbbee27e 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h | |||
@@ -8,12 +8,12 @@ | |||
8 | 8 | ||
9 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) | 9 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) |
10 | 10 | ||
11 | static inline cpumask_t target_cpus(void) | 11 | static inline const struct cpumask *target_cpus(void) |
12 | { | 12 | { |
13 | #ifdef CONFIG_SMP | 13 | #ifdef CONFIG_SMP |
14 | return cpu_online_map; | 14 | return cpu_online_mask; |
15 | #else | 15 | #else |
16 | return cpumask_of_cpu(0); | 16 | return cpumask_of(0); |
17 | #endif | 17 | #endif |
18 | } | 18 | } |
19 | 19 | ||
@@ -28,6 +28,7 @@ static inline cpumask_t target_cpus(void) | |||
28 | #define apic_id_registered (genapic->apic_id_registered) | 28 | #define apic_id_registered (genapic->apic_id_registered) |
29 | #define init_apic_ldr (genapic->init_apic_ldr) | 29 | #define init_apic_ldr (genapic->init_apic_ldr) |
30 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) | 30 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) |
31 | #define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) | ||
31 | #define phys_pkg_id (genapic->phys_pkg_id) | 32 | #define phys_pkg_id (genapic->phys_pkg_id) |
32 | #define vector_allocation_domain (genapic->vector_allocation_domain) | 33 | #define vector_allocation_domain (genapic->vector_allocation_domain) |
33 | #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) | 34 | #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) |
@@ -61,9 +62,19 @@ static inline int apic_id_registered(void) | |||
61 | return physid_isset(read_apic_id(), phys_cpu_present_map); | 62 | return physid_isset(read_apic_id(), phys_cpu_present_map); |
62 | } | 63 | } |
63 | 64 | ||
64 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 65 | static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) |
65 | { | 66 | { |
66 | return cpus_addr(cpumask)[0]; | 67 | return cpumask_bits(cpumask)[0]; |
68 | } | ||
69 | |||
70 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
71 | const struct cpumask *andmask) | ||
72 | { | ||
73 | unsigned long mask1 = cpumask_bits(cpumask)[0]; | ||
74 | unsigned long mask2 = cpumask_bits(andmask)[0]; | ||
75 | unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; | ||
76 | |||
77 | return (unsigned int)(mask1 & mask2 & mask3); | ||
67 | } | 78 | } |
68 | 79 | ||
69 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) | 80 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) |
@@ -88,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid) | |||
88 | #endif | 99 | #endif |
89 | } | 100 | } |
90 | 101 | ||
91 | static inline cpumask_t vector_allocation_domain(int cpu) | 102 | static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) |
92 | { | 103 | { |
93 | /* Careful. Some cpus do not strictly honor the set of cpus | 104 | /* Careful. Some cpus do not strictly honor the set of cpus |
94 | * specified in the interrupt destination when using lowest | 105 | * specified in the interrupt destination when using lowest |
@@ -98,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu) | |||
98 | * deliver interrupts to the wrong hyperthread when only one | 109 | * deliver interrupts to the wrong hyperthread when only one |
99 | * hyperthread was specified in the interrupt desitination. | 110 | * hyperthread was specified in the interrupt desitination. |
100 | */ | 111 | */ |
101 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 112 | *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; |
102 | return domain; | ||
103 | } | 113 | } |
104 | #endif | 114 | #endif |
105 | 115 | ||
@@ -131,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu) | |||
131 | 141 | ||
132 | static inline int cpu_present_to_apicid(int mps_cpu) | 142 | static inline int cpu_present_to_apicid(int mps_cpu) |
133 | { | 143 | { |
134 | if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) | 144 | if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) |
135 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | 145 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); |
136 | else | 146 | else |
137 | return BAD_APICID; | 147 | return BAD_APICID; |
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index fabca01ebacf..191312d155da 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h | |||
@@ -4,7 +4,8 @@ | |||
4 | /* Avoid include hell */ | 4 | /* Avoid include hell */ |
5 | #define NMI_VECTOR 0x02 | 5 | #define NMI_VECTOR 0x02 |
6 | 6 | ||
7 | void send_IPI_mask_bitmask(cpumask_t mask, int vector); | 7 | void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); |
8 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
8 | void __send_IPI_shortcut(unsigned int shortcut, int vector); | 9 | void __send_IPI_shortcut(unsigned int shortcut, int vector); |
9 | 10 | ||
10 | extern int no_broadcast; | 11 | extern int no_broadcast; |
@@ -12,28 +13,27 @@ extern int no_broadcast; | |||
12 | #ifdef CONFIG_X86_64 | 13 | #ifdef CONFIG_X86_64 |
13 | #include <asm/genapic.h> | 14 | #include <asm/genapic.h> |
14 | #define send_IPI_mask (genapic->send_IPI_mask) | 15 | #define send_IPI_mask (genapic->send_IPI_mask) |
16 | #define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) | ||
15 | #else | 17 | #else |
16 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 18 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
17 | { | 19 | { |
18 | send_IPI_mask_bitmask(mask, vector); | 20 | send_IPI_mask_bitmask(mask, vector); |
19 | } | 21 | } |
22 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
20 | #endif | 23 | #endif |
21 | 24 | ||
22 | static inline void __local_send_IPI_allbutself(int vector) | 25 | static inline void __local_send_IPI_allbutself(int vector) |
23 | { | 26 | { |
24 | if (no_broadcast || vector == NMI_VECTOR) { | 27 | if (no_broadcast || vector == NMI_VECTOR) |
25 | cpumask_t mask = cpu_online_map; | 28 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
26 | 29 | else | |
27 | cpu_clear(smp_processor_id(), mask); | ||
28 | send_IPI_mask(mask, vector); | ||
29 | } else | ||
30 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); | 30 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); |
31 | } | 31 | } |
32 | 32 | ||
33 | static inline void __local_send_IPI_all(int vector) | 33 | static inline void __local_send_IPI_all(int vector) |
34 | { | 34 | { |
35 | if (no_broadcast || vector == NMI_VECTOR) | 35 | if (no_broadcast || vector == NMI_VECTOR) |
36 | send_IPI_mask(cpu_online_map, vector); | 36 | send_IPI_mask(cpu_online_mask, vector); |
37 | else | 37 | else |
38 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector); | 38 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector); |
39 | } | 39 | } |
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e430f47df667..48553e958ad5 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #define check_phys_apicid_present (genapic->check_phys_apicid_present) | 24 | #define check_phys_apicid_present (genapic->check_phys_apicid_present) |
25 | #define check_apicid_used (genapic->check_apicid_used) | 25 | #define check_apicid_used (genapic->check_apicid_used) |
26 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) | 26 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) |
27 | #define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) | ||
27 | #define vector_allocation_domain (genapic->vector_allocation_domain) | 28 | #define vector_allocation_domain (genapic->vector_allocation_domain) |
28 | #define enable_apic_mode (genapic->enable_apic_mode) | 29 | #define enable_apic_mode (genapic->enable_apic_mode) |
29 | #define phys_pkg_id (genapic->phys_pkg_id) | 30 | #define phys_pkg_id (genapic->phys_pkg_id) |
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31e..cb988aab716d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h | |||
@@ -57,6 +57,31 @@ struct mtrr_gentry { | |||
57 | }; | 57 | }; |
58 | #endif /* !__i386__ */ | 58 | #endif /* !__i386__ */ |
59 | 59 | ||
60 | struct mtrr_var_range { | ||
61 | u32 base_lo; | ||
62 | u32 base_hi; | ||
63 | u32 mask_lo; | ||
64 | u32 mask_hi; | ||
65 | }; | ||
66 | |||
67 | /* In the Intel processor's MTRR interface, the MTRR type is always held in | ||
68 | an 8 bit field: */ | ||
69 | typedef u8 mtrr_type; | ||
70 | |||
71 | #define MTRR_NUM_FIXED_RANGES 88 | ||
72 | #define MTRR_MAX_VAR_RANGES 256 | ||
73 | |||
74 | struct mtrr_state_type { | ||
75 | struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; | ||
76 | mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; | ||
77 | unsigned char enabled; | ||
78 | unsigned char have_fixed; | ||
79 | mtrr_type def_type; | ||
80 | }; | ||
81 | |||
82 | #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) | ||
83 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | ||
84 | |||
60 | /* These are the various ioctls */ | 85 | /* These are the various ioctls */ |
61 | #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) | 86 | #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) |
62 | #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) | 87 | #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) |
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 0bf2a06b7a4e..c80f00d29965 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h | |||
@@ -7,9 +7,9 @@ | |||
7 | 7 | ||
8 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 8 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
9 | 9 | ||
10 | static inline cpumask_t target_cpus(void) | 10 | static inline const cpumask_t *target_cpus(void) |
11 | { | 11 | { |
12 | return CPU_MASK_ALL; | 12 | return &CPU_MASK_ALL; |
13 | } | 13 | } |
14 | 14 | ||
15 | #define NO_BALANCE_IRQ (1) | 15 | #define NO_BALANCE_IRQ (1) |
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void) | |||
122 | * We use physical apicids here, not logical, so just return the default | 122 | * We use physical apicids here, not logical, so just return the default |
123 | * physical broadcast to stop people from breaking us | 123 | * physical broadcast to stop people from breaking us |
124 | */ | 124 | */ |
125 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 125 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
126 | { | ||
127 | return (int) 0xF; | ||
128 | } | ||
129 | |||
130 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
131 | const struct cpumask *andmask) | ||
126 | { | 132 | { |
127 | return (int) 0xF; | 133 | return (int) 0xF; |
128 | } | 134 | } |
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index 935588d286cf..a8374c652778 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h | |||
@@ -1,25 +1,22 @@ | |||
1 | #ifndef __ASM_NUMAQ_IPI_H | 1 | #ifndef __ASM_NUMAQ_IPI_H |
2 | #define __ASM_NUMAQ_IPI_H | 2 | #define __ASM_NUMAQ_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t, int vector); | 4 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector); |
5 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
10 | 11 | ||
11 | static inline void send_IPI_allbutself(int vector) | 12 | static inline void send_IPI_allbutself(int vector) |
12 | { | 13 | { |
13 | cpumask_t mask = cpu_online_map; | 14 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
14 | cpu_clear(smp_processor_id(), mask); | ||
15 | |||
16 | if (!cpus_empty(mask)) | ||
17 | send_IPI_mask(mask, vector); | ||
18 | } | 15 | } |
19 | 16 | ||
20 | static inline void send_IPI_all(int vector) | 17 | static inline void send_IPI_all(int vector) |
21 | { | 18 | { |
22 | send_IPI_mask(cpu_online_map, vector); | 19 | send_IPI_mask(cpu_online_mask, vector); |
23 | } | 20 | } |
24 | 21 | ||
25 | #endif /* __ASM_NUMAQ_IPI_H */ | 22 | #endif /* __ASM_NUMAQ_IPI_H */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 647781298e7e..66834c41c049 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -84,6 +84,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, | |||
84 | static inline void early_quirks(void) { } | 84 | static inline void early_quirks(void) { } |
85 | #endif | 85 | #endif |
86 | 86 | ||
87 | extern void pci_iommu_alloc(void); | ||
88 | |||
87 | #endif /* __KERNEL__ */ | 89 | #endif /* __KERNEL__ */ |
88 | 90 | ||
89 | #ifdef CONFIG_X86_32 | 91 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index d02d936840a3..4da207982777 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h | |||
@@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, | |||
23 | int reg, int len, u32 value); | 23 | int reg, int len, u32 value); |
24 | 24 | ||
25 | extern void dma32_reserve_bootmem(void); | 25 | extern void dma32_reserve_bootmem(void); |
26 | extern void pci_iommu_alloc(void); | ||
27 | 26 | ||
28 | /* The PCI address space does equal the physical memory | 27 | /* The PCI address space does equal the physical memory |
29 | * address space. The networking and block device layers use | 28 | * address space. The networking and block device layers use |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d12811ce51d9..830b9fcb6427 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -60,7 +60,7 @@ struct smp_ops { | |||
60 | void (*cpu_die)(unsigned int cpu); | 60 | void (*cpu_die)(unsigned int cpu); |
61 | void (*play_dead)(void); | 61 | void (*play_dead)(void); |
62 | 62 | ||
63 | void (*send_call_func_ipi)(cpumask_t mask); | 63 | void (*send_call_func_ipi)(const struct cpumask *mask); |
64 | void (*send_call_func_single_ipi)(int cpu); | 64 | void (*send_call_func_single_ipi)(int cpu); |
65 | }; | 65 | }; |
66 | 66 | ||
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) | |||
125 | 125 | ||
126 | static inline void arch_send_call_function_ipi(cpumask_t mask) | 126 | static inline void arch_send_call_function_ipi(cpumask_t mask) |
127 | { | 127 | { |
128 | smp_ops.send_call_func_ipi(mask); | 128 | smp_ops.send_call_func_ipi(&mask); |
129 | } | 129 | } |
130 | 130 | ||
131 | void cpu_disable_common(void); | 131 | void cpu_disable_common(void); |
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); | |||
138 | void native_play_dead(void); | 138 | void native_play_dead(void); |
139 | void play_dead_common(void); | 139 | void play_dead_common(void); |
140 | 140 | ||
141 | void native_send_call_func_ipi(cpumask_t mask); | 141 | void native_send_call_func_ipi(const struct cpumask *mask); |
142 | void native_send_call_func_single_ipi(int cpu); | 142 | void native_send_call_func_single_ipi(int cpu); |
143 | 143 | ||
144 | extern void prefill_possible_map(void); | 144 | extern void prefill_possible_map(void); |
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9b3070f1c2ac..99327d1be49f 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h | |||
@@ -14,13 +14,13 @@ | |||
14 | 14 | ||
15 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 15 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
16 | 16 | ||
17 | static inline cpumask_t target_cpus(void) | 17 | static inline const cpumask_t *target_cpus(void) |
18 | { | 18 | { |
19 | /* CPU_MASK_ALL (0xff) has undefined behaviour with | 19 | /* CPU_MASK_ALL (0xff) has undefined behaviour with |
20 | * dest_LowestPrio mode logical clustered apic interrupt routing | 20 | * dest_LowestPrio mode logical clustered apic interrupt routing |
21 | * Just start on cpu 0. IRQ balancing will spread load | 21 | * Just start on cpu 0. IRQ balancing will spread load |
22 | */ | 22 | */ |
23 | return cpumask_of_cpu(0); | 23 | return &cpumask_of_cpu(0); |
24 | } | 24 | } |
25 | 25 | ||
26 | #define INT_DELIVERY_MODE (dest_LowestPrio) | 26 | #define INT_DELIVERY_MODE (dest_LowestPrio) |
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) | |||
137 | { | 137 | { |
138 | } | 138 | } |
139 | 139 | ||
140 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 140 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
141 | { | 141 | { |
142 | int num_bits_set; | 142 | int num_bits_set; |
143 | int cpus_found = 0; | 143 | int cpus_found = 0; |
144 | int cpu; | 144 | int cpu; |
145 | int apicid; | 145 | int apicid; |
146 | 146 | ||
147 | num_bits_set = cpus_weight(cpumask); | 147 | num_bits_set = cpus_weight(*cpumask); |
148 | /* Return id to all */ | 148 | /* Return id to all */ |
149 | if (num_bits_set == NR_CPUS) | 149 | if (num_bits_set == NR_CPUS) |
150 | return (int) 0xFF; | 150 | return (int) 0xFF; |
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
152 | * The cpus in the mask must all be on the apic cluster. If are not | 152 | * The cpus in the mask must all be on the apic cluster. If are not |
153 | * on the same apicid cluster return default value of TARGET_CPUS. | 153 | * on the same apicid cluster return default value of TARGET_CPUS. |
154 | */ | 154 | */ |
155 | cpu = first_cpu(cpumask); | 155 | cpu = first_cpu(*cpumask); |
156 | apicid = cpu_to_logical_apicid(cpu); | 156 | apicid = cpu_to_logical_apicid(cpu); |
157 | while (cpus_found < num_bits_set) { | 157 | while (cpus_found < num_bits_set) { |
158 | if (cpu_isset(cpu, cpumask)) { | 158 | if (cpu_isset(cpu, *cpumask)) { |
159 | int new_apicid = cpu_to_logical_apicid(cpu); | 159 | int new_apicid = cpu_to_logical_apicid(cpu); |
160 | if (apicid_cluster(apicid) != | 160 | if (apicid_cluster(apicid) != |
161 | apicid_cluster(new_apicid)){ | 161 | apicid_cluster(new_apicid)){ |
@@ -170,6 +170,49 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
170 | return apicid; | 170 | return apicid; |
171 | } | 171 | } |
172 | 172 | ||
173 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
174 | const struct cpumask *andmask) | ||
175 | { | ||
176 | int num_bits_set; | ||
177 | int cpus_found = 0; | ||
178 | int cpu; | ||
179 | int apicid = 0xFF; | ||
180 | cpumask_var_t cpumask; | ||
181 | |||
182 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
183 | return (int) 0xFF; | ||
184 | |||
185 | cpumask_and(cpumask, inmask, andmask); | ||
186 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
187 | |||
188 | num_bits_set = cpumask_weight(cpumask); | ||
189 | /* Return id to all */ | ||
190 | if (num_bits_set == nr_cpu_ids) | ||
191 | goto exit; | ||
192 | /* | ||
193 | * The cpus in the mask must all be on the apic cluster. If are not | ||
194 | * on the same apicid cluster return default value of TARGET_CPUS. | ||
195 | */ | ||
196 | cpu = cpumask_first(cpumask); | ||
197 | apicid = cpu_to_logical_apicid(cpu); | ||
198 | while (cpus_found < num_bits_set) { | ||
199 | if (cpumask_test_cpu(cpu, cpumask)) { | ||
200 | int new_apicid = cpu_to_logical_apicid(cpu); | ||
201 | if (apicid_cluster(apicid) != | ||
202 | apicid_cluster(new_apicid)){ | ||
203 | printk ("%s: Not a valid mask!\n", __func__); | ||
204 | return 0xFF; | ||
205 | } | ||
206 | apicid = apicid | new_apicid; | ||
207 | cpus_found++; | ||
208 | } | ||
209 | cpu++; | ||
210 | } | ||
211 | exit: | ||
212 | free_cpumask_var(cpumask); | ||
213 | return apicid; | ||
214 | } | ||
215 | |||
173 | /* cpuid returns the value latched in the HW at reset, not the APIC ID | 216 | /* cpuid returns the value latched in the HW at reset, not the APIC ID |
174 | * register's value. For any box whose BIOS changes APIC IDs, like | 217 | * register's value. For any box whose BIOS changes APIC IDs, like |
175 | * clustered APIC systems, we must use hard_smp_processor_id. | 218 | * clustered APIC systems, we must use hard_smp_processor_id. |
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index 53bd1e7bd7b4..a8a2c24f50cc 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h | |||
@@ -1,9 +1,10 @@ | |||
1 | #ifndef __ASM_SUMMIT_IPI_H | 1 | #ifndef __ASM_SUMMIT_IPI_H |
2 | #define __ASM_SUMMIT_IPI_H | 2 | #define __ASM_SUMMIT_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t mask, int vector); | 4 | void send_IPI_mask_sequence(const cpumask_t *mask, int vector); |
5 | void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const cpumask_t *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) | |||
14 | cpu_clear(smp_processor_id(), mask); | 15 | cpu_clear(smp_processor_id(), mask); |
15 | 16 | ||
16 | if (!cpus_empty(mask)) | 17 | if (!cpus_empty(mask)) |
17 | send_IPI_mask(mask, vector); | 18 | send_IPI_mask(&mask, vector); |
18 | } | 19 | } |
19 | 20 | ||
20 | static inline void send_IPI_all(int vector) | 21 | static inline void send_IPI_all(int vector) |
21 | { | 22 | { |
22 | send_IPI_mask(cpu_online_map, vector); | 23 | send_IPI_mask(&cpu_online_map, vector); |
23 | } | 24 | } |
24 | 25 | ||
25 | #endif /* __ASM_SUMMIT_IPI_H */ | 26 | #endif /* __ASM_SUMMIT_IPI_H */ |
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h index 1b8afa78e869..1b8afa78e869 100644 --- a/arch/x86/kvm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed7..79e31e9dcdda 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu); | |||
226 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) | 226 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) |
227 | #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) | 227 | #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) |
228 | #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) | 228 | #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) |
229 | #define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) | ||
230 | #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) | ||
229 | 231 | ||
230 | /* indicates that pointers to the topology cpumask_t maps are valid */ | 232 | /* indicates that pointers to the topology cpumask_t maps are valid */ |
231 | #define arch_provides_topology_pointers yes | 233 | #define arch_provides_topology_pointers yes |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 580c3ee6c58c..4340055b7559 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -157,6 +157,7 @@ extern int __get_user_bad(void); | |||
157 | int __ret_gu; \ | 157 | int __ret_gu; \ |
158 | unsigned long __val_gu; \ | 158 | unsigned long __val_gu; \ |
159 | __chk_user_ptr(ptr); \ | 159 | __chk_user_ptr(ptr); \ |
160 | might_fault(); \ | ||
160 | switch (sizeof(*(ptr))) { \ | 161 | switch (sizeof(*(ptr))) { \ |
161 | case 1: \ | 162 | case 1: \ |
162 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ | 163 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ |
@@ -241,6 +242,7 @@ extern void __put_user_8(void); | |||
241 | int __ret_pu; \ | 242 | int __ret_pu; \ |
242 | __typeof__(*(ptr)) __pu_val; \ | 243 | __typeof__(*(ptr)) __pu_val; \ |
243 | __chk_user_ptr(ptr); \ | 244 | __chk_user_ptr(ptr); \ |
245 | might_fault(); \ | ||
244 | __pu_val = x; \ | 246 | __pu_val = x; \ |
245 | switch (sizeof(*(ptr))) { \ | 247 | switch (sizeof(*(ptr))) { \ |
246 | case 1: \ | 248 | case 1: \ |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index d095a3aeea1b..5e06259e90e5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) | |||
82 | static __always_inline unsigned long __must_check | 82 | static __always_inline unsigned long __must_check |
83 | __copy_to_user(void __user *to, const void *from, unsigned long n) | 83 | __copy_to_user(void __user *to, const void *from, unsigned long n) |
84 | { | 84 | { |
85 | might_sleep(); | 85 | might_fault(); |
86 | return __copy_to_user_inatomic(to, from, n); | 86 | return __copy_to_user_inatomic(to, from, n); |
87 | } | 87 | } |
88 | 88 | ||
89 | static __always_inline unsigned long | 89 | static __always_inline unsigned long |
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) | |||
137 | static __always_inline unsigned long | 137 | static __always_inline unsigned long |
138 | __copy_from_user(void *to, const void __user *from, unsigned long n) | 138 | __copy_from_user(void *to, const void __user *from, unsigned long n) |
139 | { | 139 | { |
140 | might_sleep(); | 140 | might_fault(); |
141 | if (__builtin_constant_p(n)) { | 141 | if (__builtin_constant_p(n)) { |
142 | unsigned long ret; | 142 | unsigned long ret; |
143 | 143 | ||
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) | |||
159 | static __always_inline unsigned long __copy_from_user_nocache(void *to, | 159 | static __always_inline unsigned long __copy_from_user_nocache(void *to, |
160 | const void __user *from, unsigned long n) | 160 | const void __user *from, unsigned long n) |
161 | { | 161 | { |
162 | might_sleep(); | 162 | might_fault(); |
163 | if (__builtin_constant_p(n)) { | 163 | if (__builtin_constant_p(n)) { |
164 | unsigned long ret; | 164 | unsigned long ret; |
165 | 165 | ||
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f8cfd00db450..84210c479fca 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -29,6 +29,8 @@ static __always_inline __must_check | |||
29 | int __copy_from_user(void *dst, const void __user *src, unsigned size) | 29 | int __copy_from_user(void *dst, const void __user *src, unsigned size) |
30 | { | 30 | { |
31 | int ret = 0; | 31 | int ret = 0; |
32 | |||
33 | might_fault(); | ||
32 | if (!__builtin_constant_p(size)) | 34 | if (!__builtin_constant_p(size)) |
33 | return copy_user_generic(dst, (__force void *)src, size); | 35 | return copy_user_generic(dst, (__force void *)src, size); |
34 | switch (size) { | 36 | switch (size) { |
@@ -71,6 +73,8 @@ static __always_inline __must_check | |||
71 | int __copy_to_user(void __user *dst, const void *src, unsigned size) | 73 | int __copy_to_user(void __user *dst, const void *src, unsigned size) |
72 | { | 74 | { |
73 | int ret = 0; | 75 | int ret = 0; |
76 | |||
77 | might_fault(); | ||
74 | if (!__builtin_constant_p(size)) | 78 | if (!__builtin_constant_p(size)) |
75 | return copy_user_generic((__force void *)dst, src, size); | 79 | return copy_user_generic((__force void *)dst, src, size); |
76 | switch (size) { | 80 | switch (size) { |
@@ -113,6 +117,8 @@ static __always_inline __must_check | |||
113 | int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | 117 | int __copy_in_user(void __user *dst, const void __user *src, unsigned size) |
114 | { | 118 | { |
115 | int ret = 0; | 119 | int ret = 0; |
120 | |||
121 | might_fault(); | ||
116 | if (!__builtin_constant_p(size)) | 122 | if (!__builtin_constant_p(size)) |
117 | return copy_user_generic((__force void *)dst, | 123 | return copy_user_generic((__force void *)dst, |
118 | (__force void *)src, size); | 124 | (__force void *)src, size); |
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 000000000000..593636275238 --- /dev/null +++ b/arch/x86/include/asm/virtext.h | |||
@@ -0,0 +1,132 @@ | |||
1 | /* CPU virtualization extensions handling | ||
2 | * | ||
3 | * This should carry the code for handling CPU virtualization extensions | ||
4 | * that needs to live in the kernel core. | ||
5 | * | ||
6 | * Author: Eduardo Habkost <ehabkost@redhat.com> | ||
7 | * | ||
8 | * Copyright (C) 2008, Red Hat Inc. | ||
9 | * | ||
10 | * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. | ||
11 | * | ||
12 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
13 | * the COPYING file in the top-level directory. | ||
14 | */ | ||
15 | #ifndef _ASM_X86_VIRTEX_H | ||
16 | #define _ASM_X86_VIRTEX_H | ||
17 | |||
18 | #include <asm/processor.h> | ||
19 | #include <asm/system.h> | ||
20 | |||
21 | #include <asm/vmx.h> | ||
22 | #include <asm/svm.h> | ||
23 | |||
24 | /* | ||
25 | * VMX functions: | ||
26 | */ | ||
27 | |||
28 | static inline int cpu_has_vmx(void) | ||
29 | { | ||
30 | unsigned long ecx = cpuid_ecx(1); | ||
31 | return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ | ||
32 | } | ||
33 | |||
34 | |||
35 | /** Disable VMX on the current CPU | ||
36 | * | ||
37 | * vmxoff causes a undefined-opcode exception if vmxon was not run | ||
38 | * on the CPU previously. Only call this function if you know VMX | ||
39 | * is enabled. | ||
40 | */ | ||
41 | static inline void cpu_vmxoff(void) | ||
42 | { | ||
43 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | ||
44 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
45 | } | ||
46 | |||
47 | static inline int cpu_vmx_enabled(void) | ||
48 | { | ||
49 | return read_cr4() & X86_CR4_VMXE; | ||
50 | } | ||
51 | |||
52 | /** Disable VMX if it is enabled on the current CPU | ||
53 | * | ||
54 | * You shouldn't call this if cpu_has_vmx() returns 0. | ||
55 | */ | ||
56 | static inline void __cpu_emergency_vmxoff(void) | ||
57 | { | ||
58 | if (cpu_vmx_enabled()) | ||
59 | cpu_vmxoff(); | ||
60 | } | ||
61 | |||
62 | /** Disable VMX if it is supported and enabled on the current CPU | ||
63 | */ | ||
64 | static inline void cpu_emergency_vmxoff(void) | ||
65 | { | ||
66 | if (cpu_has_vmx()) | ||
67 | __cpu_emergency_vmxoff(); | ||
68 | } | ||
69 | |||
70 | |||
71 | |||
72 | |||
73 | /* | ||
74 | * SVM functions: | ||
75 | */ | ||
76 | |||
77 | /** Check if the CPU has SVM support | ||
78 | * | ||
79 | * You can use the 'msg' arg to get a message describing the problem, | ||
80 | * if the function returns zero. Simply pass NULL if you are not interested | ||
81 | * on the messages; gcc should take care of not generating code for | ||
82 | * the messages on this case. | ||
83 | */ | ||
84 | static inline int cpu_has_svm(const char **msg) | ||
85 | { | ||
86 | uint32_t eax, ebx, ecx, edx; | ||
87 | |||
88 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
89 | if (msg) | ||
90 | *msg = "not amd"; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | ||
95 | if (eax < SVM_CPUID_FUNC) { | ||
96 | if (msg) | ||
97 | *msg = "can't execute cpuid_8000000a"; | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | ||
102 | if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { | ||
103 | if (msg) | ||
104 | *msg = "svm not available"; | ||
105 | return 0; | ||
106 | } | ||
107 | return 1; | ||
108 | } | ||
109 | |||
110 | |||
111 | /** Disable SVM on the current CPU | ||
112 | * | ||
113 | * You should call this only if cpu_has_svm() returned true. | ||
114 | */ | ||
115 | static inline void cpu_svm_disable(void) | ||
116 | { | ||
117 | uint64_t efer; | ||
118 | |||
119 | wrmsrl(MSR_VM_HSAVE_PA, 0); | ||
120 | rdmsrl(MSR_EFER, efer); | ||
121 | wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); | ||
122 | } | ||
123 | |||
124 | /** Makes sure SVM is disabled, if it is supported on the CPU | ||
125 | */ | ||
126 | static inline void cpu_emergency_svm_disable(void) | ||
127 | { | ||
128 | if (cpu_has_svm(NULL)) | ||
129 | cpu_svm_disable(); | ||
130 | } | ||
131 | |||
132 | #endif /* _ASM_X86_VIRTEX_H */ | ||
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h index ec5edc339da6..d0238e6151d8 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -63,10 +63,13 @@ | |||
63 | 63 | ||
64 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 | 64 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 |
65 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 | 65 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 |
66 | #define VM_EXIT_SAVE_IA32_PAT 0x00040000 | ||
67 | #define VM_EXIT_LOAD_IA32_PAT 0x00080000 | ||
66 | 68 | ||
67 | #define VM_ENTRY_IA32E_MODE 0x00000200 | 69 | #define VM_ENTRY_IA32E_MODE 0x00000200 |
68 | #define VM_ENTRY_SMM 0x00000400 | 70 | #define VM_ENTRY_SMM 0x00000400 |
69 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 | 71 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 |
72 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | ||
70 | 73 | ||
71 | /* VMCS Encodings */ | 74 | /* VMCS Encodings */ |
72 | enum vmcs_field { | 75 | enum vmcs_field { |
@@ -112,6 +115,8 @@ enum vmcs_field { | |||
112 | VMCS_LINK_POINTER_HIGH = 0x00002801, | 115 | VMCS_LINK_POINTER_HIGH = 0x00002801, |
113 | GUEST_IA32_DEBUGCTL = 0x00002802, | 116 | GUEST_IA32_DEBUGCTL = 0x00002802, |
114 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 117 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
118 | GUEST_IA32_PAT = 0x00002804, | ||
119 | GUEST_IA32_PAT_HIGH = 0x00002805, | ||
115 | GUEST_PDPTR0 = 0x0000280a, | 120 | GUEST_PDPTR0 = 0x0000280a, |
116 | GUEST_PDPTR0_HIGH = 0x0000280b, | 121 | GUEST_PDPTR0_HIGH = 0x0000280b, |
117 | GUEST_PDPTR1 = 0x0000280c, | 122 | GUEST_PDPTR1 = 0x0000280c, |
@@ -120,6 +125,8 @@ enum vmcs_field { | |||
120 | GUEST_PDPTR2_HIGH = 0x0000280f, | 125 | GUEST_PDPTR2_HIGH = 0x0000280f, |
121 | GUEST_PDPTR3 = 0x00002810, | 126 | GUEST_PDPTR3 = 0x00002810, |
122 | GUEST_PDPTR3_HIGH = 0x00002811, | 127 | GUEST_PDPTR3_HIGH = 0x00002811, |
128 | HOST_IA32_PAT = 0x00002c00, | ||
129 | HOST_IA32_PAT_HIGH = 0x00002c01, | ||
123 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 130 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
124 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 131 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
125 | EXCEPTION_BITMAP = 0x00004004, | 132 | EXCEPTION_BITMAP = 0x00004004, |
@@ -331,8 +338,9 @@ enum vmcs_field { | |||
331 | 338 | ||
332 | #define AR_RESERVD_MASK 0xfffe0f00 | 339 | #define AR_RESERVD_MASK 0xfffe0f00 |
333 | 340 | ||
334 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 341 | #define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) |
335 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | 342 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) |
343 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) | ||
336 | 344 | ||
337 | #define VMX_NR_VPIDS (1 << 16) | 345 | #define VMX_NR_VPIDS (1 << 16) |
338 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | 346 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 |
@@ -356,4 +364,19 @@ enum vmcs_field { | |||
356 | 364 | ||
357 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 365 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
358 | 366 | ||
367 | |||
368 | #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" | ||
369 | #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" | ||
370 | #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" | ||
371 | #define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" | ||
372 | #define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" | ||
373 | #define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" | ||
374 | #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" | ||
375 | #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" | ||
376 | #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" | ||
377 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | ||
378 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | ||
379 | |||
380 | |||
381 | |||
359 | #endif | 382 | #endif |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 88dd768eab6d..d364df03c1d6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -109,6 +109,8 @@ obj-$(CONFIG_MICROCODE) += microcode.o | |||
109 | 109 | ||
110 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | 110 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o |
111 | 111 | ||
112 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 | ||
113 | |||
112 | ### | 114 | ### |
113 | # 64 bit specific files | 115 | # 64 bit specific files |
114 | ifeq ($(CONFIG_X86_64),y) | 116 | ifeq ($(CONFIG_X86_64),y) |
@@ -122,7 +124,6 @@ ifeq ($(CONFIG_X86_64),y) | |||
122 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | 124 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o |
123 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | 125 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o |
124 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | 126 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o |
125 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o | ||
126 | 127 | ||
127 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o | 128 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o |
128 | endif | 129 | endif |
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 66198cbe464d..d652515e2855 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c | |||
@@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | |||
119 | 119 | ||
120 | int first_system_vector = 0xfe; | 120 | int first_system_vector = 0xfe; |
121 | 121 | ||
122 | char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | ||
123 | |||
124 | /* | 122 | /* |
125 | * Debug level, exported for io_apic.c | 123 | * Debug level, exported for io_apic.c |
126 | */ | 124 | */ |
@@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta, | |||
142 | struct clock_event_device *evt); | 140 | struct clock_event_device *evt); |
143 | static void lapic_timer_setup(enum clock_event_mode mode, | 141 | static void lapic_timer_setup(enum clock_event_mode mode, |
144 | struct clock_event_device *evt); | 142 | struct clock_event_device *evt); |
145 | static void lapic_timer_broadcast(cpumask_t mask); | 143 | static void lapic_timer_broadcast(const cpumask_t *mask); |
146 | static void apic_pm_activate(void); | 144 | static void apic_pm_activate(void); |
147 | 145 | ||
148 | /* | 146 | /* |
@@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
455 | /* | 453 | /* |
456 | * Local APIC timer broadcast function | 454 | * Local APIC timer broadcast function |
457 | */ | 455 | */ |
458 | static void lapic_timer_broadcast(cpumask_t mask) | 456 | static void lapic_timer_broadcast(const cpumask_t *mask) |
459 | { | 457 | { |
460 | #ifdef CONFIG_SMP | 458 | #ifdef CONFIG_SMP |
461 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | 459 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
@@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void) | |||
471 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 469 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
472 | 470 | ||
473 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); | 471 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); |
474 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); | 472 | levt->cpumask = cpumask_of(smp_processor_id()); |
475 | 473 | ||
476 | clockevents_register_device(levt); | 474 | clockevents_register_device(levt); |
477 | } | 475 | } |
@@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1807 | void __cpuinit generic_processor_info(int apicid, int version) | 1805 | void __cpuinit generic_processor_info(int apicid, int version) |
1808 | { | 1806 | { |
1809 | int cpu; | 1807 | int cpu; |
1810 | cpumask_t tmp_map; | ||
1811 | 1808 | ||
1812 | /* | 1809 | /* |
1813 | * Validate version | 1810 | * Validate version |
1814 | */ | 1811 | */ |
1815 | if (version == 0x0) { | 1812 | if (version == 0x0) { |
1816 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " | 1813 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " |
1817 | "fixing up to 0x10. (tell your hw vendor)\n", | 1814 | "fixing up to 0x10. (tell your hw vendor)\n", |
1818 | version); | 1815 | version); |
1819 | version = 0x10; | 1816 | version = 0x10; |
1820 | } | 1817 | } |
1821 | apic_version[apicid] = version; | 1818 | apic_version[apicid] = version; |
1822 | 1819 | ||
1823 | if (num_processors >= NR_CPUS) { | 1820 | if (num_processors >= nr_cpu_ids) { |
1824 | pr_warning("WARNING: NR_CPUS limit of %i reached." | 1821 | int max = nr_cpu_ids; |
1825 | " Processor ignored.\n", NR_CPUS); | 1822 | int thiscpu = max + disabled_cpus; |
1823 | |||
1824 | pr_warning( | ||
1825 | "ACPI: NR_CPUS/possible_cpus limit of %i reached." | ||
1826 | " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); | ||
1827 | |||
1828 | disabled_cpus++; | ||
1826 | return; | 1829 | return; |
1827 | } | 1830 | } |
1828 | 1831 | ||
1829 | num_processors++; | 1832 | num_processors++; |
1830 | cpus_complement(tmp_map, cpu_present_map); | 1833 | cpu = cpumask_next_zero(-1, cpu_present_mask); |
1831 | cpu = first_cpu(tmp_map); | ||
1832 | 1834 | ||
1833 | physid_set(apicid, phys_cpu_present_map); | 1835 | physid_set(apicid, phys_cpu_present_map); |
1834 | if (apicid == boot_cpu_physical_apicid) { | 1836 | if (apicid == boot_cpu_physical_apicid) { |
@@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1878 | } | 1880 | } |
1879 | #endif | 1881 | #endif |
1880 | 1882 | ||
1881 | cpu_set(cpu, cpu_possible_map); | 1883 | set_cpu_possible(cpu, true); |
1882 | cpu_set(cpu, cpu_present_map); | 1884 | set_cpu_present(cpu, true); |
1883 | } | 1885 | } |
1884 | 1886 | ||
1885 | #ifdef CONFIG_X86_64 | 1887 | #ifdef CONFIG_X86_64 |
@@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void) | |||
2081 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | 2083 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); |
2082 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | 2084 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); |
2083 | 2085 | ||
2084 | for (i = 0; i < NR_CPUS; i++) { | 2086 | for (i = 0; i < nr_cpu_ids; i++) { |
2085 | /* are we being called early in kernel startup? */ | 2087 | /* are we being called early in kernel startup? */ |
2086 | if (bios_cpu_apicid) { | 2088 | if (bios_cpu_apicid) { |
2087 | id = bios_cpu_apicid[i]; | 2089 | id = bios_cpu_apicid[i]; |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 68b5d8681cbb..c6ecda64f5f1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
534 | per_cpu(cpuid4_info, cpu) = NULL; | 534 | per_cpu(cpuid4_info, cpu) = NULL; |
535 | } | 535 | } |
536 | 536 | ||
537 | static int __cpuinit detect_cache_attributes(unsigned int cpu) | 537 | static void get_cpu_leaves(void *_retval) |
538 | { | 538 | { |
539 | struct _cpuid4_info *this_leaf; | 539 | int j, *retval = _retval, cpu = smp_processor_id(); |
540 | unsigned long j; | ||
541 | int retval; | ||
542 | cpumask_t oldmask; | ||
543 | |||
544 | if (num_cache_leaves == 0) | ||
545 | return -ENOENT; | ||
546 | |||
547 | per_cpu(cpuid4_info, cpu) = kzalloc( | ||
548 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
549 | if (per_cpu(cpuid4_info, cpu) == NULL) | ||
550 | return -ENOMEM; | ||
551 | |||
552 | oldmask = current->cpus_allowed; | ||
553 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
554 | if (retval) | ||
555 | goto out; | ||
556 | 540 | ||
557 | /* Do cpuid and store the results */ | 541 | /* Do cpuid and store the results */ |
558 | for (j = 0; j < num_cache_leaves; j++) { | 542 | for (j = 0; j < num_cache_leaves; j++) { |
543 | struct _cpuid4_info *this_leaf; | ||
559 | this_leaf = CPUID4_INFO_IDX(cpu, j); | 544 | this_leaf = CPUID4_INFO_IDX(cpu, j); |
560 | retval = cpuid4_cache_lookup(j, this_leaf); | 545 | *retval = cpuid4_cache_lookup(j, this_leaf); |
561 | if (unlikely(retval < 0)) { | 546 | if (unlikely(*retval < 0)) { |
562 | int i; | 547 | int i; |
563 | 548 | ||
564 | for (i = 0; i < j; i++) | 549 | for (i = 0; i < j; i++) |
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
567 | } | 552 | } |
568 | cache_shared_cpu_map_setup(cpu, j); | 553 | cache_shared_cpu_map_setup(cpu, j); |
569 | } | 554 | } |
570 | set_cpus_allowed_ptr(current, &oldmask); | 555 | } |
556 | |||
557 | static int __cpuinit detect_cache_attributes(unsigned int cpu) | ||
558 | { | ||
559 | int retval; | ||
560 | |||
561 | if (num_cache_leaves == 0) | ||
562 | return -ENOENT; | ||
563 | |||
564 | per_cpu(cpuid4_info, cpu) = kzalloc( | ||
565 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
566 | if (per_cpu(cpuid4_info, cpu) == NULL) | ||
567 | return -ENOMEM; | ||
571 | 568 | ||
572 | out: | 569 | smp_call_function_single(cpu, get_cpu_leaves, &retval, true); |
573 | if (retval) { | 570 | if (retval) { |
574 | kfree(per_cpu(cpuid4_info, cpu)); | 571 | kfree(per_cpu(cpuid4_info, cpu)); |
575 | per_cpu(cpuid4_info, cpu) = NULL; | 572 | per_cpu(cpuid4_info, cpu) = NULL; |
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
626 | cpumask_t *mask = &this_leaf->shared_cpu_map; | 623 | cpumask_t *mask = &this_leaf->shared_cpu_map; |
627 | 624 | ||
628 | n = type? | 625 | n = type? |
629 | cpulist_scnprintf(buf, len-2, *mask): | 626 | cpulist_scnprintf(buf, len-2, mask) : |
630 | cpumask_scnprintf(buf, len-2, *mask); | 627 | cpumask_scnprintf(buf, len-2, mask); |
631 | buf[n++] = '\n'; | 628 | buf[n++] = '\n'; |
632 | buf[n] = '\0'; | 629 | buf[n] = '\0'; |
633 | } | 630 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 748c8f9e7a05..a5a5e0530370 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | |||
83 | * CPU Initialization | 83 | * CPU Initialization |
84 | */ | 84 | */ |
85 | 85 | ||
86 | struct thresh_restart { | ||
87 | struct threshold_block *b; | ||
88 | int reset; | ||
89 | u16 old_limit; | ||
90 | }; | ||
91 | |||
86 | /* must be called with correct cpu affinity */ | 92 | /* must be called with correct cpu affinity */ |
87 | static void threshold_restart_bank(struct threshold_block *b, | 93 | static long threshold_restart_bank(void *_tr) |
88 | int reset, u16 old_limit) | ||
89 | { | 94 | { |
95 | struct thresh_restart *tr = _tr; | ||
90 | u32 mci_misc_hi, mci_misc_lo; | 96 | u32 mci_misc_hi, mci_misc_lo; |
91 | 97 | ||
92 | rdmsr(b->address, mci_misc_lo, mci_misc_hi); | 98 | rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); |
93 | 99 | ||
94 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | 100 | if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) |
95 | reset = 1; /* limit cannot be lower than err count */ | 101 | tr->reset = 1; /* limit cannot be lower than err count */ |
96 | 102 | ||
97 | if (reset) { /* reset err count and overflow bit */ | 103 | if (tr->reset) { /* reset err count and overflow bit */ |
98 | mci_misc_hi = | 104 | mci_misc_hi = |
99 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | 105 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | |
100 | (THRESHOLD_MAX - b->threshold_limit); | 106 | (THRESHOLD_MAX - tr->b->threshold_limit); |
101 | } else if (old_limit) { /* change limit w/o reset */ | 107 | } else if (tr->old_limit) { /* change limit w/o reset */ |
102 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 108 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + |
103 | (old_limit - b->threshold_limit); | 109 | (tr->old_limit - tr->b->threshold_limit); |
104 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 110 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | |
105 | (new_count & THRESHOLD_MAX); | 111 | (new_count & THRESHOLD_MAX); |
106 | } | 112 | } |
107 | 113 | ||
108 | b->interrupt_enable ? | 114 | tr->b->interrupt_enable ? |
109 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | 115 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : |
110 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | 116 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); |
111 | 117 | ||
112 | mci_misc_hi |= MASK_COUNT_EN_HI; | 118 | mci_misc_hi |= MASK_COUNT_EN_HI; |
113 | wrmsr(b->address, mci_misc_lo, mci_misc_hi); | 119 | wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); |
120 | return 0; | ||
114 | } | 121 | } |
115 | 122 | ||
116 | /* cpu init entry point, called from mce.c with preempt off */ | 123 | /* cpu init entry point, called from mce.c with preempt off */ |
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
120 | unsigned int cpu = smp_processor_id(); | 127 | unsigned int cpu = smp_processor_id(); |
121 | u8 lvt_off; | 128 | u8 lvt_off; |
122 | u32 low = 0, high = 0, address = 0; | 129 | u32 low = 0, high = 0, address = 0; |
130 | struct thresh_restart tr; | ||
123 | 131 | ||
124 | for (bank = 0; bank < NR_BANKS; ++bank) { | 132 | for (bank = 0; bank < NR_BANKS; ++bank) { |
125 | for (block = 0; block < NR_BLOCKS; ++block) { | 133 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
162 | wrmsr(address, low, high); | 170 | wrmsr(address, low, high); |
163 | 171 | ||
164 | threshold_defaults.address = address; | 172 | threshold_defaults.address = address; |
165 | threshold_restart_bank(&threshold_defaults, 0, 0); | 173 | tr.b = &threshold_defaults; |
174 | tr.reset = 0; | ||
175 | tr.old_limit = 0; | ||
176 | threshold_restart_bank(&tr); | ||
166 | } | 177 | } |
167 | } | 178 | } |
168 | } | 179 | } |
@@ -251,20 +262,6 @@ struct threshold_attr { | |||
251 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); | 262 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); |
252 | }; | 263 | }; |
253 | 264 | ||
254 | static void affinity_set(unsigned int cpu, cpumask_t *oldmask, | ||
255 | cpumask_t *newmask) | ||
256 | { | ||
257 | *oldmask = current->cpus_allowed; | ||
258 | cpus_clear(*newmask); | ||
259 | cpu_set(cpu, *newmask); | ||
260 | set_cpus_allowed_ptr(current, newmask); | ||
261 | } | ||
262 | |||
263 | static void affinity_restore(const cpumask_t *oldmask) | ||
264 | { | ||
265 | set_cpus_allowed_ptr(current, oldmask); | ||
266 | } | ||
267 | |||
268 | #define SHOW_FIELDS(name) \ | 265 | #define SHOW_FIELDS(name) \ |
269 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ | 266 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ |
270 | { \ | 267 | { \ |
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, | |||
277 | const char *buf, size_t count) | 274 | const char *buf, size_t count) |
278 | { | 275 | { |
279 | char *end; | 276 | char *end; |
280 | cpumask_t oldmask, newmask; | 277 | struct thresh_restart tr; |
281 | unsigned long new = simple_strtoul(buf, &end, 0); | 278 | unsigned long new = simple_strtoul(buf, &end, 0); |
282 | if (end == buf) | 279 | if (end == buf) |
283 | return -EINVAL; | 280 | return -EINVAL; |
284 | b->interrupt_enable = !!new; | 281 | b->interrupt_enable = !!new; |
285 | 282 | ||
286 | affinity_set(b->cpu, &oldmask, &newmask); | 283 | tr.b = b; |
287 | threshold_restart_bank(b, 0, 0); | 284 | tr.reset = 0; |
288 | affinity_restore(&oldmask); | 285 | tr.old_limit = 0; |
286 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); | ||
289 | 287 | ||
290 | return end - buf; | 288 | return end - buf; |
291 | } | 289 | } |
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
294 | const char *buf, size_t count) | 292 | const char *buf, size_t count) |
295 | { | 293 | { |
296 | char *end; | 294 | char *end; |
297 | cpumask_t oldmask, newmask; | 295 | struct thresh_restart tr; |
298 | u16 old; | ||
299 | unsigned long new = simple_strtoul(buf, &end, 0); | 296 | unsigned long new = simple_strtoul(buf, &end, 0); |
300 | if (end == buf) | 297 | if (end == buf) |
301 | return -EINVAL; | 298 | return -EINVAL; |
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
303 | new = THRESHOLD_MAX; | 300 | new = THRESHOLD_MAX; |
304 | if (new < 1) | 301 | if (new < 1) |
305 | new = 1; | 302 | new = 1; |
306 | old = b->threshold_limit; | 303 | tr.old_limit = b->threshold_limit; |
307 | b->threshold_limit = new; | 304 | b->threshold_limit = new; |
305 | tr.b = b; | ||
306 | tr.reset = 0; | ||
308 | 307 | ||
309 | affinity_set(b->cpu, &oldmask, &newmask); | 308 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); |
310 | threshold_restart_bank(b, 0, old); | ||
311 | affinity_restore(&oldmask); | ||
312 | 309 | ||
313 | return end - buf; | 310 | return end - buf; |
314 | } | 311 | } |
315 | 312 | ||
316 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | 313 | static long local_error_count(void *_b) |
317 | { | 314 | { |
318 | u32 high, low; | 315 | struct threshold_block *b = _b; |
319 | cpumask_t oldmask, newmask; | 316 | u32 low, high; |
320 | affinity_set(b->cpu, &oldmask, &newmask); | 317 | |
321 | rdmsr(b->address, low, high); | 318 | rdmsr(b->address, low, high); |
322 | affinity_restore(&oldmask); | 319 | return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); |
323 | return sprintf(buf, "%x\n", | 320 | } |
324 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); | 321 | |
322 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | ||
323 | { | ||
324 | return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); | ||
325 | } | 325 | } |
326 | 326 | ||
327 | static ssize_t store_error_count(struct threshold_block *b, | 327 | static ssize_t store_error_count(struct threshold_block *b, |
328 | const char *buf, size_t count) | 328 | const char *buf, size_t count) |
329 | { | 329 | { |
330 | cpumask_t oldmask, newmask; | 330 | struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; |
331 | affinity_set(b->cpu, &oldmask, &newmask); | 331 | |
332 | threshold_restart_bank(b, 1, 0); | 332 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); |
333 | affinity_restore(&oldmask); | ||
334 | return 1; | 333 | return 1; |
335 | } | 334 | } |
336 | 335 | ||
@@ -463,12 +462,19 @@ out_free: | |||
463 | return err; | 462 | return err; |
464 | } | 463 | } |
465 | 464 | ||
465 | static long local_allocate_threshold_blocks(void *_bank) | ||
466 | { | ||
467 | unsigned int *bank = _bank; | ||
468 | |||
469 | return allocate_threshold_blocks(smp_processor_id(), *bank, 0, | ||
470 | MSR_IA32_MC0_MISC + *bank * 4); | ||
471 | } | ||
472 | |||
466 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | 473 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ |
467 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | 474 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
468 | { | 475 | { |
469 | int i, err = 0; | 476 | int i, err = 0; |
470 | struct threshold_bank *b = NULL; | 477 | struct threshold_bank *b = NULL; |
471 | cpumask_t oldmask, newmask; | ||
472 | char name[32]; | 478 | char name[32]; |
473 | 479 | ||
474 | sprintf(name, "threshold_bank%i", bank); | 480 | sprintf(name, "threshold_bank%i", bank); |
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
519 | 525 | ||
520 | per_cpu(threshold_banks, cpu)[bank] = b; | 526 | per_cpu(threshold_banks, cpu)[bank] = b; |
521 | 527 | ||
522 | affinity_set(cpu, &oldmask, &newmask); | 528 | err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); |
523 | err = allocate_threshold_blocks(cpu, bank, 0, | ||
524 | MSR_IA32_MC0_MISC + bank * 4); | ||
525 | affinity_restore(&oldmask); | ||
526 | |||
527 | if (err) | 529 | if (err) |
528 | goto out_free; | 530 | goto out_free; |
529 | 531 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 4e8d77f01eeb..b59ddcc88cd8 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -14,14 +14,6 @@ | |||
14 | #include <asm/pat.h> | 14 | #include <asm/pat.h> |
15 | #include "mtrr.h" | 15 | #include "mtrr.h" |
16 | 16 | ||
17 | struct mtrr_state { | ||
18 | struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; | ||
19 | mtrr_type fixed_ranges[NUM_FIXED_RANGES]; | ||
20 | unsigned char enabled; | ||
21 | unsigned char have_fixed; | ||
22 | mtrr_type def_type; | ||
23 | }; | ||
24 | |||
25 | struct fixed_range_block { | 17 | struct fixed_range_block { |
26 | int base_msr; /* start address of an MTRR block */ | 18 | int base_msr; /* start address of an MTRR block */ |
27 | int ranges; /* number of MTRRs in this block */ | 19 | int ranges; /* number of MTRRs in this block */ |
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = { | |||
35 | }; | 27 | }; |
36 | 28 | ||
37 | static unsigned long smp_changes_mask; | 29 | static unsigned long smp_changes_mask; |
38 | static struct mtrr_state mtrr_state = {}; | ||
39 | static int mtrr_state_set; | 30 | static int mtrr_state_set; |
40 | u64 mtrr_tom2; | 31 | u64 mtrr_tom2; |
41 | 32 | ||
33 | struct mtrr_state_type mtrr_state = {}; | ||
34 | EXPORT_SYMBOL_GPL(mtrr_state); | ||
35 | |||
42 | #undef MODULE_PARAM_PREFIX | 36 | #undef MODULE_PARAM_PREFIX |
43 | #define MODULE_PARAM_PREFIX "mtrr." | 37 | #define MODULE_PARAM_PREFIX "mtrr." |
44 | 38 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 44fcb237bd52..d259e5d2e054 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | u32 num_var_ranges = 0; | 50 | u32 num_var_ranges = 0; |
51 | 51 | ||
52 | unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | 52 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
53 | static DEFINE_MUTEX(mtrr_mutex); | 53 | static DEFINE_MUTEX(mtrr_mutex); |
54 | 54 | ||
55 | u64 size_or_mask, size_and_mask; | 55 | u64 size_or_mask, size_and_mask; |
@@ -574,7 +574,7 @@ struct mtrr_value { | |||
574 | unsigned long lsize; | 574 | unsigned long lsize; |
575 | }; | 575 | }; |
576 | 576 | ||
577 | static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; | 577 | static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; |
578 | 578 | ||
579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) | 579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) |
580 | { | 580 | { |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2dc4ec656b23..ffd60409cc6d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -8,11 +8,6 @@ | |||
8 | #define MTRRcap_MSR 0x0fe | 8 | #define MTRRcap_MSR 0x0fe |
9 | #define MTRRdefType_MSR 0x2ff | 9 | #define MTRRdefType_MSR 0x2ff |
10 | 10 | ||
11 | #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) | ||
12 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | ||
13 | |||
14 | #define NUM_FIXED_RANGES 88 | ||
15 | #define MAX_VAR_RANGES 256 | ||
16 | #define MTRRfix64K_00000_MSR 0x250 | 11 | #define MTRRfix64K_00000_MSR 0x250 |
17 | #define MTRRfix16K_80000_MSR 0x258 | 12 | #define MTRRfix16K_80000_MSR 0x258 |
18 | #define MTRRfix16K_A0000_MSR 0x259 | 13 | #define MTRRfix16K_A0000_MSR 0x259 |
@@ -29,11 +24,7 @@ | |||
29 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 | 24 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 |
30 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 | 25 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 |
31 | 26 | ||
32 | /* In the Intel processor's MTRR interface, the MTRR type is always held in | 27 | extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
33 | an 8 bit field: */ | ||
34 | typedef u8 mtrr_type; | ||
35 | |||
36 | extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | ||
37 | 28 | ||
38 | struct mtrr_ops { | 29 | struct mtrr_ops { |
39 | u32 vendor; | 30 | u32 vendor; |
@@ -70,13 +61,6 @@ struct set_mtrr_context { | |||
70 | u32 ccr3; | 61 | u32 ccr3; |
71 | }; | 62 | }; |
72 | 63 | ||
73 | struct mtrr_var_range { | ||
74 | u32 base_lo; | ||
75 | u32 base_hi; | ||
76 | u32 mask_lo; | ||
77 | u32 mask_hi; | ||
78 | }; | ||
79 | |||
80 | void set_mtrr_done(struct set_mtrr_context *ctxt); | 64 | void set_mtrr_done(struct set_mtrr_context *ctxt); |
81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); | 65 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); |
82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | 66 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index d84a852e4cd7..c689d19e35ab 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | ||
29 | 30 | ||
30 | #include <mach_ipi.h> | 31 | #include <mach_ipi.h> |
31 | 32 | ||
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) | |||
49 | #endif | 50 | #endif |
50 | crash_save_cpu(regs, cpu); | 51 | crash_save_cpu(regs, cpu); |
51 | 52 | ||
53 | /* Disable VMX or SVM if needed. | ||
54 | * | ||
55 | * We need to disable virtualization on all CPUs. | ||
56 | * Having VMX or SVM enabled on any CPU may break rebooting | ||
57 | * after the kdump kernel has finished its task. | ||
58 | */ | ||
59 | cpu_emergency_vmxoff(); | ||
60 | cpu_emergency_svm_disable(); | ||
61 | |||
52 | disable_local_APIC(); | 62 | disable_local_APIC(); |
53 | } | 63 | } |
54 | 64 | ||
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
80 | local_irq_disable(); | 90 | local_irq_disable(); |
81 | 91 | ||
82 | kdump_nmi_shootdown_cpus(); | 92 | kdump_nmi_shootdown_cpus(); |
93 | |||
94 | /* Booting kdump kernel with VMX or SVM enabled won't work, | ||
95 | * because (among other limitations) we can't disable paging | ||
96 | * with the virt flags. | ||
97 | */ | ||
98 | cpu_emergency_vmxoff(); | ||
99 | cpu_emergency_svm_disable(); | ||
100 | |||
83 | lapic_shutdown(); | 101 | lapic_shutdown(); |
84 | #if defined(CONFIG_X86_IO_APIC) | 102 | #if defined(CONFIG_X86_IO_APIC) |
85 | disable_IO_APIC(); | 103 | disable_IO_APIC(); |
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c0262791bda4..34185488e4fb 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
30 | return 1; | 30 | return 1; |
31 | } | 31 | } |
32 | 32 | ||
33 | static cpumask_t flat_target_cpus(void) | 33 | static const struct cpumask *flat_target_cpus(void) |
34 | { | 34 | { |
35 | return cpu_online_map; | 35 | return cpu_online_mask; |
36 | } | 36 | } |
37 | 37 | ||
38 | static cpumask_t flat_vector_allocation_domain(int cpu) | 38 | static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) |
39 | { | 39 | { |
40 | /* Careful. Some cpus do not strictly honor the set of cpus | 40 | /* Careful. Some cpus do not strictly honor the set of cpus |
41 | * specified in the interrupt destination when using lowest | 41 | * specified in the interrupt destination when using lowest |
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu) | |||
45 | * deliver interrupts to the wrong hyperthread when only one | 45 | * deliver interrupts to the wrong hyperthread when only one |
46 | * hyperthread was specified in the interrupt desitination. | 46 | * hyperthread was specified in the interrupt desitination. |
47 | */ | 47 | */ |
48 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 48 | cpumask_clear(retmask); |
49 | return domain; | 49 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; |
50 | } | 50 | } |
51 | 51 | ||
52 | /* | 52 | /* |
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void) | |||
69 | apic_write(APIC_LDR, val); | 69 | apic_write(APIC_LDR, val); |
70 | } | 70 | } |
71 | 71 | ||
72 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | 72 | static inline void _flat_send_IPI_mask(unsigned long mask, int vector) |
73 | { | 73 | { |
74 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
75 | unsigned long flags; | 74 | unsigned long flags; |
76 | 75 | ||
77 | local_irq_save(flags); | 76 | local_irq_save(flags); |
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | |||
79 | local_irq_restore(flags); | 78 | local_irq_restore(flags); |
80 | } | 79 | } |
81 | 80 | ||
81 | static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) | ||
82 | { | ||
83 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
84 | |||
85 | _flat_send_IPI_mask(mask, vector); | ||
86 | } | ||
87 | |||
88 | static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, | ||
89 | int vector) | ||
90 | { | ||
91 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
92 | int cpu = smp_processor_id(); | ||
93 | |||
94 | if (cpu < BITS_PER_LONG) | ||
95 | clear_bit(cpu, &mask); | ||
96 | _flat_send_IPI_mask(mask, vector); | ||
97 | } | ||
98 | |||
82 | static void flat_send_IPI_allbutself(int vector) | 99 | static void flat_send_IPI_allbutself(int vector) |
83 | { | 100 | { |
101 | int cpu = smp_processor_id(); | ||
84 | #ifdef CONFIG_HOTPLUG_CPU | 102 | #ifdef CONFIG_HOTPLUG_CPU |
85 | int hotplug = 1; | 103 | int hotplug = 1; |
86 | #else | 104 | #else |
87 | int hotplug = 0; | 105 | int hotplug = 0; |
88 | #endif | 106 | #endif |
89 | if (hotplug || vector == NMI_VECTOR) { | 107 | if (hotplug || vector == NMI_VECTOR) { |
90 | cpumask_t allbutme = cpu_online_map; | 108 | if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { |
109 | unsigned long mask = cpumask_bits(cpu_online_mask)[0]; | ||
91 | 110 | ||
92 | cpu_clear(smp_processor_id(), allbutme); | 111 | if (cpu < BITS_PER_LONG) |
112 | clear_bit(cpu, &mask); | ||
93 | 113 | ||
94 | if (!cpus_empty(allbutme)) | 114 | _flat_send_IPI_mask(mask, vector); |
95 | flat_send_IPI_mask(allbutme, vector); | 115 | } |
96 | } else if (num_online_cpus() > 1) { | 116 | } else if (num_online_cpus() > 1) { |
97 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | 117 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); |
98 | } | 118 | } |
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector) | |||
101 | static void flat_send_IPI_all(int vector) | 121 | static void flat_send_IPI_all(int vector) |
102 | { | 122 | { |
103 | if (vector == NMI_VECTOR) | 123 | if (vector == NMI_VECTOR) |
104 | flat_send_IPI_mask(cpu_online_map, vector); | 124 | flat_send_IPI_mask(cpu_online_mask, vector); |
105 | else | 125 | else |
106 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | 126 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); |
107 | } | 127 | } |
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void) | |||
135 | return physid_isset(read_xapic_id(), phys_cpu_present_map); | 155 | return physid_isset(read_xapic_id(), phys_cpu_present_map); |
136 | } | 156 | } |
137 | 157 | ||
138 | static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) | 158 | static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) |
159 | { | ||
160 | return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; | ||
161 | } | ||
162 | |||
163 | static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
164 | const struct cpumask *andmask) | ||
139 | { | 165 | { |
140 | return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; | 166 | unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; |
167 | unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; | ||
168 | |||
169 | return mask1 & mask2; | ||
141 | } | 170 | } |
142 | 171 | ||
143 | static unsigned int phys_pkg_id(int index_msb) | 172 | static unsigned int phys_pkg_id(int index_msb) |
@@ -157,8 +186,10 @@ struct genapic apic_flat = { | |||
157 | .send_IPI_all = flat_send_IPI_all, | 186 | .send_IPI_all = flat_send_IPI_all, |
158 | .send_IPI_allbutself = flat_send_IPI_allbutself, | 187 | .send_IPI_allbutself = flat_send_IPI_allbutself, |
159 | .send_IPI_mask = flat_send_IPI_mask, | 188 | .send_IPI_mask = flat_send_IPI_mask, |
189 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | ||
160 | .send_IPI_self = apic_send_IPI_self, | 190 | .send_IPI_self = apic_send_IPI_self, |
161 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | 191 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, |
192 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, | ||
162 | .phys_pkg_id = phys_pkg_id, | 193 | .phys_pkg_id = phys_pkg_id, |
163 | .get_apic_id = get_apic_id, | 194 | .get_apic_id = get_apic_id, |
164 | .set_apic_id = set_apic_id, | 195 | .set_apic_id = set_apic_id, |
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
188 | return 0; | 219 | return 0; |
189 | } | 220 | } |
190 | 221 | ||
191 | static cpumask_t physflat_target_cpus(void) | 222 | static const struct cpumask *physflat_target_cpus(void) |
192 | { | 223 | { |
193 | return cpu_online_map; | 224 | return cpu_online_mask; |
194 | } | 225 | } |
195 | 226 | ||
196 | static cpumask_t physflat_vector_allocation_domain(int cpu) | 227 | static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) |
197 | { | 228 | { |
198 | return cpumask_of_cpu(cpu); | 229 | cpumask_clear(retmask); |
230 | cpumask_set_cpu(cpu, retmask); | ||
199 | } | 231 | } |
200 | 232 | ||
201 | static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) | 233 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) |
202 | { | 234 | { |
203 | send_IPI_mask_sequence(cpumask, vector); | 235 | send_IPI_mask_sequence(cpumask, vector); |
204 | } | 236 | } |
205 | 237 | ||
206 | static void physflat_send_IPI_allbutself(int vector) | 238 | static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, |
239 | int vector) | ||
207 | { | 240 | { |
208 | cpumask_t allbutme = cpu_online_map; | 241 | send_IPI_mask_allbutself(cpumask, vector); |
242 | } | ||
209 | 243 | ||
210 | cpu_clear(smp_processor_id(), allbutme); | 244 | static void physflat_send_IPI_allbutself(int vector) |
211 | physflat_send_IPI_mask(allbutme, vector); | 245 | { |
246 | send_IPI_mask_allbutself(cpu_online_mask, vector); | ||
212 | } | 247 | } |
213 | 248 | ||
214 | static void physflat_send_IPI_all(int vector) | 249 | static void physflat_send_IPI_all(int vector) |
215 | { | 250 | { |
216 | physflat_send_IPI_mask(cpu_online_map, vector); | 251 | physflat_send_IPI_mask(cpu_online_mask, vector); |
217 | } | 252 | } |
218 | 253 | ||
219 | static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | 254 | static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) |
220 | { | 255 | { |
221 | int cpu; | 256 | int cpu; |
222 | 257 | ||
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
224 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 259 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
225 | * May as well be the first. | 260 | * May as well be the first. |
226 | */ | 261 | */ |
227 | cpu = first_cpu(cpumask); | 262 | cpu = cpumask_first(cpumask); |
228 | if ((unsigned)cpu < nr_cpu_ids) | 263 | if ((unsigned)cpu < nr_cpu_ids) |
229 | return per_cpu(x86_cpu_to_apicid, cpu); | 264 | return per_cpu(x86_cpu_to_apicid, cpu); |
230 | else | 265 | else |
231 | return BAD_APICID; | 266 | return BAD_APICID; |
232 | } | 267 | } |
233 | 268 | ||
269 | static unsigned int | ||
270 | physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
271 | const struct cpumask *andmask) | ||
272 | { | ||
273 | int cpu; | ||
274 | |||
275 | /* | ||
276 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
277 | * May as well be the first. | ||
278 | */ | ||
279 | for_each_cpu_and(cpu, cpumask, andmask) | ||
280 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
281 | break; | ||
282 | if (cpu < nr_cpu_ids) | ||
283 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
284 | return BAD_APICID; | ||
285 | } | ||
286 | |||
234 | struct genapic apic_physflat = { | 287 | struct genapic apic_physflat = { |
235 | .name = "physical flat", | 288 | .name = "physical flat", |
236 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, | 289 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, |
@@ -243,8 +296,10 @@ struct genapic apic_physflat = { | |||
243 | .send_IPI_all = physflat_send_IPI_all, | 296 | .send_IPI_all = physflat_send_IPI_all, |
244 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | 297 | .send_IPI_allbutself = physflat_send_IPI_allbutself, |
245 | .send_IPI_mask = physflat_send_IPI_mask, | 298 | .send_IPI_mask = physflat_send_IPI_mask, |
299 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | ||
246 | .send_IPI_self = apic_send_IPI_self, | 300 | .send_IPI_self = apic_send_IPI_self, |
247 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 301 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, |
302 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | ||
248 | .phys_pkg_id = phys_pkg_id, | 303 | .phys_pkg_id = phys_pkg_id, |
249 | .get_apic_id = get_apic_id, | 304 | .get_apic_id = get_apic_id, |
250 | .set_apic_id = set_apic_id, | 305 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f6a2c8eb48a6..6ce497cc372d 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c | |||
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
22 | 22 | ||
23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
24 | 24 | ||
25 | static cpumask_t x2apic_target_cpus(void) | 25 | static const struct cpumask *x2apic_target_cpus(void) |
26 | { | 26 | { |
27 | return cpumask_of_cpu(0); | 27 | return cpumask_of(0); |
28 | } | 28 | } |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * for now each logical cpu is in its own vector allocation domain. | 31 | * for now each logical cpu is in its own vector allocation domain. |
32 | */ | 32 | */ |
33 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | 33 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
34 | { | 34 | { |
35 | cpumask_t domain = CPU_MASK_NONE; | 35 | cpumask_clear(retmask); |
36 | cpu_set(cpu, domain); | 36 | cpumask_set_cpu(cpu, retmask); |
37 | return domain; | ||
38 | } | 37 | } |
39 | 38 | ||
40 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 39 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, |
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
56 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register | 55 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register |
57 | * writes. | 56 | * writes. |
58 | */ | 57 | */ |
59 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | 58 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
60 | { | 59 | { |
61 | unsigned long flags; | 60 | unsigned long flags; |
62 | unsigned long query_cpu; | 61 | unsigned long query_cpu; |
63 | 62 | ||
64 | local_irq_save(flags); | 63 | local_irq_save(flags); |
65 | for_each_cpu_mask(query_cpu, mask) { | 64 | for_each_cpu(query_cpu, mask) |
66 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 65 | __x2apic_send_IPI_dest( |
67 | vector, APIC_DEST_LOGICAL); | 66 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
68 | } | 67 | vector, APIC_DEST_LOGICAL); |
69 | local_irq_restore(flags); | 68 | local_irq_restore(flags); |
70 | } | 69 | } |
71 | 70 | ||
72 | static void x2apic_send_IPI_allbutself(int vector) | 71 | static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, |
72 | int vector) | ||
73 | { | 73 | { |
74 | cpumask_t mask = cpu_online_map; | 74 | unsigned long flags; |
75 | unsigned long query_cpu; | ||
76 | unsigned long this_cpu = smp_processor_id(); | ||
75 | 77 | ||
76 | cpu_clear(smp_processor_id(), mask); | 78 | local_irq_save(flags); |
79 | for_each_cpu(query_cpu, mask) | ||
80 | if (query_cpu != this_cpu) | ||
81 | __x2apic_send_IPI_dest( | ||
82 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
83 | vector, APIC_DEST_LOGICAL); | ||
84 | local_irq_restore(flags); | ||
85 | } | ||
86 | |||
87 | static void x2apic_send_IPI_allbutself(int vector) | ||
88 | { | ||
89 | unsigned long flags; | ||
90 | unsigned long query_cpu; | ||
91 | unsigned long this_cpu = smp_processor_id(); | ||
77 | 92 | ||
78 | if (!cpus_empty(mask)) | 93 | local_irq_save(flags); |
79 | x2apic_send_IPI_mask(mask, vector); | 94 | for_each_online_cpu(query_cpu) |
95 | if (query_cpu != this_cpu) | ||
96 | __x2apic_send_IPI_dest( | ||
97 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
98 | vector, APIC_DEST_LOGICAL); | ||
99 | local_irq_restore(flags); | ||
80 | } | 100 | } |
81 | 101 | ||
82 | static void x2apic_send_IPI_all(int vector) | 102 | static void x2apic_send_IPI_all(int vector) |
83 | { | 103 | { |
84 | x2apic_send_IPI_mask(cpu_online_map, vector); | 104 | x2apic_send_IPI_mask(cpu_online_mask, vector); |
85 | } | 105 | } |
86 | 106 | ||
87 | static int x2apic_apic_id_registered(void) | 107 | static int x2apic_apic_id_registered(void) |
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void) | |||
89 | return 1; | 109 | return 1; |
90 | } | 110 | } |
91 | 111 | ||
92 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | 112 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
93 | { | 113 | { |
94 | int cpu; | 114 | int cpu; |
95 | 115 | ||
96 | /* | 116 | /* |
97 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 117 | * We're using fixed IRQ delivery, can only return one logical APIC ID. |
98 | * May as well be the first. | 118 | * May as well be the first. |
99 | */ | 119 | */ |
100 | cpu = first_cpu(cpumask); | 120 | cpu = cpumask_first(cpumask); |
101 | if ((unsigned)cpu < NR_CPUS) | 121 | if ((unsigned)cpu < nr_cpu_ids) |
102 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 122 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
103 | else | 123 | else |
104 | return BAD_APICID; | 124 | return BAD_APICID; |
105 | } | 125 | } |
106 | 126 | ||
127 | static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
128 | const struct cpumask *andmask) | ||
129 | { | ||
130 | int cpu; | ||
131 | |||
132 | /* | ||
133 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | ||
134 | * May as well be the first. | ||
135 | */ | ||
136 | for_each_cpu_and(cpu, cpumask, andmask) | ||
137 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
138 | break; | ||
139 | if (cpu < nr_cpu_ids) | ||
140 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
141 | return BAD_APICID; | ||
142 | } | ||
143 | |||
107 | static unsigned int get_apic_id(unsigned long x) | 144 | static unsigned int get_apic_id(unsigned long x) |
108 | { | 145 | { |
109 | unsigned int id; | 146 | unsigned int id; |
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = { | |||
150 | .send_IPI_all = x2apic_send_IPI_all, | 187 | .send_IPI_all = x2apic_send_IPI_all, |
151 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 188 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, |
152 | .send_IPI_mask = x2apic_send_IPI_mask, | 189 | .send_IPI_mask = x2apic_send_IPI_mask, |
190 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
153 | .send_IPI_self = x2apic_send_IPI_self, | 191 | .send_IPI_self = x2apic_send_IPI_self, |
154 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 192 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, |
193 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
155 | .phys_pkg_id = phys_pkg_id, | 194 | .phys_pkg_id = phys_pkg_id, |
156 | .get_apic_id = get_apic_id, | 195 | .get_apic_id = get_apic_id, |
157 | .set_apic_id = set_apic_id, | 196 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index a177c7880ab5..21bcc0e098ba 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c | |||
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
29 | 29 | ||
30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
31 | 31 | ||
32 | static cpumask_t x2apic_target_cpus(void) | 32 | static const struct cpumask *x2apic_target_cpus(void) |
33 | { | 33 | { |
34 | return cpumask_of_cpu(0); | 34 | return cpumask_of(0); |
35 | } | 35 | } |
36 | 36 | ||
37 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | 37 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
38 | { | 38 | { |
39 | cpumask_t domain = CPU_MASK_NONE; | 39 | cpumask_clear(retmask); |
40 | cpu_set(cpu, domain); | 40 | cpumask_set_cpu(cpu, retmask); |
41 | return domain; | ||
42 | } | 41 | } |
43 | 42 | ||
44 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 43 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, |
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
54 | x2apic_icr_write(cfg, apicid); | 53 | x2apic_icr_write(cfg, apicid); |
55 | } | 54 | } |
56 | 55 | ||
57 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | 56 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
58 | { | 57 | { |
59 | unsigned long flags; | 58 | unsigned long flags; |
60 | unsigned long query_cpu; | 59 | unsigned long query_cpu; |
61 | 60 | ||
62 | local_irq_save(flags); | 61 | local_irq_save(flags); |
63 | for_each_cpu_mask(query_cpu, mask) { | 62 | for_each_cpu(query_cpu, mask) { |
64 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | 63 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), |
65 | vector, APIC_DEST_PHYSICAL); | 64 | vector, APIC_DEST_PHYSICAL); |
66 | } | 65 | } |
67 | local_irq_restore(flags); | 66 | local_irq_restore(flags); |
68 | } | 67 | } |
69 | 68 | ||
70 | static void x2apic_send_IPI_allbutself(int vector) | 69 | static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, |
70 | int vector) | ||
71 | { | 71 | { |
72 | cpumask_t mask = cpu_online_map; | 72 | unsigned long flags; |
73 | unsigned long query_cpu; | ||
74 | unsigned long this_cpu = smp_processor_id(); | ||
75 | |||
76 | local_irq_save(flags); | ||
77 | for_each_cpu(query_cpu, mask) { | ||
78 | if (query_cpu != this_cpu) | ||
79 | __x2apic_send_IPI_dest( | ||
80 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
81 | vector, APIC_DEST_PHYSICAL); | ||
82 | } | ||
83 | local_irq_restore(flags); | ||
84 | } | ||
73 | 85 | ||
74 | cpu_clear(smp_processor_id(), mask); | 86 | static void x2apic_send_IPI_allbutself(int vector) |
87 | { | ||
88 | unsigned long flags; | ||
89 | unsigned long query_cpu; | ||
90 | unsigned long this_cpu = smp_processor_id(); | ||
75 | 91 | ||
76 | if (!cpus_empty(mask)) | 92 | local_irq_save(flags); |
77 | x2apic_send_IPI_mask(mask, vector); | 93 | for_each_online_cpu(query_cpu) |
94 | if (query_cpu != this_cpu) | ||
95 | __x2apic_send_IPI_dest( | ||
96 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
97 | vector, APIC_DEST_PHYSICAL); | ||
98 | local_irq_restore(flags); | ||
78 | } | 99 | } |
79 | 100 | ||
80 | static void x2apic_send_IPI_all(int vector) | 101 | static void x2apic_send_IPI_all(int vector) |
81 | { | 102 | { |
82 | x2apic_send_IPI_mask(cpu_online_map, vector); | 103 | x2apic_send_IPI_mask(cpu_online_mask, vector); |
83 | } | 104 | } |
84 | 105 | ||
85 | static int x2apic_apic_id_registered(void) | 106 | static int x2apic_apic_id_registered(void) |
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void) | |||
87 | return 1; | 108 | return 1; |
88 | } | 109 | } |
89 | 110 | ||
90 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | 111 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
91 | { | 112 | { |
92 | int cpu; | 113 | int cpu; |
93 | 114 | ||
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | |||
95 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 116 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
96 | * May as well be the first. | 117 | * May as well be the first. |
97 | */ | 118 | */ |
98 | cpu = first_cpu(cpumask); | 119 | cpu = cpumask_first(cpumask); |
99 | if ((unsigned)cpu < NR_CPUS) | 120 | if ((unsigned)cpu < nr_cpu_ids) |
100 | return per_cpu(x86_cpu_to_apicid, cpu); | 121 | return per_cpu(x86_cpu_to_apicid, cpu); |
101 | else | 122 | else |
102 | return BAD_APICID; | 123 | return BAD_APICID; |
103 | } | 124 | } |
104 | 125 | ||
126 | static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
127 | const struct cpumask *andmask) | ||
128 | { | ||
129 | int cpu; | ||
130 | |||
131 | /* | ||
132 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
133 | * May as well be the first. | ||
134 | */ | ||
135 | for_each_cpu_and(cpu, cpumask, andmask) | ||
136 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
137 | break; | ||
138 | if (cpu < nr_cpu_ids) | ||
139 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
140 | return BAD_APICID; | ||
141 | } | ||
142 | |||
105 | static unsigned int get_apic_id(unsigned long x) | 143 | static unsigned int get_apic_id(unsigned long x) |
106 | { | 144 | { |
107 | unsigned int id; | 145 | unsigned int id; |
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = { | |||
145 | .send_IPI_all = x2apic_send_IPI_all, | 183 | .send_IPI_all = x2apic_send_IPI_all, |
146 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 184 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, |
147 | .send_IPI_mask = x2apic_send_IPI_mask, | 185 | .send_IPI_mask = x2apic_send_IPI_mask, |
186 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
148 | .send_IPI_self = x2apic_send_IPI_self, | 187 | .send_IPI_self = x2apic_send_IPI_self, |
149 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 188 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, |
189 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
150 | .phys_pkg_id = phys_pkg_id, | 190 | .phys_pkg_id = phys_pkg_id, |
151 | .get_apic_id = get_apic_id, | 191 | .get_apic_id = get_apic_id, |
152 | .set_apic_id = set_apic_id, | 192 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dece17289731..b193e082f6ce 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); | |||
79 | 79 | ||
80 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 80 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
81 | 81 | ||
82 | static cpumask_t uv_target_cpus(void) | 82 | static const struct cpumask *uv_target_cpus(void) |
83 | { | 83 | { |
84 | return cpumask_of_cpu(0); | 84 | return cpumask_of(0); |
85 | } | 85 | } |
86 | 86 | ||
87 | static cpumask_t uv_vector_allocation_domain(int cpu) | 87 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) |
88 | { | 88 | { |
89 | cpumask_t domain = CPU_MASK_NONE; | 89 | cpumask_clear(retmask); |
90 | cpu_set(cpu, domain); | 90 | cpumask_set_cpu(cpu, retmask); |
91 | return domain; | ||
92 | } | 91 | } |
93 | 92 | ||
94 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) | 93 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) |
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector) | |||
127 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 126 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
128 | } | 127 | } |
129 | 128 | ||
130 | static void uv_send_IPI_mask(cpumask_t mask, int vector) | 129 | static void uv_send_IPI_mask(const struct cpumask *mask, int vector) |
131 | { | 130 | { |
132 | unsigned int cpu; | 131 | unsigned int cpu; |
133 | 132 | ||
134 | for_each_possible_cpu(cpu) | 133 | for_each_cpu(cpu, mask) |
135 | if (cpu_isset(cpu, mask)) | 134 | uv_send_IPI_one(cpu, vector); |
135 | } | ||
136 | |||
137 | static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
138 | { | ||
139 | unsigned int cpu; | ||
140 | unsigned int this_cpu = smp_processor_id(); | ||
141 | |||
142 | for_each_cpu(cpu, mask) | ||
143 | if (cpu != this_cpu) | ||
136 | uv_send_IPI_one(cpu, vector); | 144 | uv_send_IPI_one(cpu, vector); |
137 | } | 145 | } |
138 | 146 | ||
139 | static void uv_send_IPI_allbutself(int vector) | 147 | static void uv_send_IPI_allbutself(int vector) |
140 | { | 148 | { |
141 | cpumask_t mask = cpu_online_map; | 149 | unsigned int cpu; |
142 | 150 | unsigned int this_cpu = smp_processor_id(); | |
143 | cpu_clear(smp_processor_id(), mask); | ||
144 | 151 | ||
145 | if (!cpus_empty(mask)) | 152 | for_each_online_cpu(cpu) |
146 | uv_send_IPI_mask(mask, vector); | 153 | if (cpu != this_cpu) |
154 | uv_send_IPI_one(cpu, vector); | ||
147 | } | 155 | } |
148 | 156 | ||
149 | static void uv_send_IPI_all(int vector) | 157 | static void uv_send_IPI_all(int vector) |
150 | { | 158 | { |
151 | uv_send_IPI_mask(cpu_online_map, vector); | 159 | uv_send_IPI_mask(cpu_online_mask, vector); |
152 | } | 160 | } |
153 | 161 | ||
154 | static int uv_apic_id_registered(void) | 162 | static int uv_apic_id_registered(void) |
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void) | |||
160 | { | 168 | { |
161 | } | 169 | } |
162 | 170 | ||
163 | static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | 171 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) |
164 | { | 172 | { |
165 | int cpu; | 173 | int cpu; |
166 | 174 | ||
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | |||
168 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 176 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
169 | * May as well be the first. | 177 | * May as well be the first. |
170 | */ | 178 | */ |
171 | cpu = first_cpu(cpumask); | 179 | cpu = cpumask_first(cpumask); |
172 | if ((unsigned)cpu < nr_cpu_ids) | 180 | if ((unsigned)cpu < nr_cpu_ids) |
173 | return per_cpu(x86_cpu_to_apicid, cpu); | 181 | return per_cpu(x86_cpu_to_apicid, cpu); |
174 | else | 182 | else |
175 | return BAD_APICID; | 183 | return BAD_APICID; |
176 | } | 184 | } |
177 | 185 | ||
186 | static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
187 | const struct cpumask *andmask) | ||
188 | { | ||
189 | int cpu; | ||
190 | |||
191 | /* | ||
192 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
193 | * May as well be the first. | ||
194 | */ | ||
195 | for_each_cpu_and(cpu, cpumask, andmask) | ||
196 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
197 | break; | ||
198 | if (cpu < nr_cpu_ids) | ||
199 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
200 | return BAD_APICID; | ||
201 | } | ||
202 | |||
178 | static unsigned int get_apic_id(unsigned long x) | 203 | static unsigned int get_apic_id(unsigned long x) |
179 | { | 204 | { |
180 | unsigned int id; | 205 | unsigned int id; |
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = { | |||
222 | .send_IPI_all = uv_send_IPI_all, | 247 | .send_IPI_all = uv_send_IPI_all, |
223 | .send_IPI_allbutself = uv_send_IPI_allbutself, | 248 | .send_IPI_allbutself = uv_send_IPI_allbutself, |
224 | .send_IPI_mask = uv_send_IPI_mask, | 249 | .send_IPI_mask = uv_send_IPI_mask, |
250 | .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, | ||
225 | .send_IPI_self = uv_send_IPI_self, | 251 | .send_IPI_self = uv_send_IPI_self, |
226 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | 252 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, |
253 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | ||
227 | .phys_pkg_id = phys_pkg_id, | 254 | .phys_pkg_id = phys_pkg_id, |
228 | .get_apic_id = get_apic_id, | 255 | .get_apic_id = get_apic_id, |
229 | .set_apic_id = set_apic_id, | 256 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3f0a3edf0a57..cd759ad90690 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) | |||
248 | * Start hpet with the boot cpu mask and make it | 248 | * Start hpet with the boot cpu mask and make it |
249 | * global after the IO_APIC has been initialized. | 249 | * global after the IO_APIC has been initialized. |
250 | */ | 250 | */ |
251 | hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); | 251 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); |
252 | clockevents_register_device(&hpet_clockevent); | 252 | clockevents_register_device(&hpet_clockevent); |
253 | global_clock_event = &hpet_clockevent; | 253 | global_clock_event = &hpet_clockevent; |
254 | printk(KERN_DEBUG "hpet clockevent registered\n"); | 254 | printk(KERN_DEBUG "hpet clockevent registered\n"); |
@@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
303 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); | 303 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); |
304 | hpet_setup_msi_irq(hdev->irq); | 304 | hpet_setup_msi_irq(hdev->irq); |
305 | disable_irq(hdev->irq); | 305 | disable_irq(hdev->irq); |
306 | irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); | 306 | irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); |
307 | enable_irq(hdev->irq); | 307 | enable_irq(hdev->irq); |
308 | } | 308 | } |
309 | break; | 309 | break; |
@@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
451 | return -1; | 451 | return -1; |
452 | 452 | ||
453 | disable_irq(dev->irq); | 453 | disable_irq(dev->irq); |
454 | irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); | 454 | irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); |
455 | enable_irq(dev->irq); | 455 | enable_irq(dev->irq); |
456 | 456 | ||
457 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", | 457 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", |
@@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | |||
502 | /* 5 usec minimum reprogramming delta. */ | 502 | /* 5 usec minimum reprogramming delta. */ |
503 | evt->min_delta_ns = 5000; | 503 | evt->min_delta_ns = 5000; |
504 | 504 | ||
505 | evt->cpumask = cpumask_of_cpu(hdev->cpu); | 505 | evt->cpumask = cpumask_of(hdev->cpu); |
506 | clockevents_register_device(evt); | 506 | clockevents_register_device(evt); |
507 | } | 507 | } |
508 | 508 | ||
@@ -813,7 +813,7 @@ int __init hpet_enable(void) | |||
813 | 813 | ||
814 | out_nohpet: | 814 | out_nohpet: |
815 | hpet_clear_mapping(); | 815 | hpet_clear_mapping(); |
816 | boot_hpet_disable = 1; | 816 | hpet_address = 0; |
817 | return 0; | 817 | return 0; |
818 | } | 818 | } |
819 | 819 | ||
@@ -836,10 +836,11 @@ static __init int hpet_late_init(void) | |||
836 | 836 | ||
837 | hpet_address = force_hpet_address; | 837 | hpet_address = force_hpet_address; |
838 | hpet_enable(); | 838 | hpet_enable(); |
839 | if (!hpet_virt_address) | ||
840 | return -ENODEV; | ||
841 | } | 839 | } |
842 | 840 | ||
841 | if (!hpet_virt_address) | ||
842 | return -ENODEV; | ||
843 | |||
843 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 844 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
844 | 845 | ||
845 | for_each_online_cpu(cpu) { | 846 | for_each_online_cpu(cpu) { |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c1b5e3ece1f2..10f92fb532f3 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void) | |||
114 | * Start pit with the boot cpu mask and make it global after the | 114 | * Start pit with the boot cpu mask and make it global after the |
115 | * IO_APIC has been initialized. | 115 | * IO_APIC has been initialized. |
116 | */ | 116 | */ |
117 | pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); | 117 | pit_clockevent.cpumask = cpumask_of(smp_processor_id()); |
118 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, | 118 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, |
119 | pit_clockevent.shift); | 119 | pit_clockevent.shift); |
120 | pit_clockevent.max_delta_ns = | 120 | pit_clockevent.max_delta_ns = |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index d39918076bb4..df3bf269beab 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/desc.h> | 11 | #include <asm/desc.h> |
12 | 12 | ||
13 | static struct fs_struct init_fs = INIT_FS; | ||
14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
16 | struct mm_struct init_mm = INIT_MM(init_mm); | 15 | struct mm_struct init_mm = INIT_MM(init_mm); |
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index b8c8a8e99341..69911722b9d3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c | |||
@@ -108,94 +108,277 @@ static int __init parse_noapic(char *str) | |||
108 | early_param("noapic", parse_noapic); | 108 | early_param("noapic", parse_noapic); |
109 | 109 | ||
110 | struct irq_pin_list; | 110 | struct irq_pin_list; |
111 | |||
112 | /* | ||
113 | * This is performance-critical, we want to do it O(1) | ||
114 | * | ||
115 | * the indexing order of this array favors 1:1 mappings | ||
116 | * between pins and IRQs. | ||
117 | */ | ||
118 | |||
119 | struct irq_pin_list { | ||
120 | int apic, pin; | ||
121 | struct irq_pin_list *next; | ||
122 | }; | ||
123 | |||
124 | static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) | ||
125 | { | ||
126 | struct irq_pin_list *pin; | ||
127 | int node; | ||
128 | |||
129 | node = cpu_to_node(cpu); | ||
130 | |||
131 | pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); | ||
132 | printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); | ||
133 | |||
134 | return pin; | ||
135 | } | ||
136 | |||
111 | struct irq_cfg { | 137 | struct irq_cfg { |
112 | unsigned int irq; | ||
113 | struct irq_pin_list *irq_2_pin; | 138 | struct irq_pin_list *irq_2_pin; |
114 | cpumask_t domain; | 139 | cpumask_var_t domain; |
115 | cpumask_t old_domain; | 140 | cpumask_var_t old_domain; |
116 | unsigned move_cleanup_count; | 141 | unsigned move_cleanup_count; |
117 | u8 vector; | 142 | u8 vector; |
118 | u8 move_in_progress : 1; | 143 | u8 move_in_progress : 1; |
144 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
145 | u8 move_desc_pending : 1; | ||
146 | #endif | ||
119 | }; | 147 | }; |
120 | 148 | ||
121 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 149 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
150 | #ifdef CONFIG_SPARSE_IRQ | ||
151 | static struct irq_cfg irq_cfgx[] = { | ||
152 | #else | ||
122 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | 153 | static struct irq_cfg irq_cfgx[NR_IRQS] = { |
123 | [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, | 154 | #endif |
124 | [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, | 155 | [0] = { .vector = IRQ0_VECTOR, }, |
125 | [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, | 156 | [1] = { .vector = IRQ1_VECTOR, }, |
126 | [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, | 157 | [2] = { .vector = IRQ2_VECTOR, }, |
127 | [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, | 158 | [3] = { .vector = IRQ3_VECTOR, }, |
128 | [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, | 159 | [4] = { .vector = IRQ4_VECTOR, }, |
129 | [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, | 160 | [5] = { .vector = IRQ5_VECTOR, }, |
130 | [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, | 161 | [6] = { .vector = IRQ6_VECTOR, }, |
131 | [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, | 162 | [7] = { .vector = IRQ7_VECTOR, }, |
132 | [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, | 163 | [8] = { .vector = IRQ8_VECTOR, }, |
133 | [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, | 164 | [9] = { .vector = IRQ9_VECTOR, }, |
134 | [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, | 165 | [10] = { .vector = IRQ10_VECTOR, }, |
135 | [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, | 166 | [11] = { .vector = IRQ11_VECTOR, }, |
136 | [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, | 167 | [12] = { .vector = IRQ12_VECTOR, }, |
137 | [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, | 168 | [13] = { .vector = IRQ13_VECTOR, }, |
138 | [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, | 169 | [14] = { .vector = IRQ14_VECTOR, }, |
170 | [15] = { .vector = IRQ15_VECTOR, }, | ||
139 | }; | 171 | }; |
140 | 172 | ||
141 | #define for_each_irq_cfg(irq, cfg) \ | 173 | int __init arch_early_irq_init(void) |
142 | for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) | 174 | { |
175 | struct irq_cfg *cfg; | ||
176 | struct irq_desc *desc; | ||
177 | int count; | ||
178 | int i; | ||
179 | |||
180 | cfg = irq_cfgx; | ||
181 | count = ARRAY_SIZE(irq_cfgx); | ||
182 | |||
183 | for (i = 0; i < count; i++) { | ||
184 | desc = irq_to_desc(i); | ||
185 | desc->chip_data = &cfg[i]; | ||
186 | alloc_bootmem_cpumask_var(&cfg[i].domain); | ||
187 | alloc_bootmem_cpumask_var(&cfg[i].old_domain); | ||
188 | if (i < NR_IRQS_LEGACY) | ||
189 | cpumask_setall(cfg[i].domain); | ||
190 | } | ||
191 | |||
192 | return 0; | ||
193 | } | ||
143 | 194 | ||
195 | #ifdef CONFIG_SPARSE_IRQ | ||
144 | static struct irq_cfg *irq_cfg(unsigned int irq) | 196 | static struct irq_cfg *irq_cfg(unsigned int irq) |
145 | { | 197 | { |
146 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 198 | struct irq_cfg *cfg = NULL; |
199 | struct irq_desc *desc; | ||
200 | |||
201 | desc = irq_to_desc(irq); | ||
202 | if (desc) | ||
203 | cfg = desc->chip_data; | ||
204 | |||
205 | return cfg; | ||
147 | } | 206 | } |
148 | 207 | ||
149 | static struct irq_cfg *irq_cfg_alloc(unsigned int irq) | 208 | static struct irq_cfg *get_one_free_irq_cfg(int cpu) |
150 | { | 209 | { |
151 | return irq_cfg(irq); | 210 | struct irq_cfg *cfg; |
211 | int node; | ||
212 | |||
213 | node = cpu_to_node(cpu); | ||
214 | |||
215 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | ||
216 | if (cfg) { | ||
217 | /* FIXME: needs alloc_cpumask_var_node() */ | ||
218 | if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { | ||
219 | kfree(cfg); | ||
220 | cfg = NULL; | ||
221 | } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { | ||
222 | free_cpumask_var(cfg->domain); | ||
223 | kfree(cfg); | ||
224 | cfg = NULL; | ||
225 | } else { | ||
226 | cpumask_clear(cfg->domain); | ||
227 | cpumask_clear(cfg->old_domain); | ||
228 | } | ||
229 | } | ||
230 | printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); | ||
231 | |||
232 | return cfg; | ||
152 | } | 233 | } |
153 | 234 | ||
154 | /* | 235 | int arch_init_chip_data(struct irq_desc *desc, int cpu) |
155 | * Rough estimation of how many shared IRQs there are, can be changed | 236 | { |
156 | * anytime. | 237 | struct irq_cfg *cfg; |
157 | */ | ||
158 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | ||
159 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | ||
160 | 238 | ||
161 | /* | 239 | cfg = desc->chip_data; |
162 | * This is performance-critical, we want to do it O(1) | 240 | if (!cfg) { |
163 | * | 241 | desc->chip_data = get_one_free_irq_cfg(cpu); |
164 | * the indexing order of this array favors 1:1 mappings | 242 | if (!desc->chip_data) { |
165 | * between pins and IRQs. | 243 | printk(KERN_ERR "can not alloc irq_cfg\n"); |
166 | */ | 244 | BUG_ON(1); |
245 | } | ||
246 | } | ||
167 | 247 | ||
168 | struct irq_pin_list { | 248 | return 0; |
169 | int apic, pin; | 249 | } |
170 | struct irq_pin_list *next; | ||
171 | }; | ||
172 | 250 | ||
173 | static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; | 251 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC |
174 | static struct irq_pin_list *irq_2_pin_ptr; | ||
175 | 252 | ||
176 | static void __init irq_2_pin_init(void) | 253 | static void |
254 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | ||
177 | { | 255 | { |
178 | struct irq_pin_list *pin = irq_2_pin_head; | 256 | struct irq_pin_list *old_entry, *head, *tail, *entry; |
179 | int i; | 257 | |
258 | cfg->irq_2_pin = NULL; | ||
259 | old_entry = old_cfg->irq_2_pin; | ||
260 | if (!old_entry) | ||
261 | return; | ||
262 | |||
263 | entry = get_one_free_irq_2_pin(cpu); | ||
264 | if (!entry) | ||
265 | return; | ||
180 | 266 | ||
181 | for (i = 1; i < PIN_MAP_SIZE; i++) | 267 | entry->apic = old_entry->apic; |
182 | pin[i-1].next = &pin[i]; | 268 | entry->pin = old_entry->pin; |
269 | head = entry; | ||
270 | tail = entry; | ||
271 | old_entry = old_entry->next; | ||
272 | while (old_entry) { | ||
273 | entry = get_one_free_irq_2_pin(cpu); | ||
274 | if (!entry) { | ||
275 | entry = head; | ||
276 | while (entry) { | ||
277 | head = entry->next; | ||
278 | kfree(entry); | ||
279 | entry = head; | ||
280 | } | ||
281 | /* still use the old one */ | ||
282 | return; | ||
283 | } | ||
284 | entry->apic = old_entry->apic; | ||
285 | entry->pin = old_entry->pin; | ||
286 | tail->next = entry; | ||
287 | tail = entry; | ||
288 | old_entry = old_entry->next; | ||
289 | } | ||
183 | 290 | ||
184 | irq_2_pin_ptr = &pin[0]; | 291 | tail->next = NULL; |
292 | cfg->irq_2_pin = head; | ||
185 | } | 293 | } |
186 | 294 | ||
187 | static struct irq_pin_list *get_one_free_irq_2_pin(void) | 295 | static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) |
188 | { | 296 | { |
189 | struct irq_pin_list *pin = irq_2_pin_ptr; | 297 | struct irq_pin_list *entry, *next; |
190 | 298 | ||
191 | if (!pin) | 299 | if (old_cfg->irq_2_pin == cfg->irq_2_pin) |
192 | panic("can not get more irq_2_pin\n"); | 300 | return; |
193 | 301 | ||
194 | irq_2_pin_ptr = pin->next; | 302 | entry = old_cfg->irq_2_pin; |
195 | pin->next = NULL; | 303 | |
196 | return pin; | 304 | while (entry) { |
305 | next = entry->next; | ||
306 | kfree(entry); | ||
307 | entry = next; | ||
308 | } | ||
309 | old_cfg->irq_2_pin = NULL; | ||
310 | } | ||
311 | |||
312 | void arch_init_copy_chip_data(struct irq_desc *old_desc, | ||
313 | struct irq_desc *desc, int cpu) | ||
314 | { | ||
315 | struct irq_cfg *cfg; | ||
316 | struct irq_cfg *old_cfg; | ||
317 | |||
318 | cfg = get_one_free_irq_cfg(cpu); | ||
319 | |||
320 | if (!cfg) | ||
321 | return; | ||
322 | |||
323 | desc->chip_data = cfg; | ||
324 | |||
325 | old_cfg = old_desc->chip_data; | ||
326 | |||
327 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | ||
328 | |||
329 | init_copy_irq_2_pin(old_cfg, cfg, cpu); | ||
197 | } | 330 | } |
198 | 331 | ||
332 | static void free_irq_cfg(struct irq_cfg *old_cfg) | ||
333 | { | ||
334 | kfree(old_cfg); | ||
335 | } | ||
336 | |||
337 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | ||
338 | { | ||
339 | struct irq_cfg *old_cfg, *cfg; | ||
340 | |||
341 | old_cfg = old_desc->chip_data; | ||
342 | cfg = desc->chip_data; | ||
343 | |||
344 | if (old_cfg == cfg) | ||
345 | return; | ||
346 | |||
347 | if (old_cfg) { | ||
348 | free_irq_2_pin(old_cfg, cfg); | ||
349 | free_irq_cfg(old_cfg); | ||
350 | old_desc->chip_data = NULL; | ||
351 | } | ||
352 | } | ||
353 | |||
354 | static void | ||
355 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
356 | { | ||
357 | struct irq_cfg *cfg = desc->chip_data; | ||
358 | |||
359 | if (!cfg->move_in_progress) { | ||
360 | /* it means that domain is not changed */ | ||
361 | if (!cpumask_intersects(&desc->affinity, mask)) | ||
362 | cfg->move_desc_pending = 1; | ||
363 | } | ||
364 | } | ||
365 | #endif | ||
366 | |||
367 | #else | ||
368 | static struct irq_cfg *irq_cfg(unsigned int irq) | ||
369 | { | ||
370 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
371 | } | ||
372 | |||
373 | #endif | ||
374 | |||
375 | #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
376 | static inline void | ||
377 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
378 | { | ||
379 | } | ||
380 | #endif | ||
381 | |||
199 | struct io_apic { | 382 | struct io_apic { |
200 | unsigned int index; | 383 | unsigned int index; |
201 | unsigned int unused[3]; | 384 | unsigned int unused[3]; |
@@ -237,11 +420,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned | |||
237 | writel(value, &io_apic->data); | 420 | writel(value, &io_apic->data); |
238 | } | 421 | } |
239 | 422 | ||
240 | static bool io_apic_level_ack_pending(unsigned int irq) | 423 | static bool io_apic_level_ack_pending(struct irq_cfg *cfg) |
241 | { | 424 | { |
242 | struct irq_pin_list *entry; | 425 | struct irq_pin_list *entry; |
243 | unsigned long flags; | 426 | unsigned long flags; |
244 | struct irq_cfg *cfg = irq_cfg(irq); | ||
245 | 427 | ||
246 | spin_lock_irqsave(&ioapic_lock, flags); | 428 | spin_lock_irqsave(&ioapic_lock, flags); |
247 | entry = cfg->irq_2_pin; | 429 | entry = cfg->irq_2_pin; |
@@ -323,13 +505,32 @@ static void ioapic_mask_entry(int apic, int pin) | |||
323 | } | 505 | } |
324 | 506 | ||
325 | #ifdef CONFIG_SMP | 507 | #ifdef CONFIG_SMP |
326 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | 508 | static void send_cleanup_vector(struct irq_cfg *cfg) |
509 | { | ||
510 | cpumask_var_t cleanup_mask; | ||
511 | |||
512 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | ||
513 | unsigned int i; | ||
514 | cfg->move_cleanup_count = 0; | ||
515 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
516 | cfg->move_cleanup_count++; | ||
517 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
518 | send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | ||
519 | } else { | ||
520 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | ||
521 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
522 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
523 | free_cpumask_var(cleanup_mask); | ||
524 | } | ||
525 | cfg->move_in_progress = 0; | ||
526 | } | ||
527 | |||
528 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
327 | { | 529 | { |
328 | int apic, pin; | 530 | int apic, pin; |
329 | struct irq_cfg *cfg; | ||
330 | struct irq_pin_list *entry; | 531 | struct irq_pin_list *entry; |
532 | u8 vector = cfg->vector; | ||
331 | 533 | ||
332 | cfg = irq_cfg(irq); | ||
333 | entry = cfg->irq_2_pin; | 534 | entry = cfg->irq_2_pin; |
334 | for (;;) { | 535 | for (;;) { |
335 | unsigned int reg; | 536 | unsigned int reg; |
@@ -359,36 +560,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
359 | } | 560 | } |
360 | } | 561 | } |
361 | 562 | ||
362 | static int assign_irq_vector(int irq, cpumask_t mask); | 563 | static int |
564 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
363 | 565 | ||
364 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 566 | /* |
567 | * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid | ||
568 | * of that, or returns BAD_APICID and leaves desc->affinity untouched. | ||
569 | */ | ||
570 | static unsigned int | ||
571 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | ||
572 | { | ||
573 | struct irq_cfg *cfg; | ||
574 | unsigned int irq; | ||
575 | |||
576 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
577 | return BAD_APICID; | ||
578 | |||
579 | irq = desc->irq; | ||
580 | cfg = desc->chip_data; | ||
581 | if (assign_irq_vector(irq, cfg, mask)) | ||
582 | return BAD_APICID; | ||
583 | |||
584 | cpumask_and(&desc->affinity, cfg->domain, mask); | ||
585 | set_extra_move_desc(desc, mask); | ||
586 | return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); | ||
587 | } | ||
588 | |||
589 | static void | ||
590 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
365 | { | 591 | { |
366 | struct irq_cfg *cfg; | 592 | struct irq_cfg *cfg; |
367 | unsigned long flags; | 593 | unsigned long flags; |
368 | unsigned int dest; | 594 | unsigned int dest; |
369 | cpumask_t tmp; | 595 | unsigned int irq; |
370 | struct irq_desc *desc; | ||
371 | 596 | ||
372 | cpus_and(tmp, mask, cpu_online_map); | 597 | irq = desc->irq; |
373 | if (cpus_empty(tmp)) | 598 | cfg = desc->chip_data; |
374 | return; | ||
375 | 599 | ||
376 | cfg = irq_cfg(irq); | 600 | spin_lock_irqsave(&ioapic_lock, flags); |
377 | if (assign_irq_vector(irq, mask)) | 601 | dest = set_desc_affinity(desc, mask); |
378 | return; | 602 | if (dest != BAD_APICID) { |
603 | /* Only the high 8 bits are valid. */ | ||
604 | dest = SET_APIC_LOGICAL_ID(dest); | ||
605 | __target_IO_APIC_irq(irq, dest, cfg); | ||
606 | } | ||
607 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
608 | } | ||
379 | 609 | ||
380 | cpus_and(tmp, cfg->domain, mask); | 610 | static void |
381 | dest = cpu_mask_to_apicid(tmp); | 611 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) |
382 | /* | 612 | { |
383 | * Only the high 8 bits are valid. | 613 | struct irq_desc *desc; |
384 | */ | ||
385 | dest = SET_APIC_LOGICAL_ID(dest); | ||
386 | 614 | ||
387 | desc = irq_to_desc(irq); | 615 | desc = irq_to_desc(irq); |
388 | spin_lock_irqsave(&ioapic_lock, flags); | 616 | |
389 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 617 | set_ioapic_affinity_irq_desc(desc, mask); |
390 | desc->affinity = mask; | ||
391 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
392 | } | 618 | } |
393 | #endif /* CONFIG_SMP */ | 619 | #endif /* CONFIG_SMP */ |
394 | 620 | ||
@@ -397,16 +623,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
397 | * shared ISA-space IRQs, so we have to support them. We are super | 623 | * shared ISA-space IRQs, so we have to support them. We are super |
398 | * fast in the common case, and fast for shared ISA-space IRQs. | 624 | * fast in the common case, and fast for shared ISA-space IRQs. |
399 | */ | 625 | */ |
400 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) | 626 | static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) |
401 | { | 627 | { |
402 | struct irq_cfg *cfg; | ||
403 | struct irq_pin_list *entry; | 628 | struct irq_pin_list *entry; |
404 | 629 | ||
405 | /* first time to refer irq_cfg, so with new */ | ||
406 | cfg = irq_cfg_alloc(irq); | ||
407 | entry = cfg->irq_2_pin; | 630 | entry = cfg->irq_2_pin; |
408 | if (!entry) { | 631 | if (!entry) { |
409 | entry = get_one_free_irq_2_pin(); | 632 | entry = get_one_free_irq_2_pin(cpu); |
633 | if (!entry) { | ||
634 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", | ||
635 | apic, pin); | ||
636 | return; | ||
637 | } | ||
410 | cfg->irq_2_pin = entry; | 638 | cfg->irq_2_pin = entry; |
411 | entry->apic = apic; | 639 | entry->apic = apic; |
412 | entry->pin = pin; | 640 | entry->pin = pin; |
@@ -421,7 +649,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
421 | entry = entry->next; | 649 | entry = entry->next; |
422 | } | 650 | } |
423 | 651 | ||
424 | entry->next = get_one_free_irq_2_pin(); | 652 | entry->next = get_one_free_irq_2_pin(cpu); |
425 | entry = entry->next; | 653 | entry = entry->next; |
426 | entry->apic = apic; | 654 | entry->apic = apic; |
427 | entry->pin = pin; | 655 | entry->pin = pin; |
@@ -430,11 +658,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
430 | /* | 658 | /* |
431 | * Reroute an IRQ to a different pin. | 659 | * Reroute an IRQ to a different pin. |
432 | */ | 660 | */ |
433 | static void __init replace_pin_at_irq(unsigned int irq, | 661 | static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, |
434 | int oldapic, int oldpin, | 662 | int oldapic, int oldpin, |
435 | int newapic, int newpin) | 663 | int newapic, int newpin) |
436 | { | 664 | { |
437 | struct irq_cfg *cfg = irq_cfg(irq); | ||
438 | struct irq_pin_list *entry = cfg->irq_2_pin; | 665 | struct irq_pin_list *entry = cfg->irq_2_pin; |
439 | int replaced = 0; | 666 | int replaced = 0; |
440 | 667 | ||
@@ -451,18 +678,16 @@ static void __init replace_pin_at_irq(unsigned int irq, | |||
451 | 678 | ||
452 | /* why? call replace before add? */ | 679 | /* why? call replace before add? */ |
453 | if (!replaced) | 680 | if (!replaced) |
454 | add_pin_to_irq(irq, newapic, newpin); | 681 | add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); |
455 | } | 682 | } |
456 | 683 | ||
457 | static inline void io_apic_modify_irq(unsigned int irq, | 684 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, |
458 | int mask_and, int mask_or, | 685 | int mask_and, int mask_or, |
459 | void (*final)(struct irq_pin_list *entry)) | 686 | void (*final)(struct irq_pin_list *entry)) |
460 | { | 687 | { |
461 | int pin; | 688 | int pin; |
462 | struct irq_cfg *cfg; | ||
463 | struct irq_pin_list *entry; | 689 | struct irq_pin_list *entry; |
464 | 690 | ||
465 | cfg = irq_cfg(irq); | ||
466 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { | 691 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { |
467 | unsigned int reg; | 692 | unsigned int reg; |
468 | pin = entry->pin; | 693 | pin = entry->pin; |
@@ -475,9 +700,9 @@ static inline void io_apic_modify_irq(unsigned int irq, | |||
475 | } | 700 | } |
476 | } | 701 | } |
477 | 702 | ||
478 | static void __unmask_IO_APIC_irq(unsigned int irq) | 703 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) |
479 | { | 704 | { |
480 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); | 705 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
481 | } | 706 | } |
482 | 707 | ||
483 | #ifdef CONFIG_X86_64 | 708 | #ifdef CONFIG_X86_64 |
@@ -492,47 +717,64 @@ static void io_apic_sync(struct irq_pin_list *entry) | |||
492 | readl(&io_apic->data); | 717 | readl(&io_apic->data); |
493 | } | 718 | } |
494 | 719 | ||
495 | static void __mask_IO_APIC_irq(unsigned int irq) | 720 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) |
496 | { | 721 | { |
497 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 722 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
498 | } | 723 | } |
499 | #else /* CONFIG_X86_32 */ | 724 | #else /* CONFIG_X86_32 */ |
500 | static void __mask_IO_APIC_irq(unsigned int irq) | 725 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) |
501 | { | 726 | { |
502 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); | 727 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); |
503 | } | 728 | } |
504 | 729 | ||
505 | static void __mask_and_edge_IO_APIC_irq(unsigned int irq) | 730 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) |
506 | { | 731 | { |
507 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, | 732 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, |
508 | IO_APIC_REDIR_MASKED, NULL); | 733 | IO_APIC_REDIR_MASKED, NULL); |
509 | } | 734 | } |
510 | 735 | ||
511 | static void __unmask_and_level_IO_APIC_irq(unsigned int irq) | 736 | static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) |
512 | { | 737 | { |
513 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, | 738 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, |
514 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 739 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
515 | } | 740 | } |
516 | #endif /* CONFIG_X86_32 */ | 741 | #endif /* CONFIG_X86_32 */ |
517 | 742 | ||
518 | static void mask_IO_APIC_irq (unsigned int irq) | 743 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
519 | { | 744 | { |
745 | struct irq_cfg *cfg = desc->chip_data; | ||
520 | unsigned long flags; | 746 | unsigned long flags; |
521 | 747 | ||
748 | BUG_ON(!cfg); | ||
749 | |||
522 | spin_lock_irqsave(&ioapic_lock, flags); | 750 | spin_lock_irqsave(&ioapic_lock, flags); |
523 | __mask_IO_APIC_irq(irq); | 751 | __mask_IO_APIC_irq(cfg); |
524 | spin_unlock_irqrestore(&ioapic_lock, flags); | 752 | spin_unlock_irqrestore(&ioapic_lock, flags); |
525 | } | 753 | } |
526 | 754 | ||
527 | static void unmask_IO_APIC_irq (unsigned int irq) | 755 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) |
528 | { | 756 | { |
757 | struct irq_cfg *cfg = desc->chip_data; | ||
529 | unsigned long flags; | 758 | unsigned long flags; |
530 | 759 | ||
531 | spin_lock_irqsave(&ioapic_lock, flags); | 760 | spin_lock_irqsave(&ioapic_lock, flags); |
532 | __unmask_IO_APIC_irq(irq); | 761 | __unmask_IO_APIC_irq(cfg); |
533 | spin_unlock_irqrestore(&ioapic_lock, flags); | 762 | spin_unlock_irqrestore(&ioapic_lock, flags); |
534 | } | 763 | } |
535 | 764 | ||
765 | static void mask_IO_APIC_irq(unsigned int irq) | ||
766 | { | ||
767 | struct irq_desc *desc = irq_to_desc(irq); | ||
768 | |||
769 | mask_IO_APIC_irq_desc(desc); | ||
770 | } | ||
771 | static void unmask_IO_APIC_irq(unsigned int irq) | ||
772 | { | ||
773 | struct irq_desc *desc = irq_to_desc(irq); | ||
774 | |||
775 | unmask_IO_APIC_irq_desc(desc); | ||
776 | } | ||
777 | |||
536 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 778 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
537 | { | 779 | { |
538 | struct IO_APIC_route_entry entry; | 780 | struct IO_APIC_route_entry entry; |
@@ -809,7 +1051,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | |||
809 | */ | 1051 | */ |
810 | static int EISA_ELCR(unsigned int irq) | 1052 | static int EISA_ELCR(unsigned int irq) |
811 | { | 1053 | { |
812 | if (irq < 16) { | 1054 | if (irq < NR_IRQS_LEGACY) { |
813 | unsigned int port = 0x4d0 + (irq >> 3); | 1055 | unsigned int port = 0x4d0 + (irq >> 3); |
814 | return (inb(port) >> (irq & 7)) & 1; | 1056 | return (inb(port) >> (irq & 7)) & 1; |
815 | } | 1057 | } |
@@ -1034,7 +1276,8 @@ void unlock_vector_lock(void) | |||
1034 | spin_unlock(&vector_lock); | 1276 | spin_unlock(&vector_lock); |
1035 | } | 1277 | } |
1036 | 1278 | ||
1037 | static int __assign_irq_vector(int irq, cpumask_t mask) | 1279 | static int |
1280 | __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1038 | { | 1281 | { |
1039 | /* | 1282 | /* |
1040 | * NOTE! The local APIC isn't very good at handling | 1283 | * NOTE! The local APIC isn't very good at handling |
@@ -1049,52 +1292,49 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
1049 | */ | 1292 | */ |
1050 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1293 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; |
1051 | unsigned int old_vector; | 1294 | unsigned int old_vector; |
1052 | int cpu; | 1295 | int cpu, err; |
1053 | struct irq_cfg *cfg; | 1296 | cpumask_var_t tmp_mask; |
1054 | |||
1055 | cfg = irq_cfg(irq); | ||
1056 | |||
1057 | /* Only try and allocate irqs on cpus that are present */ | ||
1058 | cpus_and(mask, mask, cpu_online_map); | ||
1059 | 1297 | ||
1060 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) | 1298 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) |
1061 | return -EBUSY; | 1299 | return -EBUSY; |
1062 | 1300 | ||
1301 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | ||
1302 | return -ENOMEM; | ||
1303 | |||
1063 | old_vector = cfg->vector; | 1304 | old_vector = cfg->vector; |
1064 | if (old_vector) { | 1305 | if (old_vector) { |
1065 | cpumask_t tmp; | 1306 | cpumask_and(tmp_mask, mask, cpu_online_mask); |
1066 | cpus_and(tmp, cfg->domain, mask); | 1307 | cpumask_and(tmp_mask, cfg->domain, tmp_mask); |
1067 | if (!cpus_empty(tmp)) | 1308 | if (!cpumask_empty(tmp_mask)) { |
1309 | free_cpumask_var(tmp_mask); | ||
1068 | return 0; | 1310 | return 0; |
1311 | } | ||
1069 | } | 1312 | } |
1070 | 1313 | ||
1071 | for_each_cpu_mask_nr(cpu, mask) { | 1314 | /* Only try and allocate irqs on cpus that are present */ |
1072 | cpumask_t domain, new_mask; | 1315 | err = -ENOSPC; |
1316 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | ||
1073 | int new_cpu; | 1317 | int new_cpu; |
1074 | int vector, offset; | 1318 | int vector, offset; |
1075 | 1319 | ||
1076 | domain = vector_allocation_domain(cpu); | 1320 | vector_allocation_domain(cpu, tmp_mask); |
1077 | cpus_and(new_mask, domain, cpu_online_map); | ||
1078 | 1321 | ||
1079 | vector = current_vector; | 1322 | vector = current_vector; |
1080 | offset = current_offset; | 1323 | offset = current_offset; |
1081 | next: | 1324 | next: |
1082 | vector += 8; | 1325 | vector += 8; |
1083 | if (vector >= first_system_vector) { | 1326 | if (vector >= first_system_vector) { |
1084 | /* If we run out of vectors on large boxen, must share them. */ | 1327 | /* If out of vectors on large boxen, must share them. */ |
1085 | offset = (offset + 1) % 8; | 1328 | offset = (offset + 1) % 8; |
1086 | vector = FIRST_DEVICE_VECTOR + offset; | 1329 | vector = FIRST_DEVICE_VECTOR + offset; |
1087 | } | 1330 | } |
1088 | if (unlikely(current_vector == vector)) | 1331 | if (unlikely(current_vector == vector)) |
1089 | continue; | 1332 | continue; |
1090 | #ifdef CONFIG_X86_64 | 1333 | |
1091 | if (vector == IA32_SYSCALL_VECTOR) | 1334 | if (test_bit(vector, used_vectors)) |
1092 | goto next; | ||
1093 | #else | ||
1094 | if (vector == SYSCALL_VECTOR) | ||
1095 | goto next; | 1335 | goto next; |
1096 | #endif | 1336 | |
1097 | for_each_cpu_mask_nr(new_cpu, new_mask) | 1337 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) |
1098 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 1338 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
1099 | goto next; | 1339 | goto next; |
1100 | /* Found one! */ | 1340 | /* Found one! */ |
@@ -1102,49 +1342,47 @@ next: | |||
1102 | current_offset = offset; | 1342 | current_offset = offset; |
1103 | if (old_vector) { | 1343 | if (old_vector) { |
1104 | cfg->move_in_progress = 1; | 1344 | cfg->move_in_progress = 1; |
1105 | cfg->old_domain = cfg->domain; | 1345 | cpumask_copy(cfg->old_domain, cfg->domain); |
1106 | } | 1346 | } |
1107 | for_each_cpu_mask_nr(new_cpu, new_mask) | 1347 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) |
1108 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 1348 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
1109 | cfg->vector = vector; | 1349 | cfg->vector = vector; |
1110 | cfg->domain = domain; | 1350 | cpumask_copy(cfg->domain, tmp_mask); |
1111 | return 0; | 1351 | err = 0; |
1352 | break; | ||
1112 | } | 1353 | } |
1113 | return -ENOSPC; | 1354 | free_cpumask_var(tmp_mask); |
1355 | return err; | ||
1114 | } | 1356 | } |
1115 | 1357 | ||
1116 | static int assign_irq_vector(int irq, cpumask_t mask) | 1358 | static int |
1359 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1117 | { | 1360 | { |
1118 | int err; | 1361 | int err; |
1119 | unsigned long flags; | 1362 | unsigned long flags; |
1120 | 1363 | ||
1121 | spin_lock_irqsave(&vector_lock, flags); | 1364 | spin_lock_irqsave(&vector_lock, flags); |
1122 | err = __assign_irq_vector(irq, mask); | 1365 | err = __assign_irq_vector(irq, cfg, mask); |
1123 | spin_unlock_irqrestore(&vector_lock, flags); | 1366 | spin_unlock_irqrestore(&vector_lock, flags); |
1124 | return err; | 1367 | return err; |
1125 | } | 1368 | } |
1126 | 1369 | ||
1127 | static void __clear_irq_vector(int irq) | 1370 | static void __clear_irq_vector(int irq, struct irq_cfg *cfg) |
1128 | { | 1371 | { |
1129 | struct irq_cfg *cfg; | ||
1130 | cpumask_t mask; | ||
1131 | int cpu, vector; | 1372 | int cpu, vector; |
1132 | 1373 | ||
1133 | cfg = irq_cfg(irq); | ||
1134 | BUG_ON(!cfg->vector); | 1374 | BUG_ON(!cfg->vector); |
1135 | 1375 | ||
1136 | vector = cfg->vector; | 1376 | vector = cfg->vector; |
1137 | cpus_and(mask, cfg->domain, cpu_online_map); | 1377 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) |
1138 | for_each_cpu_mask_nr(cpu, mask) | ||
1139 | per_cpu(vector_irq, cpu)[vector] = -1; | 1378 | per_cpu(vector_irq, cpu)[vector] = -1; |
1140 | 1379 | ||
1141 | cfg->vector = 0; | 1380 | cfg->vector = 0; |
1142 | cpus_clear(cfg->domain); | 1381 | cpumask_clear(cfg->domain); |
1143 | 1382 | ||
1144 | if (likely(!cfg->move_in_progress)) | 1383 | if (likely(!cfg->move_in_progress)) |
1145 | return; | 1384 | return; |
1146 | cpus_and(mask, cfg->old_domain, cpu_online_map); | 1385 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { |
1147 | for_each_cpu_mask_nr(cpu, mask) { | ||
1148 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | 1386 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; |
1149 | vector++) { | 1387 | vector++) { |
1150 | if (per_cpu(vector_irq, cpu)[vector] != irq) | 1388 | if (per_cpu(vector_irq, cpu)[vector] != irq) |
@@ -1162,10 +1400,12 @@ void __setup_vector_irq(int cpu) | |||
1162 | /* This function must be called with vector_lock held */ | 1400 | /* This function must be called with vector_lock held */ |
1163 | int irq, vector; | 1401 | int irq, vector; |
1164 | struct irq_cfg *cfg; | 1402 | struct irq_cfg *cfg; |
1403 | struct irq_desc *desc; | ||
1165 | 1404 | ||
1166 | /* Mark the inuse vectors */ | 1405 | /* Mark the inuse vectors */ |
1167 | for_each_irq_cfg(irq, cfg) { | 1406 | for_each_irq_desc(irq, desc) { |
1168 | if (!cpu_isset(cpu, cfg->domain)) | 1407 | cfg = desc->chip_data; |
1408 | if (!cpumask_test_cpu(cpu, cfg->domain)) | ||
1169 | continue; | 1409 | continue; |
1170 | vector = cfg->vector; | 1410 | vector = cfg->vector; |
1171 | per_cpu(vector_irq, cpu)[vector] = irq; | 1411 | per_cpu(vector_irq, cpu)[vector] = irq; |
@@ -1177,7 +1417,7 @@ void __setup_vector_irq(int cpu) | |||
1177 | continue; | 1417 | continue; |
1178 | 1418 | ||
1179 | cfg = irq_cfg(irq); | 1419 | cfg = irq_cfg(irq); |
1180 | if (!cpu_isset(cpu, cfg->domain)) | 1420 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1181 | per_cpu(vector_irq, cpu)[vector] = -1; | 1421 | per_cpu(vector_irq, cpu)[vector] = -1; |
1182 | } | 1422 | } |
1183 | } | 1423 | } |
@@ -1215,11 +1455,8 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
1215 | } | 1455 | } |
1216 | #endif | 1456 | #endif |
1217 | 1457 | ||
1218 | static void ioapic_register_intr(int irq, unsigned long trigger) | 1458 | static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) |
1219 | { | 1459 | { |
1220 | struct irq_desc *desc; | ||
1221 | |||
1222 | desc = irq_to_desc(irq); | ||
1223 | 1460 | ||
1224 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1461 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1225 | trigger == IOAPIC_LEVEL) | 1462 | trigger == IOAPIC_LEVEL) |
@@ -1311,23 +1548,22 @@ static int setup_ioapic_entry(int apic, int irq, | |||
1311 | return 0; | 1548 | return 0; |
1312 | } | 1549 | } |
1313 | 1550 | ||
1314 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | 1551 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, |
1315 | int trigger, int polarity) | 1552 | int trigger, int polarity) |
1316 | { | 1553 | { |
1317 | struct irq_cfg *cfg; | 1554 | struct irq_cfg *cfg; |
1318 | struct IO_APIC_route_entry entry; | 1555 | struct IO_APIC_route_entry entry; |
1319 | cpumask_t mask; | 1556 | unsigned int dest; |
1320 | 1557 | ||
1321 | if (!IO_APIC_IRQ(irq)) | 1558 | if (!IO_APIC_IRQ(irq)) |
1322 | return; | 1559 | return; |
1323 | 1560 | ||
1324 | cfg = irq_cfg(irq); | 1561 | cfg = desc->chip_data; |
1325 | 1562 | ||
1326 | mask = TARGET_CPUS; | 1563 | if (assign_irq_vector(irq, cfg, TARGET_CPUS)) |
1327 | if (assign_irq_vector(irq, mask)) | ||
1328 | return; | 1564 | return; |
1329 | 1565 | ||
1330 | cpus_and(mask, cfg->domain, mask); | 1566 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); |
1331 | 1567 | ||
1332 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1568 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1333 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1569 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
@@ -1337,16 +1573,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
1337 | 1573 | ||
1338 | 1574 | ||
1339 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, | 1575 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, |
1340 | cpu_mask_to_apicid(mask), trigger, polarity, | 1576 | dest, trigger, polarity, cfg->vector)) { |
1341 | cfg->vector)) { | ||
1342 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1577 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1343 | mp_ioapics[apic].mp_apicid, pin); | 1578 | mp_ioapics[apic].mp_apicid, pin); |
1344 | __clear_irq_vector(irq); | 1579 | __clear_irq_vector(irq, cfg); |
1345 | return; | 1580 | return; |
1346 | } | 1581 | } |
1347 | 1582 | ||
1348 | ioapic_register_intr(irq, trigger); | 1583 | ioapic_register_intr(irq, desc, trigger); |
1349 | if (irq < 16) | 1584 | if (irq < NR_IRQS_LEGACY) |
1350 | disable_8259A_irq(irq); | 1585 | disable_8259A_irq(irq); |
1351 | 1586 | ||
1352 | ioapic_write_entry(apic, pin, entry); | 1587 | ioapic_write_entry(apic, pin, entry); |
@@ -1356,6 +1591,9 @@ static void __init setup_IO_APIC_irqs(void) | |||
1356 | { | 1591 | { |
1357 | int apic, pin, idx, irq; | 1592 | int apic, pin, idx, irq; |
1358 | int notcon = 0; | 1593 | int notcon = 0; |
1594 | struct irq_desc *desc; | ||
1595 | struct irq_cfg *cfg; | ||
1596 | int cpu = boot_cpu_id; | ||
1359 | 1597 | ||
1360 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1598 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1361 | 1599 | ||
@@ -1387,9 +1625,15 @@ static void __init setup_IO_APIC_irqs(void) | |||
1387 | if (multi_timer_check(apic, irq)) | 1625 | if (multi_timer_check(apic, irq)) |
1388 | continue; | 1626 | continue; |
1389 | #endif | 1627 | #endif |
1390 | add_pin_to_irq(irq, apic, pin); | 1628 | desc = irq_to_desc_alloc_cpu(irq, cpu); |
1629 | if (!desc) { | ||
1630 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
1631 | continue; | ||
1632 | } | ||
1633 | cfg = desc->chip_data; | ||
1634 | add_pin_to_irq_cpu(cfg, cpu, apic, pin); | ||
1391 | 1635 | ||
1392 | setup_IO_APIC_irq(apic, pin, irq, | 1636 | setup_IO_APIC_irq(apic, pin, irq, desc, |
1393 | irq_trigger(idx), irq_polarity(idx)); | 1637 | irq_trigger(idx), irq_polarity(idx)); |
1394 | } | 1638 | } |
1395 | } | 1639 | } |
@@ -1448,6 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1448 | union IO_APIC_reg_03 reg_03; | 1692 | union IO_APIC_reg_03 reg_03; |
1449 | unsigned long flags; | 1693 | unsigned long flags; |
1450 | struct irq_cfg *cfg; | 1694 | struct irq_cfg *cfg; |
1695 | struct irq_desc *desc; | ||
1451 | unsigned int irq; | 1696 | unsigned int irq; |
1452 | 1697 | ||
1453 | if (apic_verbosity == APIC_QUIET) | 1698 | if (apic_verbosity == APIC_QUIET) |
@@ -1537,8 +1782,11 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1537 | } | 1782 | } |
1538 | } | 1783 | } |
1539 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1784 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
1540 | for_each_irq_cfg(irq, cfg) { | 1785 | for_each_irq_desc(irq, desc) { |
1541 | struct irq_pin_list *entry = cfg->irq_2_pin; | 1786 | struct irq_pin_list *entry; |
1787 | |||
1788 | cfg = desc->chip_data; | ||
1789 | entry = cfg->irq_2_pin; | ||
1542 | if (!entry) | 1790 | if (!entry) |
1543 | continue; | 1791 | continue; |
1544 | printk(KERN_DEBUG "IRQ%d ", irq); | 1792 | printk(KERN_DEBUG "IRQ%d ", irq); |
@@ -2022,14 +2270,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2022 | { | 2270 | { |
2023 | int was_pending = 0; | 2271 | int was_pending = 0; |
2024 | unsigned long flags; | 2272 | unsigned long flags; |
2273 | struct irq_cfg *cfg; | ||
2025 | 2274 | ||
2026 | spin_lock_irqsave(&ioapic_lock, flags); | 2275 | spin_lock_irqsave(&ioapic_lock, flags); |
2027 | if (irq < 16) { | 2276 | if (irq < NR_IRQS_LEGACY) { |
2028 | disable_8259A_irq(irq); | 2277 | disable_8259A_irq(irq); |
2029 | if (i8259A_irq_pending(irq)) | 2278 | if (i8259A_irq_pending(irq)) |
2030 | was_pending = 1; | 2279 | was_pending = 1; |
2031 | } | 2280 | } |
2032 | __unmask_IO_APIC_irq(irq); | 2281 | cfg = irq_cfg(irq); |
2282 | __unmask_IO_APIC_irq(cfg); | ||
2033 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2283 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2034 | 2284 | ||
2035 | return was_pending; | 2285 | return was_pending; |
@@ -2043,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2043 | unsigned long flags; | 2293 | unsigned long flags; |
2044 | 2294 | ||
2045 | spin_lock_irqsave(&vector_lock, flags); | 2295 | spin_lock_irqsave(&vector_lock, flags); |
2046 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); | 2296 | send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
2047 | spin_unlock_irqrestore(&vector_lock, flags); | 2297 | spin_unlock_irqrestore(&vector_lock, flags); |
2048 | 2298 | ||
2049 | return 1; | 2299 | return 1; |
@@ -2092,35 +2342,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); | |||
2092 | * as simple as edge triggered migration and we can do the irq migration | 2342 | * as simple as edge triggered migration and we can do the irq migration |
2093 | * with a simple atomic update to IO-APIC RTE. | 2343 | * with a simple atomic update to IO-APIC RTE. |
2094 | */ | 2344 | */ |
2095 | static void migrate_ioapic_irq(int irq, cpumask_t mask) | 2345 | static void |
2346 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
2096 | { | 2347 | { |
2097 | struct irq_cfg *cfg; | 2348 | struct irq_cfg *cfg; |
2098 | struct irq_desc *desc; | ||
2099 | cpumask_t tmp, cleanup_mask; | ||
2100 | struct irte irte; | 2349 | struct irte irte; |
2101 | int modify_ioapic_rte; | 2350 | int modify_ioapic_rte; |
2102 | unsigned int dest; | 2351 | unsigned int dest; |
2103 | unsigned long flags; | 2352 | unsigned long flags; |
2353 | unsigned int irq; | ||
2104 | 2354 | ||
2105 | cpus_and(tmp, mask, cpu_online_map); | 2355 | if (!cpumask_intersects(mask, cpu_online_mask)) |
2106 | if (cpus_empty(tmp)) | ||
2107 | return; | 2356 | return; |
2108 | 2357 | ||
2358 | irq = desc->irq; | ||
2109 | if (get_irte(irq, &irte)) | 2359 | if (get_irte(irq, &irte)) |
2110 | return; | 2360 | return; |
2111 | 2361 | ||
2112 | if (assign_irq_vector(irq, mask)) | 2362 | cfg = desc->chip_data; |
2363 | if (assign_irq_vector(irq, cfg, mask)) | ||
2113 | return; | 2364 | return; |
2114 | 2365 | ||
2115 | cfg = irq_cfg(irq); | 2366 | set_extra_move_desc(desc, mask); |
2116 | cpus_and(tmp, cfg->domain, mask); | 2367 | |
2117 | dest = cpu_mask_to_apicid(tmp); | 2368 | dest = cpu_mask_to_apicid_and(cfg->domain, mask); |
2118 | 2369 | ||
2119 | desc = irq_to_desc(irq); | ||
2120 | modify_ioapic_rte = desc->status & IRQ_LEVEL; | 2370 | modify_ioapic_rte = desc->status & IRQ_LEVEL; |
2121 | if (modify_ioapic_rte) { | 2371 | if (modify_ioapic_rte) { |
2122 | spin_lock_irqsave(&ioapic_lock, flags); | 2372 | spin_lock_irqsave(&ioapic_lock, flags); |
2123 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 2373 | __target_IO_APIC_irq(irq, dest, cfg); |
2124 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2374 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2125 | } | 2375 | } |
2126 | 2376 | ||
@@ -2132,24 +2382,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) | |||
2132 | */ | 2382 | */ |
2133 | modify_irte(irq, &irte); | 2383 | modify_irte(irq, &irte); |
2134 | 2384 | ||
2135 | if (cfg->move_in_progress) { | 2385 | if (cfg->move_in_progress) |
2136 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2386 | send_cleanup_vector(cfg); |
2137 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
2138 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2139 | cfg->move_in_progress = 0; | ||
2140 | } | ||
2141 | 2387 | ||
2142 | desc->affinity = mask; | 2388 | cpumask_copy(&desc->affinity, mask); |
2143 | } | 2389 | } |
2144 | 2390 | ||
2145 | static int migrate_irq_remapped_level(int irq) | 2391 | static int migrate_irq_remapped_level_desc(struct irq_desc *desc) |
2146 | { | 2392 | { |
2147 | int ret = -1; | 2393 | int ret = -1; |
2148 | struct irq_desc *desc = irq_to_desc(irq); | 2394 | struct irq_cfg *cfg = desc->chip_data; |
2149 | 2395 | ||
2150 | mask_IO_APIC_irq(irq); | 2396 | mask_IO_APIC_irq_desc(desc); |
2151 | 2397 | ||
2152 | if (io_apic_level_ack_pending(irq)) { | 2398 | if (io_apic_level_ack_pending(cfg)) { |
2153 | /* | 2399 | /* |
2154 | * Interrupt in progress. Migrating irq now will change the | 2400 | * Interrupt in progress. Migrating irq now will change the |
2155 | * vector information in the IO-APIC RTE and that will confuse | 2401 | * vector information in the IO-APIC RTE and that will confuse |
@@ -2161,14 +2407,15 @@ static int migrate_irq_remapped_level(int irq) | |||
2161 | } | 2407 | } |
2162 | 2408 | ||
2163 | /* everthing is clear. we have right of way */ | 2409 | /* everthing is clear. we have right of way */ |
2164 | migrate_ioapic_irq(irq, desc->pending_mask); | 2410 | migrate_ioapic_irq_desc(desc, &desc->pending_mask); |
2165 | 2411 | ||
2166 | ret = 0; | 2412 | ret = 0; |
2167 | desc->status &= ~IRQ_MOVE_PENDING; | 2413 | desc->status &= ~IRQ_MOVE_PENDING; |
2168 | cpus_clear(desc->pending_mask); | 2414 | cpumask_clear(&desc->pending_mask); |
2169 | 2415 | ||
2170 | unmask: | 2416 | unmask: |
2171 | unmask_IO_APIC_irq(irq); | 2417 | unmask_IO_APIC_irq_desc(desc); |
2418 | |||
2172 | return ret; | 2419 | return ret; |
2173 | } | 2420 | } |
2174 | 2421 | ||
@@ -2189,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work) | |||
2189 | continue; | 2436 | continue; |
2190 | } | 2437 | } |
2191 | 2438 | ||
2192 | desc->chip->set_affinity(irq, desc->pending_mask); | 2439 | desc->chip->set_affinity(irq, &desc->pending_mask); |
2193 | spin_unlock_irqrestore(&desc->lock, flags); | 2440 | spin_unlock_irqrestore(&desc->lock, flags); |
2194 | } | 2441 | } |
2195 | } | 2442 | } |
@@ -2198,18 +2445,24 @@ static void ir_irq_migration(struct work_struct *work) | |||
2198 | /* | 2445 | /* |
2199 | * Migrates the IRQ destination in the process context. | 2446 | * Migrates the IRQ destination in the process context. |
2200 | */ | 2447 | */ |
2201 | static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 2448 | static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, |
2449 | const struct cpumask *mask) | ||
2202 | { | 2450 | { |
2203 | struct irq_desc *desc = irq_to_desc(irq); | ||
2204 | |||
2205 | if (desc->status & IRQ_LEVEL) { | 2451 | if (desc->status & IRQ_LEVEL) { |
2206 | desc->status |= IRQ_MOVE_PENDING; | 2452 | desc->status |= IRQ_MOVE_PENDING; |
2207 | desc->pending_mask = mask; | 2453 | cpumask_copy(&desc->pending_mask, mask); |
2208 | migrate_irq_remapped_level(irq); | 2454 | migrate_irq_remapped_level_desc(desc); |
2209 | return; | 2455 | return; |
2210 | } | 2456 | } |
2211 | 2457 | ||
2212 | migrate_ioapic_irq(irq, mask); | 2458 | migrate_ioapic_irq_desc(desc, mask); |
2459 | } | ||
2460 | static void set_ir_ioapic_affinity_irq(unsigned int irq, | ||
2461 | const struct cpumask *mask) | ||
2462 | { | ||
2463 | struct irq_desc *desc = irq_to_desc(irq); | ||
2464 | |||
2465 | set_ir_ioapic_affinity_irq_desc(desc, mask); | ||
2213 | } | 2466 | } |
2214 | #endif | 2467 | #endif |
2215 | 2468 | ||
@@ -2228,6 +2481,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2228 | struct irq_cfg *cfg; | 2481 | struct irq_cfg *cfg; |
2229 | irq = __get_cpu_var(vector_irq)[vector]; | 2482 | irq = __get_cpu_var(vector_irq)[vector]; |
2230 | 2483 | ||
2484 | if (irq == -1) | ||
2485 | continue; | ||
2486 | |||
2231 | desc = irq_to_desc(irq); | 2487 | desc = irq_to_desc(irq); |
2232 | if (!desc) | 2488 | if (!desc) |
2233 | continue; | 2489 | continue; |
@@ -2237,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2237 | if (!cfg->move_cleanup_count) | 2493 | if (!cfg->move_cleanup_count) |
2238 | goto unlock; | 2494 | goto unlock; |
2239 | 2495 | ||
2240 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) | 2496 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2241 | goto unlock; | 2497 | goto unlock; |
2242 | 2498 | ||
2243 | __get_cpu_var(vector_irq)[vector] = -1; | 2499 | __get_cpu_var(vector_irq)[vector] = -1; |
@@ -2249,28 +2505,44 @@ unlock: | |||
2249 | irq_exit(); | 2505 | irq_exit(); |
2250 | } | 2506 | } |
2251 | 2507 | ||
2252 | static void irq_complete_move(unsigned int irq) | 2508 | static void irq_complete_move(struct irq_desc **descp) |
2253 | { | 2509 | { |
2254 | struct irq_cfg *cfg = irq_cfg(irq); | 2510 | struct irq_desc *desc = *descp; |
2511 | struct irq_cfg *cfg = desc->chip_data; | ||
2255 | unsigned vector, me; | 2512 | unsigned vector, me; |
2256 | 2513 | ||
2257 | if (likely(!cfg->move_in_progress)) | 2514 | if (likely(!cfg->move_in_progress)) { |
2515 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2516 | if (likely(!cfg->move_desc_pending)) | ||
2517 | return; | ||
2518 | |||
2519 | /* domain has not changed, but affinity did */ | ||
2520 | me = smp_processor_id(); | ||
2521 | if (cpu_isset(me, desc->affinity)) { | ||
2522 | *descp = desc = move_irq_desc(desc, me); | ||
2523 | /* get the new one */ | ||
2524 | cfg = desc->chip_data; | ||
2525 | cfg->move_desc_pending = 0; | ||
2526 | } | ||
2527 | #endif | ||
2258 | return; | 2528 | return; |
2529 | } | ||
2259 | 2530 | ||
2260 | vector = ~get_irq_regs()->orig_ax; | 2531 | vector = ~get_irq_regs()->orig_ax; |
2261 | me = smp_processor_id(); | 2532 | me = smp_processor_id(); |
2262 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | 2533 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC |
2263 | cpumask_t cleanup_mask; | 2534 | *descp = desc = move_irq_desc(desc, me); |
2535 | /* get the new one */ | ||
2536 | cfg = desc->chip_data; | ||
2537 | #endif | ||
2264 | 2538 | ||
2265 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2539 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2266 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | 2540 | send_cleanup_vector(cfg); |
2267 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2268 | cfg->move_in_progress = 0; | ||
2269 | } | ||
2270 | } | 2541 | } |
2271 | #else | 2542 | #else |
2272 | static inline void irq_complete_move(unsigned int irq) {} | 2543 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2273 | #endif | 2544 | #endif |
2545 | |||
2274 | #ifdef CONFIG_INTR_REMAP | 2546 | #ifdef CONFIG_INTR_REMAP |
2275 | static void ack_x2apic_level(unsigned int irq) | 2547 | static void ack_x2apic_level(unsigned int irq) |
2276 | { | 2548 | { |
@@ -2281,11 +2553,14 @@ static void ack_x2apic_edge(unsigned int irq) | |||
2281 | { | 2553 | { |
2282 | ack_x2APIC_irq(); | 2554 | ack_x2APIC_irq(); |
2283 | } | 2555 | } |
2556 | |||
2284 | #endif | 2557 | #endif |
2285 | 2558 | ||
2286 | static void ack_apic_edge(unsigned int irq) | 2559 | static void ack_apic_edge(unsigned int irq) |
2287 | { | 2560 | { |
2288 | irq_complete_move(irq); | 2561 | struct irq_desc *desc = irq_to_desc(irq); |
2562 | |||
2563 | irq_complete_move(&desc); | ||
2289 | move_native_irq(irq); | 2564 | move_native_irq(irq); |
2290 | ack_APIC_irq(); | 2565 | ack_APIC_irq(); |
2291 | } | 2566 | } |
@@ -2294,18 +2569,21 @@ atomic_t irq_mis_count; | |||
2294 | 2569 | ||
2295 | static void ack_apic_level(unsigned int irq) | 2570 | static void ack_apic_level(unsigned int irq) |
2296 | { | 2571 | { |
2572 | struct irq_desc *desc = irq_to_desc(irq); | ||
2573 | |||
2297 | #ifdef CONFIG_X86_32 | 2574 | #ifdef CONFIG_X86_32 |
2298 | unsigned long v; | 2575 | unsigned long v; |
2299 | int i; | 2576 | int i; |
2300 | #endif | 2577 | #endif |
2578 | struct irq_cfg *cfg; | ||
2301 | int do_unmask_irq = 0; | 2579 | int do_unmask_irq = 0; |
2302 | 2580 | ||
2303 | irq_complete_move(irq); | 2581 | irq_complete_move(&desc); |
2304 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2582 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
2305 | /* If we are moving the irq we need to mask it */ | 2583 | /* If we are moving the irq we need to mask it */ |
2306 | if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { | 2584 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { |
2307 | do_unmask_irq = 1; | 2585 | do_unmask_irq = 1; |
2308 | mask_IO_APIC_irq(irq); | 2586 | mask_IO_APIC_irq_desc(desc); |
2309 | } | 2587 | } |
2310 | #endif | 2588 | #endif |
2311 | 2589 | ||
@@ -2329,7 +2607,8 @@ static void ack_apic_level(unsigned int irq) | |||
2329 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2607 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2330 | * The idea is from Manfred Spraul. --macro | 2608 | * The idea is from Manfred Spraul. --macro |
2331 | */ | 2609 | */ |
2332 | i = irq_cfg(irq)->vector; | 2610 | cfg = desc->chip_data; |
2611 | i = cfg->vector; | ||
2333 | 2612 | ||
2334 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2613 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
2335 | #endif | 2614 | #endif |
@@ -2368,17 +2647,18 @@ static void ack_apic_level(unsigned int irq) | |||
2368 | * accurate and is causing problems then it is a hardware bug | 2647 | * accurate and is causing problems then it is a hardware bug |
2369 | * and you can go talk to the chipset vendor about it. | 2648 | * and you can go talk to the chipset vendor about it. |
2370 | */ | 2649 | */ |
2371 | if (!io_apic_level_ack_pending(irq)) | 2650 | cfg = desc->chip_data; |
2651 | if (!io_apic_level_ack_pending(cfg)) | ||
2372 | move_masked_irq(irq); | 2652 | move_masked_irq(irq); |
2373 | unmask_IO_APIC_irq(irq); | 2653 | unmask_IO_APIC_irq_desc(desc); |
2374 | } | 2654 | } |
2375 | 2655 | ||
2376 | #ifdef CONFIG_X86_32 | 2656 | #ifdef CONFIG_X86_32 |
2377 | if (!(v & (1 << (i & 0x1f)))) { | 2657 | if (!(v & (1 << (i & 0x1f)))) { |
2378 | atomic_inc(&irq_mis_count); | 2658 | atomic_inc(&irq_mis_count); |
2379 | spin_lock(&ioapic_lock); | 2659 | spin_lock(&ioapic_lock); |
2380 | __mask_and_edge_IO_APIC_irq(irq); | 2660 | __mask_and_edge_IO_APIC_irq(cfg); |
2381 | __unmask_and_level_IO_APIC_irq(irq); | 2661 | __unmask_and_level_IO_APIC_irq(cfg); |
2382 | spin_unlock(&ioapic_lock); | 2662 | spin_unlock(&ioapic_lock); |
2383 | } | 2663 | } |
2384 | #endif | 2664 | #endif |
@@ -2429,20 +2709,19 @@ static inline void init_IO_APIC_traps(void) | |||
2429 | * Also, we've got to be careful not to trash gate | 2709 | * Also, we've got to be careful not to trash gate |
2430 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2710 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
2431 | */ | 2711 | */ |
2432 | for_each_irq_cfg(irq, cfg) { | 2712 | for_each_irq_desc(irq, desc) { |
2433 | if (IO_APIC_IRQ(irq) && !cfg->vector) { | 2713 | cfg = desc->chip_data; |
2714 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { | ||
2434 | /* | 2715 | /* |
2435 | * Hmm.. We don't have an entry for this, | 2716 | * Hmm.. We don't have an entry for this, |
2436 | * so default to an old-fashioned 8259 | 2717 | * so default to an old-fashioned 8259 |
2437 | * interrupt if we can.. | 2718 | * interrupt if we can.. |
2438 | */ | 2719 | */ |
2439 | if (irq < 16) | 2720 | if (irq < NR_IRQS_LEGACY) |
2440 | make_8259A_irq(irq); | 2721 | make_8259A_irq(irq); |
2441 | else { | 2722 | else |
2442 | desc = irq_to_desc(irq); | ||
2443 | /* Strange. Oh, well.. */ | 2723 | /* Strange. Oh, well.. */ |
2444 | desc->chip = &no_irq_chip; | 2724 | desc->chip = &no_irq_chip; |
2445 | } | ||
2446 | } | 2725 | } |
2447 | } | 2726 | } |
2448 | } | 2727 | } |
@@ -2467,7 +2746,7 @@ static void unmask_lapic_irq(unsigned int irq) | |||
2467 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2746 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2468 | } | 2747 | } |
2469 | 2748 | ||
2470 | static void ack_lapic_irq (unsigned int irq) | 2749 | static void ack_lapic_irq(unsigned int irq) |
2471 | { | 2750 | { |
2472 | ack_APIC_irq(); | 2751 | ack_APIC_irq(); |
2473 | } | 2752 | } |
@@ -2479,11 +2758,8 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2479 | .ack = ack_lapic_irq, | 2758 | .ack = ack_lapic_irq, |
2480 | }; | 2759 | }; |
2481 | 2760 | ||
2482 | static void lapic_register_intr(int irq) | 2761 | static void lapic_register_intr(int irq, struct irq_desc *desc) |
2483 | { | 2762 | { |
2484 | struct irq_desc *desc; | ||
2485 | |||
2486 | desc = irq_to_desc(irq); | ||
2487 | desc->status &= ~IRQ_LEVEL; | 2763 | desc->status &= ~IRQ_LEVEL; |
2488 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2764 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
2489 | "edge"); | 2765 | "edge"); |
@@ -2587,7 +2863,9 @@ int timer_through_8259 __initdata; | |||
2587 | */ | 2863 | */ |
2588 | static inline void __init check_timer(void) | 2864 | static inline void __init check_timer(void) |
2589 | { | 2865 | { |
2590 | struct irq_cfg *cfg = irq_cfg(0); | 2866 | struct irq_desc *desc = irq_to_desc(0); |
2867 | struct irq_cfg *cfg = desc->chip_data; | ||
2868 | int cpu = boot_cpu_id; | ||
2591 | int apic1, pin1, apic2, pin2; | 2869 | int apic1, pin1, apic2, pin2; |
2592 | unsigned long flags; | 2870 | unsigned long flags; |
2593 | unsigned int ver; | 2871 | unsigned int ver; |
@@ -2602,7 +2880,7 @@ static inline void __init check_timer(void) | |||
2602 | * get/set the timer IRQ vector: | 2880 | * get/set the timer IRQ vector: |
2603 | */ | 2881 | */ |
2604 | disable_8259A_irq(0); | 2882 | disable_8259A_irq(0); |
2605 | assign_irq_vector(0, TARGET_CPUS); | 2883 | assign_irq_vector(0, cfg, TARGET_CPUS); |
2606 | 2884 | ||
2607 | /* | 2885 | /* |
2608 | * As IRQ0 is to be enabled in the 8259A, the virtual | 2886 | * As IRQ0 is to be enabled in the 8259A, the virtual |
@@ -2653,10 +2931,10 @@ static inline void __init check_timer(void) | |||
2653 | * Ok, does IRQ0 through the IOAPIC work? | 2931 | * Ok, does IRQ0 through the IOAPIC work? |
2654 | */ | 2932 | */ |
2655 | if (no_pin1) { | 2933 | if (no_pin1) { |
2656 | add_pin_to_irq(0, apic1, pin1); | 2934 | add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); |
2657 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2935 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
2658 | } | 2936 | } |
2659 | unmask_IO_APIC_irq(0); | 2937 | unmask_IO_APIC_irq_desc(desc); |
2660 | if (timer_irq_works()) { | 2938 | if (timer_irq_works()) { |
2661 | if (nmi_watchdog == NMI_IO_APIC) { | 2939 | if (nmi_watchdog == NMI_IO_APIC) { |
2662 | setup_nmi(); | 2940 | setup_nmi(); |
@@ -2682,9 +2960,9 @@ static inline void __init check_timer(void) | |||
2682 | /* | 2960 | /* |
2683 | * legacy devices should be connected to IO APIC #0 | 2961 | * legacy devices should be connected to IO APIC #0 |
2684 | */ | 2962 | */ |
2685 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | 2963 | replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); |
2686 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2964 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
2687 | unmask_IO_APIC_irq(0); | 2965 | unmask_IO_APIC_irq_desc(desc); |
2688 | enable_8259A_irq(0); | 2966 | enable_8259A_irq(0); |
2689 | if (timer_irq_works()) { | 2967 | if (timer_irq_works()) { |
2690 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2968 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
@@ -2716,7 +2994,7 @@ static inline void __init check_timer(void) | |||
2716 | apic_printk(APIC_QUIET, KERN_INFO | 2994 | apic_printk(APIC_QUIET, KERN_INFO |
2717 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 2995 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
2718 | 2996 | ||
2719 | lapic_register_intr(0); | 2997 | lapic_register_intr(0, desc); |
2720 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 2998 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
2721 | enable_8259A_irq(0); | 2999 | enable_8259A_irq(0); |
2722 | 3000 | ||
@@ -2901,22 +3179,26 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
2901 | unsigned int irq; | 3179 | unsigned int irq; |
2902 | unsigned int new; | 3180 | unsigned int new; |
2903 | unsigned long flags; | 3181 | unsigned long flags; |
2904 | struct irq_cfg *cfg_new; | 3182 | struct irq_cfg *cfg_new = NULL; |
2905 | 3183 | int cpu = boot_cpu_id; | |
2906 | irq_want = nr_irqs - 1; | 3184 | struct irq_desc *desc_new = NULL; |
2907 | 3185 | ||
2908 | irq = 0; | 3186 | irq = 0; |
2909 | spin_lock_irqsave(&vector_lock, flags); | 3187 | spin_lock_irqsave(&vector_lock, flags); |
2910 | for (new = irq_want; new > 0; new--) { | 3188 | for (new = irq_want; new < NR_IRQS; new++) { |
2911 | if (platform_legacy_irq(new)) | 3189 | if (platform_legacy_irq(new)) |
2912 | continue; | 3190 | continue; |
2913 | cfg_new = irq_cfg(new); | 3191 | |
2914 | if (cfg_new && cfg_new->vector != 0) | 3192 | desc_new = irq_to_desc_alloc_cpu(new, cpu); |
3193 | if (!desc_new) { | ||
3194 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | ||
2915 | continue; | 3195 | continue; |
2916 | /* check if need to create one */ | 3196 | } |
2917 | if (!cfg_new) | 3197 | cfg_new = desc_new->chip_data; |
2918 | cfg_new = irq_cfg_alloc(new); | 3198 | |
2919 | if (__assign_irq_vector(new, TARGET_CPUS) == 0) | 3199 | if (cfg_new->vector != 0) |
3200 | continue; | ||
3201 | if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) | ||
2920 | irq = new; | 3202 | irq = new; |
2921 | break; | 3203 | break; |
2922 | } | 3204 | } |
@@ -2924,15 +3206,21 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
2924 | 3206 | ||
2925 | if (irq > 0) { | 3207 | if (irq > 0) { |
2926 | dynamic_irq_init(irq); | 3208 | dynamic_irq_init(irq); |
3209 | /* restore it, in case dynamic_irq_init clear it */ | ||
3210 | if (desc_new) | ||
3211 | desc_new->chip_data = cfg_new; | ||
2927 | } | 3212 | } |
2928 | return irq; | 3213 | return irq; |
2929 | } | 3214 | } |
2930 | 3215 | ||
3216 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
2931 | int create_irq(void) | 3217 | int create_irq(void) |
2932 | { | 3218 | { |
3219 | unsigned int irq_want; | ||
2933 | int irq; | 3220 | int irq; |
2934 | 3221 | ||
2935 | irq = create_irq_nr(nr_irqs - 1); | 3222 | irq_want = nr_irqs_gsi; |
3223 | irq = create_irq_nr(irq_want); | ||
2936 | 3224 | ||
2937 | if (irq == 0) | 3225 | if (irq == 0) |
2938 | irq = -1; | 3226 | irq = -1; |
@@ -2943,14 +3231,22 @@ int create_irq(void) | |||
2943 | void destroy_irq(unsigned int irq) | 3231 | void destroy_irq(unsigned int irq) |
2944 | { | 3232 | { |
2945 | unsigned long flags; | 3233 | unsigned long flags; |
3234 | struct irq_cfg *cfg; | ||
3235 | struct irq_desc *desc; | ||
2946 | 3236 | ||
3237 | /* store it, in case dynamic_irq_cleanup clear it */ | ||
3238 | desc = irq_to_desc(irq); | ||
3239 | cfg = desc->chip_data; | ||
2947 | dynamic_irq_cleanup(irq); | 3240 | dynamic_irq_cleanup(irq); |
3241 | /* connect back irq_cfg */ | ||
3242 | if (desc) | ||
3243 | desc->chip_data = cfg; | ||
2948 | 3244 | ||
2949 | #ifdef CONFIG_INTR_REMAP | 3245 | #ifdef CONFIG_INTR_REMAP |
2950 | free_irte(irq); | 3246 | free_irte(irq); |
2951 | #endif | 3247 | #endif |
2952 | spin_lock_irqsave(&vector_lock, flags); | 3248 | spin_lock_irqsave(&vector_lock, flags); |
2953 | __clear_irq_vector(irq); | 3249 | __clear_irq_vector(irq, cfg); |
2954 | spin_unlock_irqrestore(&vector_lock, flags); | 3250 | spin_unlock_irqrestore(&vector_lock, flags); |
2955 | } | 3251 | } |
2956 | 3252 | ||
@@ -2963,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2963 | struct irq_cfg *cfg; | 3259 | struct irq_cfg *cfg; |
2964 | int err; | 3260 | int err; |
2965 | unsigned dest; | 3261 | unsigned dest; |
2966 | cpumask_t tmp; | ||
2967 | 3262 | ||
2968 | tmp = TARGET_CPUS; | 3263 | cfg = irq_cfg(irq); |
2969 | err = assign_irq_vector(irq, tmp); | 3264 | err = assign_irq_vector(irq, cfg, TARGET_CPUS); |
2970 | if (err) | 3265 | if (err) |
2971 | return err; | 3266 | return err; |
2972 | 3267 | ||
2973 | cfg = irq_cfg(irq); | 3268 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); |
2974 | cpus_and(tmp, cfg->domain, tmp); | ||
2975 | dest = cpu_mask_to_apicid(tmp); | ||
2976 | 3269 | ||
2977 | #ifdef CONFIG_INTR_REMAP | 3270 | #ifdef CONFIG_INTR_REMAP |
2978 | if (irq_remapped(irq)) { | 3271 | if (irq_remapped(irq)) { |
@@ -3026,64 +3319,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3026 | } | 3319 | } |
3027 | 3320 | ||
3028 | #ifdef CONFIG_SMP | 3321 | #ifdef CONFIG_SMP |
3029 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3322 | static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3030 | { | 3323 | { |
3324 | struct irq_desc *desc = irq_to_desc(irq); | ||
3031 | struct irq_cfg *cfg; | 3325 | struct irq_cfg *cfg; |
3032 | struct msi_msg msg; | 3326 | struct msi_msg msg; |
3033 | unsigned int dest; | 3327 | unsigned int dest; |
3034 | cpumask_t tmp; | ||
3035 | struct irq_desc *desc; | ||
3036 | 3328 | ||
3037 | cpus_and(tmp, mask, cpu_online_map); | 3329 | dest = set_desc_affinity(desc, mask); |
3038 | if (cpus_empty(tmp)) | 3330 | if (dest == BAD_APICID) |
3039 | return; | 3331 | return; |
3040 | 3332 | ||
3041 | if (assign_irq_vector(irq, mask)) | 3333 | cfg = desc->chip_data; |
3042 | return; | ||
3043 | 3334 | ||
3044 | cfg = irq_cfg(irq); | 3335 | read_msi_msg_desc(desc, &msg); |
3045 | cpus_and(tmp, cfg->domain, mask); | ||
3046 | dest = cpu_mask_to_apicid(tmp); | ||
3047 | |||
3048 | read_msi_msg(irq, &msg); | ||
3049 | 3336 | ||
3050 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3337 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
3051 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3338 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
3052 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3339 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
3053 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3340 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3054 | 3341 | ||
3055 | write_msi_msg(irq, &msg); | 3342 | write_msi_msg_desc(desc, &msg); |
3056 | desc = irq_to_desc(irq); | ||
3057 | desc->affinity = mask; | ||
3058 | } | 3343 | } |
3059 | |||
3060 | #ifdef CONFIG_INTR_REMAP | 3344 | #ifdef CONFIG_INTR_REMAP |
3061 | /* | 3345 | /* |
3062 | * Migrate the MSI irq to another cpumask. This migration is | 3346 | * Migrate the MSI irq to another cpumask. This migration is |
3063 | * done in the process context using interrupt-remapping hardware. | 3347 | * done in the process context using interrupt-remapping hardware. |
3064 | */ | 3348 | */ |
3065 | static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3349 | static void |
3350 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | ||
3066 | { | 3351 | { |
3067 | struct irq_cfg *cfg; | 3352 | struct irq_desc *desc = irq_to_desc(irq); |
3353 | struct irq_cfg *cfg = desc->chip_data; | ||
3068 | unsigned int dest; | 3354 | unsigned int dest; |
3069 | cpumask_t tmp, cleanup_mask; | ||
3070 | struct irte irte; | 3355 | struct irte irte; |
3071 | struct irq_desc *desc; | ||
3072 | |||
3073 | cpus_and(tmp, mask, cpu_online_map); | ||
3074 | if (cpus_empty(tmp)) | ||
3075 | return; | ||
3076 | 3356 | ||
3077 | if (get_irte(irq, &irte)) | 3357 | if (get_irte(irq, &irte)) |
3078 | return; | 3358 | return; |
3079 | 3359 | ||
3080 | if (assign_irq_vector(irq, mask)) | 3360 | dest = set_desc_affinity(desc, mask); |
3361 | if (dest == BAD_APICID) | ||
3081 | return; | 3362 | return; |
3082 | 3363 | ||
3083 | cfg = irq_cfg(irq); | ||
3084 | cpus_and(tmp, cfg->domain, mask); | ||
3085 | dest = cpu_mask_to_apicid(tmp); | ||
3086 | |||
3087 | irte.vector = cfg->vector; | 3364 | irte.vector = cfg->vector; |
3088 | irte.dest_id = IRTE_DEST(dest); | 3365 | irte.dest_id = IRTE_DEST(dest); |
3089 | 3366 | ||
@@ -3097,16 +3374,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
3097 | * at the new destination. So, time to cleanup the previous | 3374 | * at the new destination. So, time to cleanup the previous |
3098 | * vector allocation. | 3375 | * vector allocation. |
3099 | */ | 3376 | */ |
3100 | if (cfg->move_in_progress) { | 3377 | if (cfg->move_in_progress) |
3101 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 3378 | send_cleanup_vector(cfg); |
3102 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
3103 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
3104 | cfg->move_in_progress = 0; | ||
3105 | } | ||
3106 | |||
3107 | desc = irq_to_desc(irq); | ||
3108 | desc->affinity = mask; | ||
3109 | } | 3379 | } |
3380 | |||
3110 | #endif | 3381 | #endif |
3111 | #endif /* CONFIG_SMP */ | 3382 | #endif /* CONFIG_SMP */ |
3112 | 3383 | ||
@@ -3165,7 +3436,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |||
3165 | } | 3436 | } |
3166 | #endif | 3437 | #endif |
3167 | 3438 | ||
3168 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | 3439 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) |
3169 | { | 3440 | { |
3170 | int ret; | 3441 | int ret; |
3171 | struct msi_msg msg; | 3442 | struct msi_msg msg; |
@@ -3174,7 +3445,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |||
3174 | if (ret < 0) | 3445 | if (ret < 0) |
3175 | return ret; | 3446 | return ret; |
3176 | 3447 | ||
3177 | set_irq_msi(irq, desc); | 3448 | set_irq_msi(irq, msidesc); |
3178 | write_msi_msg(irq, &msg); | 3449 | write_msi_msg(irq, &msg); |
3179 | 3450 | ||
3180 | #ifdef CONFIG_INTR_REMAP | 3451 | #ifdef CONFIG_INTR_REMAP |
@@ -3194,26 +3465,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |||
3194 | return 0; | 3465 | return 0; |
3195 | } | 3466 | } |
3196 | 3467 | ||
3197 | static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) | 3468 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) |
3198 | { | ||
3199 | unsigned int irq; | ||
3200 | |||
3201 | irq = dev->bus->number; | ||
3202 | irq <<= 8; | ||
3203 | irq |= dev->devfn; | ||
3204 | irq <<= 12; | ||
3205 | |||
3206 | return irq; | ||
3207 | } | ||
3208 | |||
3209 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
3210 | { | 3469 | { |
3211 | unsigned int irq; | 3470 | unsigned int irq; |
3212 | int ret; | 3471 | int ret; |
3213 | unsigned int irq_want; | 3472 | unsigned int irq_want; |
3214 | 3473 | ||
3215 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | 3474 | irq_want = nr_irqs_gsi; |
3216 | |||
3217 | irq = create_irq_nr(irq_want); | 3475 | irq = create_irq_nr(irq_want); |
3218 | if (irq == 0) | 3476 | if (irq == 0) |
3219 | return -1; | 3477 | return -1; |
@@ -3227,7 +3485,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | |||
3227 | goto error; | 3485 | goto error; |
3228 | no_ir: | 3486 | no_ir: |
3229 | #endif | 3487 | #endif |
3230 | ret = setup_msi_irq(dev, desc, irq); | 3488 | ret = setup_msi_irq(dev, msidesc, irq); |
3231 | if (ret < 0) { | 3489 | if (ret < 0) { |
3232 | destroy_irq(irq); | 3490 | destroy_irq(irq); |
3233 | return ret; | 3491 | return ret; |
@@ -3245,7 +3503,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3245 | { | 3503 | { |
3246 | unsigned int irq; | 3504 | unsigned int irq; |
3247 | int ret, sub_handle; | 3505 | int ret, sub_handle; |
3248 | struct msi_desc *desc; | 3506 | struct msi_desc *msidesc; |
3249 | unsigned int irq_want; | 3507 | unsigned int irq_want; |
3250 | 3508 | ||
3251 | #ifdef CONFIG_INTR_REMAP | 3509 | #ifdef CONFIG_INTR_REMAP |
@@ -3253,10 +3511,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3253 | int index = 0; | 3511 | int index = 0; |
3254 | #endif | 3512 | #endif |
3255 | 3513 | ||
3256 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | 3514 | irq_want = nr_irqs_gsi; |
3257 | sub_handle = 0; | 3515 | sub_handle = 0; |
3258 | list_for_each_entry(desc, &dev->msi_list, list) { | 3516 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
3259 | irq = create_irq_nr(irq_want--); | 3517 | irq = create_irq_nr(irq_want); |
3518 | irq_want++; | ||
3260 | if (irq == 0) | 3519 | if (irq == 0) |
3261 | return -1; | 3520 | return -1; |
3262 | #ifdef CONFIG_INTR_REMAP | 3521 | #ifdef CONFIG_INTR_REMAP |
@@ -3288,7 +3547,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3288 | } | 3547 | } |
3289 | no_ir: | 3548 | no_ir: |
3290 | #endif | 3549 | #endif |
3291 | ret = setup_msi_irq(dev, desc, irq); | 3550 | ret = setup_msi_irq(dev, msidesc, irq); |
3292 | if (ret < 0) | 3551 | if (ret < 0) |
3293 | goto error; | 3552 | goto error; |
3294 | sub_handle++; | 3553 | sub_handle++; |
@@ -3307,24 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
3307 | 3566 | ||
3308 | #ifdef CONFIG_DMAR | 3567 | #ifdef CONFIG_DMAR |
3309 | #ifdef CONFIG_SMP | 3568 | #ifdef CONFIG_SMP |
3310 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3569 | static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3311 | { | 3570 | { |
3571 | struct irq_desc *desc = irq_to_desc(irq); | ||
3312 | struct irq_cfg *cfg; | 3572 | struct irq_cfg *cfg; |
3313 | struct msi_msg msg; | 3573 | struct msi_msg msg; |
3314 | unsigned int dest; | 3574 | unsigned int dest; |
3315 | cpumask_t tmp; | ||
3316 | struct irq_desc *desc; | ||
3317 | 3575 | ||
3318 | cpus_and(tmp, mask, cpu_online_map); | 3576 | dest = set_desc_affinity(desc, mask); |
3319 | if (cpus_empty(tmp)) | 3577 | if (dest == BAD_APICID) |
3320 | return; | 3578 | return; |
3321 | 3579 | ||
3322 | if (assign_irq_vector(irq, mask)) | 3580 | cfg = desc->chip_data; |
3323 | return; | ||
3324 | |||
3325 | cfg = irq_cfg(irq); | ||
3326 | cpus_and(tmp, cfg->domain, mask); | ||
3327 | dest = cpu_mask_to_apicid(tmp); | ||
3328 | 3581 | ||
3329 | dmar_msi_read(irq, &msg); | 3582 | dmar_msi_read(irq, &msg); |
3330 | 3583 | ||
@@ -3334,9 +3587,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
3334 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3587 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3335 | 3588 | ||
3336 | dmar_msi_write(irq, &msg); | 3589 | dmar_msi_write(irq, &msg); |
3337 | desc = irq_to_desc(irq); | ||
3338 | desc->affinity = mask; | ||
3339 | } | 3590 | } |
3591 | |||
3340 | #endif /* CONFIG_SMP */ | 3592 | #endif /* CONFIG_SMP */ |
3341 | 3593 | ||
3342 | struct irq_chip dmar_msi_type = { | 3594 | struct irq_chip dmar_msi_type = { |
@@ -3368,24 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3368 | #ifdef CONFIG_HPET_TIMER | 3620 | #ifdef CONFIG_HPET_TIMER |
3369 | 3621 | ||
3370 | #ifdef CONFIG_SMP | 3622 | #ifdef CONFIG_SMP |
3371 | static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3623 | static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3372 | { | 3624 | { |
3625 | struct irq_desc *desc = irq_to_desc(irq); | ||
3373 | struct irq_cfg *cfg; | 3626 | struct irq_cfg *cfg; |
3374 | struct irq_desc *desc; | ||
3375 | struct msi_msg msg; | 3627 | struct msi_msg msg; |
3376 | unsigned int dest; | 3628 | unsigned int dest; |
3377 | cpumask_t tmp; | ||
3378 | 3629 | ||
3379 | cpus_and(tmp, mask, cpu_online_map); | 3630 | dest = set_desc_affinity(desc, mask); |
3380 | if (cpus_empty(tmp)) | 3631 | if (dest == BAD_APICID) |
3381 | return; | 3632 | return; |
3382 | 3633 | ||
3383 | if (assign_irq_vector(irq, mask)) | 3634 | cfg = desc->chip_data; |
3384 | return; | ||
3385 | |||
3386 | cfg = irq_cfg(irq); | ||
3387 | cpus_and(tmp, cfg->domain, mask); | ||
3388 | dest = cpu_mask_to_apicid(tmp); | ||
3389 | 3635 | ||
3390 | hpet_msi_read(irq, &msg); | 3636 | hpet_msi_read(irq, &msg); |
3391 | 3637 | ||
@@ -3395,9 +3641,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
3395 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3641 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3396 | 3642 | ||
3397 | hpet_msi_write(irq, &msg); | 3643 | hpet_msi_write(irq, &msg); |
3398 | desc = irq_to_desc(irq); | ||
3399 | desc->affinity = mask; | ||
3400 | } | 3644 | } |
3645 | |||
3401 | #endif /* CONFIG_SMP */ | 3646 | #endif /* CONFIG_SMP */ |
3402 | 3647 | ||
3403 | struct irq_chip hpet_msi_type = { | 3648 | struct irq_chip hpet_msi_type = { |
@@ -3450,28 +3695,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
3450 | write_ht_irq_msg(irq, &msg); | 3695 | write_ht_irq_msg(irq, &msg); |
3451 | } | 3696 | } |
3452 | 3697 | ||
3453 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) | 3698 | static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3454 | { | 3699 | { |
3700 | struct irq_desc *desc = irq_to_desc(irq); | ||
3455 | struct irq_cfg *cfg; | 3701 | struct irq_cfg *cfg; |
3456 | unsigned int dest; | 3702 | unsigned int dest; |
3457 | cpumask_t tmp; | ||
3458 | struct irq_desc *desc; | ||
3459 | 3703 | ||
3460 | cpus_and(tmp, mask, cpu_online_map); | 3704 | dest = set_desc_affinity(desc, mask); |
3461 | if (cpus_empty(tmp)) | 3705 | if (dest == BAD_APICID) |
3462 | return; | 3706 | return; |
3463 | 3707 | ||
3464 | if (assign_irq_vector(irq, mask)) | 3708 | cfg = desc->chip_data; |
3465 | return; | ||
3466 | |||
3467 | cfg = irq_cfg(irq); | ||
3468 | cpus_and(tmp, cfg->domain, mask); | ||
3469 | dest = cpu_mask_to_apicid(tmp); | ||
3470 | 3709 | ||
3471 | target_ht_irq(irq, dest, cfg->vector); | 3710 | target_ht_irq(irq, dest, cfg->vector); |
3472 | desc = irq_to_desc(irq); | ||
3473 | desc->affinity = mask; | ||
3474 | } | 3711 | } |
3712 | |||
3475 | #endif | 3713 | #endif |
3476 | 3714 | ||
3477 | static struct irq_chip ht_irq_chip = { | 3715 | static struct irq_chip ht_irq_chip = { |
@@ -3489,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3489 | { | 3727 | { |
3490 | struct irq_cfg *cfg; | 3728 | struct irq_cfg *cfg; |
3491 | int err; | 3729 | int err; |
3492 | cpumask_t tmp; | ||
3493 | 3730 | ||
3494 | tmp = TARGET_CPUS; | 3731 | cfg = irq_cfg(irq); |
3495 | err = assign_irq_vector(irq, tmp); | 3732 | err = assign_irq_vector(irq, cfg, TARGET_CPUS); |
3496 | if (!err) { | 3733 | if (!err) { |
3497 | struct ht_irq_msg msg; | 3734 | struct ht_irq_msg msg; |
3498 | unsigned dest; | 3735 | unsigned dest; |
3499 | 3736 | ||
3500 | cfg = irq_cfg(irq); | 3737 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); |
3501 | cpus_and(tmp, cfg->domain, tmp); | ||
3502 | dest = cpu_mask_to_apicid(tmp); | ||
3503 | 3738 | ||
3504 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | 3739 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
3505 | 3740 | ||
@@ -3535,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3535 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | 3770 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, |
3536 | unsigned long mmr_offset) | 3771 | unsigned long mmr_offset) |
3537 | { | 3772 | { |
3538 | const cpumask_t *eligible_cpu = get_cpu_mask(cpu); | 3773 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
3539 | struct irq_cfg *cfg; | 3774 | struct irq_cfg *cfg; |
3540 | int mmr_pnode; | 3775 | int mmr_pnode; |
3541 | unsigned long mmr_value; | 3776 | unsigned long mmr_value; |
@@ -3543,7 +3778,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3543 | unsigned long flags; | 3778 | unsigned long flags; |
3544 | int err; | 3779 | int err; |
3545 | 3780 | ||
3546 | err = assign_irq_vector(irq, *eligible_cpu); | 3781 | cfg = irq_cfg(irq); |
3782 | |||
3783 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
3547 | if (err != 0) | 3784 | if (err != 0) |
3548 | return err; | 3785 | return err; |
3549 | 3786 | ||
@@ -3552,8 +3789,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3552 | irq_name); | 3789 | irq_name); |
3553 | spin_unlock_irqrestore(&vector_lock, flags); | 3790 | spin_unlock_irqrestore(&vector_lock, flags); |
3554 | 3791 | ||
3555 | cfg = irq_cfg(irq); | ||
3556 | |||
3557 | mmr_value = 0; | 3792 | mmr_value = 0; |
3558 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | 3793 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; |
3559 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | 3794 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); |
@@ -3564,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3564 | entry->polarity = 0; | 3799 | entry->polarity = 0; |
3565 | entry->trigger = 0; | 3800 | entry->trigger = 0; |
3566 | entry->mask = 0; | 3801 | entry->mask = 0; |
3567 | entry->dest = cpu_mask_to_apicid(*eligible_cpu); | 3802 | entry->dest = cpu_mask_to_apicid(eligible_cpu); |
3568 | 3803 | ||
3569 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3804 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
3570 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3805 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
@@ -3605,9 +3840,16 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3605 | return reg_01.bits.entries; | 3840 | return reg_01.bits.entries; |
3606 | } | 3841 | } |
3607 | 3842 | ||
3608 | int __init probe_nr_irqs(void) | 3843 | void __init probe_nr_irqs_gsi(void) |
3609 | { | 3844 | { |
3610 | return NR_IRQS; | 3845 | int idx; |
3846 | int nr = 0; | ||
3847 | |||
3848 | for (idx = 0; idx < nr_ioapics; idx++) | ||
3849 | nr += io_apic_get_redir_entries(idx) + 1; | ||
3850 | |||
3851 | if (nr > nr_irqs_gsi) | ||
3852 | nr_irqs_gsi = nr; | ||
3611 | } | 3853 | } |
3612 | 3854 | ||
3613 | /* -------------------------------------------------------------------------- | 3855 | /* -------------------------------------------------------------------------- |
@@ -3706,19 +3948,31 @@ int __init io_apic_get_version(int ioapic) | |||
3706 | 3948 | ||
3707 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) | 3949 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) |
3708 | { | 3950 | { |
3951 | struct irq_desc *desc; | ||
3952 | struct irq_cfg *cfg; | ||
3953 | int cpu = boot_cpu_id; | ||
3954 | |||
3709 | if (!IO_APIC_IRQ(irq)) { | 3955 | if (!IO_APIC_IRQ(irq)) { |
3710 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | 3956 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", |
3711 | ioapic); | 3957 | ioapic); |
3712 | return -EINVAL; | 3958 | return -EINVAL; |
3713 | } | 3959 | } |
3714 | 3960 | ||
3961 | desc = irq_to_desc_alloc_cpu(irq, cpu); | ||
3962 | if (!desc) { | ||
3963 | printk(KERN_INFO "can not get irq_desc %d\n", irq); | ||
3964 | return 0; | ||
3965 | } | ||
3966 | |||
3715 | /* | 3967 | /* |
3716 | * IRQs < 16 are already in the irq_2_pin[] map | 3968 | * IRQs < 16 are already in the irq_2_pin[] map |
3717 | */ | 3969 | */ |
3718 | if (irq >= 16) | 3970 | if (irq >= NR_IRQS_LEGACY) { |
3719 | add_pin_to_irq(irq, ioapic, pin); | 3971 | cfg = desc->chip_data; |
3972 | add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); | ||
3973 | } | ||
3720 | 3974 | ||
3721 | setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); | 3975 | setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); |
3722 | 3976 | ||
3723 | return 0; | 3977 | return 0; |
3724 | } | 3978 | } |
@@ -3756,7 +4010,7 @@ void __init setup_ioapic_dest(void) | |||
3756 | int pin, ioapic, irq, irq_entry; | 4010 | int pin, ioapic, irq, irq_entry; |
3757 | struct irq_desc *desc; | 4011 | struct irq_desc *desc; |
3758 | struct irq_cfg *cfg; | 4012 | struct irq_cfg *cfg; |
3759 | cpumask_t mask; | 4013 | const struct cpumask *mask; |
3760 | 4014 | ||
3761 | if (skip_ioapic_setup == 1) | 4015 | if (skip_ioapic_setup == 1) |
3762 | return; | 4016 | return; |
@@ -3772,9 +4026,10 @@ void __init setup_ioapic_dest(void) | |||
3772 | * when you have too many devices, because at that time only boot | 4026 | * when you have too many devices, because at that time only boot |
3773 | * cpu is online. | 4027 | * cpu is online. |
3774 | */ | 4028 | */ |
3775 | cfg = irq_cfg(irq); | 4029 | desc = irq_to_desc(irq); |
4030 | cfg = desc->chip_data; | ||
3776 | if (!cfg->vector) { | 4031 | if (!cfg->vector) { |
3777 | setup_IO_APIC_irq(ioapic, pin, irq, | 4032 | setup_IO_APIC_irq(ioapic, pin, irq, desc, |
3778 | irq_trigger(irq_entry), | 4033 | irq_trigger(irq_entry), |
3779 | irq_polarity(irq_entry)); | 4034 | irq_polarity(irq_entry)); |
3780 | continue; | 4035 | continue; |
@@ -3784,19 +4039,18 @@ void __init setup_ioapic_dest(void) | |||
3784 | /* | 4039 | /* |
3785 | * Honour affinities which have been set in early boot | 4040 | * Honour affinities which have been set in early boot |
3786 | */ | 4041 | */ |
3787 | desc = irq_to_desc(irq); | ||
3788 | if (desc->status & | 4042 | if (desc->status & |
3789 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | 4043 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) |
3790 | mask = desc->affinity; | 4044 | mask = &desc->affinity; |
3791 | else | 4045 | else |
3792 | mask = TARGET_CPUS; | 4046 | mask = TARGET_CPUS; |
3793 | 4047 | ||
3794 | #ifdef CONFIG_INTR_REMAP | 4048 | #ifdef CONFIG_INTR_REMAP |
3795 | if (intr_remapping_enabled) | 4049 | if (intr_remapping_enabled) |
3796 | set_ir_ioapic_affinity_irq(irq, mask); | 4050 | set_ir_ioapic_affinity_irq_desc(desc, mask); |
3797 | else | 4051 | else |
3798 | #endif | 4052 | #endif |
3799 | set_ioapic_affinity_irq(irq, mask); | 4053 | set_ioapic_affinity_irq_desc(desc, mask); |
3800 | } | 4054 | } |
3801 | 4055 | ||
3802 | } | 4056 | } |
@@ -3845,7 +4099,6 @@ void __init ioapic_init_mappings(void) | |||
3845 | struct resource *ioapic_res; | 4099 | struct resource *ioapic_res; |
3846 | int i; | 4100 | int i; |
3847 | 4101 | ||
3848 | irq_2_pin_init(); | ||
3849 | ioapic_res = ioapic_setup_resources(); | 4102 | ioapic_res = ioapic_setup_resources(); |
3850 | for (i = 0; i < nr_ioapics; i++) { | 4103 | for (i = 0; i < nr_ioapics; i++) { |
3851 | if (smp_found_config) { | 4104 | if (smp_found_config) { |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index f1c688e46f35..285bbf8831fa 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
116 | /* | 116 | /* |
117 | * This is only used on smaller machines. | 117 | * This is only used on smaller machines. |
118 | */ | 118 | */ |
119 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | 119 | void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) |
120 | { | 120 | { |
121 | unsigned long mask = cpus_addr(cpumask)[0]; | 121 | unsigned long mask = cpumask_bits(cpumask)[0]; |
122 | unsigned long flags; | 122 | unsigned long flags; |
123 | 123 | ||
124 | local_irq_save(flags); | 124 | local_irq_save(flags); |
125 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | 125 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); |
126 | __send_IPI_dest_field(mask, vector); | 126 | __send_IPI_dest_field(mask, vector); |
127 | local_irq_restore(flags); | 127 | local_irq_restore(flags); |
128 | } | 128 | } |
129 | 129 | ||
130 | void send_IPI_mask_sequence(cpumask_t mask, int vector) | 130 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector) |
131 | { | 131 | { |
132 | unsigned long flags; | 132 | unsigned long flags; |
133 | unsigned int query_cpu; | 133 | unsigned int query_cpu; |
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
139 | */ | 139 | */ |
140 | 140 | ||
141 | local_irq_save(flags); | 141 | local_irq_save(flags); |
142 | for_each_possible_cpu(query_cpu) { | 142 | for_each_cpu(query_cpu, mask) |
143 | if (cpu_isset(query_cpu, mask)) { | 143 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); |
144 | local_irq_restore(flags); | ||
145 | } | ||
146 | |||
147 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
148 | { | ||
149 | unsigned long flags; | ||
150 | unsigned int query_cpu; | ||
151 | unsigned int this_cpu = smp_processor_id(); | ||
152 | |||
153 | /* See Hack comment above */ | ||
154 | |||
155 | local_irq_save(flags); | ||
156 | for_each_cpu(query_cpu, mask) | ||
157 | if (query_cpu != this_cpu) | ||
144 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | 158 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), |
145 | vector); | 159 | vector); |
146 | } | ||
147 | } | ||
148 | local_irq_restore(flags); | 160 | local_irq_restore(flags); |
149 | } | 161 | } |
150 | 162 | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc52f649..bce53e1352a0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <asm/apic.h> | 9 | #include <asm/apic.h> |
10 | #include <asm/io_apic.h> | 10 | #include <asm/io_apic.h> |
11 | #include <asm/smp.h> | 11 | #include <asm/smp.h> |
12 | #include <asm/irq.h> | ||
12 | 13 | ||
13 | atomic_t irq_err_count; | 14 | atomic_t irq_err_count; |
14 | 15 | ||
@@ -118,6 +119,9 @@ int show_interrupts(struct seq_file *p, void *v) | |||
118 | } | 119 | } |
119 | 120 | ||
120 | desc = irq_to_desc(i); | 121 | desc = irq_to_desc(i); |
122 | if (!desc) | ||
123 | return 0; | ||
124 | |||
121 | spin_lock_irqsave(&desc->lock, flags); | 125 | spin_lock_irqsave(&desc->lock, flags); |
122 | #ifndef CONFIG_SMP | 126 | #ifndef CONFIG_SMP |
123 | any_count = kstat_irqs(i); | 127 | any_count = kstat_irqs(i); |
@@ -187,3 +191,5 @@ u64 arch_irq_stat(void) | |||
187 | #endif | 191 | #endif |
188 | return sum; | 192 | return sum; |
189 | } | 193 | } |
194 | |||
195 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a51382672de0..9dc5588f336a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -233,25 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs) | |||
233 | #ifdef CONFIG_HOTPLUG_CPU | 233 | #ifdef CONFIG_HOTPLUG_CPU |
234 | #include <mach_apic.h> | 234 | #include <mach_apic.h> |
235 | 235 | ||
236 | void fixup_irqs(cpumask_t map) | 236 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
237 | void fixup_irqs(void) | ||
237 | { | 238 | { |
238 | unsigned int irq; | 239 | unsigned int irq; |
239 | static int warned; | 240 | static int warned; |
240 | struct irq_desc *desc; | 241 | struct irq_desc *desc; |
241 | 242 | ||
242 | for_each_irq_desc(irq, desc) { | 243 | for_each_irq_desc(irq, desc) { |
243 | cpumask_t mask; | 244 | const struct cpumask *affinity; |
244 | 245 | ||
246 | if (!desc) | ||
247 | continue; | ||
245 | if (irq == 2) | 248 | if (irq == 2) |
246 | continue; | 249 | continue; |
247 | 250 | ||
248 | cpus_and(mask, desc->affinity, map); | 251 | affinity = &desc->affinity; |
249 | if (any_online_cpu(mask) == NR_CPUS) { | 252 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
250 | printk("Breaking affinity for irq %i\n", irq); | 253 | printk("Breaking affinity for irq %i\n", irq); |
251 | mask = map; | 254 | affinity = cpu_all_mask; |
252 | } | 255 | } |
253 | if (desc->chip->set_affinity) | 256 | if (desc->chip->set_affinity) |
254 | desc->chip->set_affinity(irq, mask); | 257 | desc->chip->set_affinity(irq, affinity); |
255 | else if (desc->action && !(warned++)) | 258 | else if (desc->action && !(warned++)) |
256 | printk("Cannot set affinity for irq %i\n", irq); | 259 | printk("Cannot set affinity for irq %i\n", irq); |
257 | } | 260 | } |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1df869e5bd0b..6383d50f82ea 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -80,40 +80,43 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
80 | } | 80 | } |
81 | 81 | ||
82 | #ifdef CONFIG_HOTPLUG_CPU | 82 | #ifdef CONFIG_HOTPLUG_CPU |
83 | void fixup_irqs(cpumask_t map) | 83 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
84 | void fixup_irqs(void) | ||
84 | { | 85 | { |
85 | unsigned int irq; | 86 | unsigned int irq; |
86 | static int warned; | 87 | static int warned; |
87 | struct irq_desc *desc; | 88 | struct irq_desc *desc; |
88 | 89 | ||
89 | for_each_irq_desc(irq, desc) { | 90 | for_each_irq_desc(irq, desc) { |
90 | cpumask_t mask; | ||
91 | int break_affinity = 0; | 91 | int break_affinity = 0; |
92 | int set_affinity = 1; | 92 | int set_affinity = 1; |
93 | const struct cpumask *affinity; | ||
93 | 94 | ||
95 | if (!desc) | ||
96 | continue; | ||
94 | if (irq == 2) | 97 | if (irq == 2) |
95 | continue; | 98 | continue; |
96 | 99 | ||
97 | /* interrupt's are disabled at this point */ | 100 | /* interrupt's are disabled at this point */ |
98 | spin_lock(&desc->lock); | 101 | spin_lock(&desc->lock); |
99 | 102 | ||
103 | affinity = &desc->affinity; | ||
100 | if (!irq_has_action(irq) || | 104 | if (!irq_has_action(irq) || |
101 | cpus_equal(desc->affinity, map)) { | 105 | cpumask_equal(affinity, cpu_online_mask)) { |
102 | spin_unlock(&desc->lock); | 106 | spin_unlock(&desc->lock); |
103 | continue; | 107 | continue; |
104 | } | 108 | } |
105 | 109 | ||
106 | cpus_and(mask, desc->affinity, map); | 110 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
107 | if (cpus_empty(mask)) { | ||
108 | break_affinity = 1; | 111 | break_affinity = 1; |
109 | mask = map; | 112 | affinity = cpu_all_mask; |
110 | } | 113 | } |
111 | 114 | ||
112 | if (desc->chip->mask) | 115 | if (desc->chip->mask) |
113 | desc->chip->mask(irq); | 116 | desc->chip->mask(irq); |
114 | 117 | ||
115 | if (desc->chip->set_affinity) | 118 | if (desc->chip->set_affinity) |
116 | desc->chip->set_affinity(irq, mask); | 119 | desc->chip->set_affinity(irq, affinity); |
117 | else if (!(warned++)) | 120 | else if (!(warned++)) |
118 | set_affinity = 0; | 121 | set_affinity = 0; |
119 | 122 | ||
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 607db63044a5..84723295f88a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void) | |||
68 | /* | 68 | /* |
69 | * 16 old-style INTA-cycle interrupts: | 69 | * 16 old-style INTA-cycle interrupts: |
70 | */ | 70 | */ |
71 | for (i = 0; i < 16; i++) { | 71 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
72 | /* first time call this irq_desc */ | ||
73 | struct irq_desc *desc = irq_to_desc(i); | 72 | struct irq_desc *desc = irq_to_desc(i); |
74 | 73 | ||
75 | desc->status = IRQ_DISABLED; | 74 | desc->status = IRQ_DISABLED; |
@@ -111,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
111 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 110 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
112 | }; | 111 | }; |
113 | 112 | ||
113 | int vector_used_by_percpu_irq(unsigned int vector) | ||
114 | { | ||
115 | int cpu; | ||
116 | |||
117 | for_each_online_cpu(cpu) { | ||
118 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
119 | return 1; | ||
120 | } | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
114 | /* Overridden in paravirt.c */ | 125 | /* Overridden in paravirt.c */ |
115 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 126 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
116 | 127 | ||
@@ -147,10 +158,12 @@ void __init native_init_IRQ(void) | |||
147 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 158 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
148 | 159 | ||
149 | /* IPI for single call function */ | 160 | /* IPI for single call function */ |
150 | set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); | 161 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
162 | call_function_single_interrupt); | ||
151 | 163 | ||
152 | /* Low priority IPI to cleanup after moving an irq */ | 164 | /* Low priority IPI to cleanup after moving an irq */ |
153 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 165 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
166 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
154 | #endif | 167 | #endif |
155 | 168 | ||
156 | #ifdef CONFIG_X86_LOCAL_APIC | 169 | #ifdef CONFIG_X86_LOCAL_APIC |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 8670b3ce626e..31ebfe38e96c 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
69 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 69 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
70 | }; | 70 | }; |
71 | 71 | ||
72 | int vector_used_by_percpu_irq(unsigned int vector) | ||
73 | { | ||
74 | int cpu; | ||
75 | |||
76 | for_each_online_cpu(cpu) { | ||
77 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
78 | return 1; | ||
79 | } | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
72 | void __init init_ISA_irqs(void) | 84 | void __init init_ISA_irqs(void) |
73 | { | 85 | { |
74 | int i; | 86 | int i; |
@@ -76,8 +88,7 @@ void __init init_ISA_irqs(void) | |||
76 | init_bsp_APIC(); | 88 | init_bsp_APIC(); |
77 | init_8259A(0); | 89 | init_8259A(0); |
78 | 90 | ||
79 | for (i = 0; i < 16; i++) { | 91 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
80 | /* first time call this irq_desc */ | ||
81 | struct irq_desc *desc = irq_to_desc(i); | 92 | struct irq_desc *desc = irq_to_desc(i); |
82 | 93 | ||
83 | desc->status = IRQ_DISABLED; | 94 | desc->status = IRQ_DISABLED; |
@@ -122,6 +133,7 @@ static void __init smp_intr_init(void) | |||
122 | 133 | ||
123 | /* Low priority IPI to cleanup after moving an irq */ | 134 | /* Low priority IPI to cleanup after moving an irq */ |
124 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 135 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
136 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
125 | #endif | 137 | #endif |
126 | } | 138 | } |
127 | 139 | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e169ae9b6a62..652fce6d2cce 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) | |||
89 | */ | 89 | */ |
90 | static unsigned long kvm_get_tsc_khz(void) | 90 | static unsigned long kvm_get_tsc_khz(void) |
91 | { | 91 | { |
92 | return preset_lpj; | 92 | struct pvclock_vcpu_time_info *src; |
93 | src = &per_cpu(hv_clock, 0); | ||
94 | return pvclock_tsc_khz(src); | ||
93 | } | 95 | } |
94 | 96 | ||
95 | static void kvm_get_preset_lpj(void) | 97 | static void kvm_get_preset_lpj(void) |
96 | { | 98 | { |
97 | struct pvclock_vcpu_time_info *src; | ||
98 | unsigned long khz; | 99 | unsigned long khz; |
99 | u64 lpj; | 100 | u64 lpj; |
100 | 101 | ||
101 | src = &per_cpu(hv_clock, 0); | 102 | khz = kvm_get_tsc_khz(); |
102 | khz = pvclock_tsc_khz(src); | ||
103 | 103 | ||
104 | lpj = ((u64)khz * 1000); | 104 | lpj = ((u64)khz * 1000); |
105 | do_div(lpj, HZ); | 105 | do_div(lpj, HZ); |
@@ -194,5 +194,7 @@ void __init kvmclock_init(void) | |||
194 | #endif | 194 | #endif |
195 | kvm_get_preset_lpj(); | 195 | kvm_get_preset_lpj(); |
196 | clocksource_register(&kvm_clock); | 196 | clocksource_register(&kvm_clock); |
197 | pv_info.paravirt_enabled = 1; | ||
198 | pv_info.name = "KVM"; | ||
197 | } | 199 | } |
198 | } | 200 | } |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3b599518c322..c12314c9e86f 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { | |||
287 | .set_mode = mfgpt_set_mode, | 287 | .set_mode = mfgpt_set_mode, |
288 | .set_next_event = mfgpt_next_event, | 288 | .set_next_event = mfgpt_next_event, |
289 | .rating = 250, | 289 | .rating = 250, |
290 | .cpumask = CPU_MASK_ALL, | 290 | .cpumask = cpu_all_mask, |
291 | .shift = 32 | 291 | .shift = 32 |
292 | }; | 292 | }; |
293 | 293 | ||
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7a3dfceb90e4..19a1044a0cd9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -101,11 +101,15 @@ static void __init dma32_free_bootmem(void) | |||
101 | dma32_bootmem_ptr = NULL; | 101 | dma32_bootmem_ptr = NULL; |
102 | dma32_bootmem_size = 0; | 102 | dma32_bootmem_size = 0; |
103 | } | 103 | } |
104 | #endif | ||
104 | 105 | ||
105 | void __init pci_iommu_alloc(void) | 106 | void __init pci_iommu_alloc(void) |
106 | { | 107 | { |
108 | #ifdef CONFIG_X86_64 | ||
107 | /* free the range so iommu could get some range less than 4G */ | 109 | /* free the range so iommu could get some range less than 4G */ |
108 | dma32_free_bootmem(); | 110 | dma32_free_bootmem(); |
111 | #endif | ||
112 | |||
109 | /* | 113 | /* |
110 | * The order of these functions is important for | 114 | * The order of these functions is important for |
111 | * fall-back/fail-over reasons | 115 | * fall-back/fail-over reasons |
@@ -121,15 +125,6 @@ void __init pci_iommu_alloc(void) | |||
121 | pci_swiotlb_init(); | 125 | pci_swiotlb_init(); |
122 | } | 126 | } |
123 | 127 | ||
124 | unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) | ||
125 | { | ||
126 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
127 | |||
128 | return size >> PAGE_SHIFT; | ||
129 | } | ||
130 | EXPORT_SYMBOL(iommu_nr_pages); | ||
131 | #endif | ||
132 | |||
133 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 128 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
134 | dma_addr_t *dma_addr, gfp_t flag) | 129 | dma_addr_t *dma_addr, gfp_t flag) |
135 | { | 130 | { |
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3c539d111abb..242c3440687f 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -3,6 +3,8 @@ | |||
3 | #include <linux/pci.h> | 3 | #include <linux/pci.h> |
4 | #include <linux/cache.h> | 4 | #include <linux/cache.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/swiotlb.h> | ||
7 | #include <linux/bootmem.h> | ||
6 | #include <linux/dma-mapping.h> | 8 | #include <linux/dma-mapping.h> |
7 | 9 | ||
8 | #include <asm/iommu.h> | 10 | #include <asm/iommu.h> |
@@ -11,6 +13,31 @@ | |||
11 | 13 | ||
12 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
13 | 15 | ||
16 | void *swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
17 | { | ||
18 | return alloc_bootmem_low_pages(size); | ||
19 | } | ||
20 | |||
21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
22 | { | ||
23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
24 | } | ||
25 | |||
26 | dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr) | ||
27 | { | ||
28 | return paddr; | ||
29 | } | ||
30 | |||
31 | phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) | ||
32 | { | ||
33 | return baddr; | ||
34 | } | ||
35 | |||
36 | int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
14 | static dma_addr_t | 41 | static dma_addr_t |
15 | swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | 42 | swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, |
16 | int direction) | 43 | int direction) |
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = { | |||
50 | void __init pci_swiotlb_init(void) | 77 | void __init pci_swiotlb_init(void) |
51 | { | 78 | { |
52 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 79 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
80 | #ifdef CONFIG_X86_64 | ||
53 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) | 81 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) |
54 | swiotlb = 1; | 82 | swiotlb = 1; |
83 | #endif | ||
55 | if (swiotlb_force) | 84 | if (swiotlb_force) |
56 | swiotlb = 1; | 85 | swiotlb = 1; |
57 | if (swiotlb) { | 86 | if (swiotlb) { |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 67465ed89310..309949e9e1c1 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, | |||
168 | ich_force_enable_hpet); | 168 | ich_force_enable_hpet); |
169 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, | 169 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, |
170 | ich_force_enable_hpet); | 170 | ich_force_enable_hpet); |
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, | ||
172 | ich_force_enable_hpet); | ||
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, | 173 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, |
172 | ich_force_enable_hpet); | 174 | ich_force_enable_hpet); |
173 | 175 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a90913cccfb7..bf088c61fa40 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | #include <asm/reboot.h> | 14 | #include <asm/reboot.h> |
15 | #include <asm/pci_x86.h> | 15 | #include <asm/pci_x86.h> |
16 | #include <asm/virtext.h> | ||
16 | 17 | ||
17 | #ifdef CONFIG_X86_32 | 18 | #ifdef CONFIG_X86_32 |
18 | # include <linux/dmi.h> | 19 | # include <linux/dmi.h> |
@@ -39,6 +40,12 @@ int reboot_force; | |||
39 | static int reboot_cpu = -1; | 40 | static int reboot_cpu = -1; |
40 | #endif | 41 | #endif |
41 | 42 | ||
43 | /* This is set if we need to go through the 'emergency' path. | ||
44 | * When machine_emergency_restart() is called, we may be on | ||
45 | * an inconsistent state and won't be able to do a clean cleanup | ||
46 | */ | ||
47 | static int reboot_emergency; | ||
48 | |||
42 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ | 49 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ |
43 | bool port_cf9_safe = false; | 50 | bool port_cf9_safe = false; |
44 | 51 | ||
@@ -368,6 +375,48 @@ static inline void kb_wait(void) | |||
368 | } | 375 | } |
369 | } | 376 | } |
370 | 377 | ||
378 | static void vmxoff_nmi(int cpu, struct die_args *args) | ||
379 | { | ||
380 | cpu_emergency_vmxoff(); | ||
381 | } | ||
382 | |||
383 | /* Use NMIs as IPIs to tell all CPUs to disable virtualization | ||
384 | */ | ||
385 | static void emergency_vmx_disable_all(void) | ||
386 | { | ||
387 | /* Just make sure we won't change CPUs while doing this */ | ||
388 | local_irq_disable(); | ||
389 | |||
390 | /* We need to disable VMX on all CPUs before rebooting, otherwise | ||
391 | * we risk hanging up the machine, because the CPU ignore INIT | ||
392 | * signals when VMX is enabled. | ||
393 | * | ||
394 | * We can't take any locks and we may be on an inconsistent | ||
395 | * state, so we use NMIs as IPIs to tell the other CPUs to disable | ||
396 | * VMX and halt. | ||
397 | * | ||
398 | * For safety, we will avoid running the nmi_shootdown_cpus() | ||
399 | * stuff unnecessarily, but we don't have a way to check | ||
400 | * if other CPUs have VMX enabled. So we will call it only if the | ||
401 | * CPU we are running on has VMX enabled. | ||
402 | * | ||
403 | * We will miss cases where VMX is not enabled on all CPUs. This | ||
404 | * shouldn't do much harm because KVM always enable VMX on all | ||
405 | * CPUs anyway. But we can miss it on the small window where KVM | ||
406 | * is still enabling VMX. | ||
407 | */ | ||
408 | if (cpu_has_vmx() && cpu_vmx_enabled()) { | ||
409 | /* Disable VMX on this CPU. | ||
410 | */ | ||
411 | cpu_vmxoff(); | ||
412 | |||
413 | /* Halt and disable VMX on the other CPUs */ | ||
414 | nmi_shootdown_cpus(vmxoff_nmi); | ||
415 | |||
416 | } | ||
417 | } | ||
418 | |||
419 | |||
371 | void __attribute__((weak)) mach_reboot_fixups(void) | 420 | void __attribute__((weak)) mach_reboot_fixups(void) |
372 | { | 421 | { |
373 | } | 422 | } |
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void) | |||
376 | { | 425 | { |
377 | int i; | 426 | int i; |
378 | 427 | ||
428 | if (reboot_emergency) | ||
429 | emergency_vmx_disable_all(); | ||
430 | |||
379 | /* Tell the BIOS if we want cold or warm reboot */ | 431 | /* Tell the BIOS if we want cold or warm reboot */ |
380 | *((unsigned short *)__va(0x472)) = reboot_mode; | 432 | *((unsigned short *)__va(0x472)) = reboot_mode; |
381 | 433 | ||
@@ -482,13 +534,19 @@ void native_machine_shutdown(void) | |||
482 | #endif | 534 | #endif |
483 | } | 535 | } |
484 | 536 | ||
537 | static void __machine_emergency_restart(int emergency) | ||
538 | { | ||
539 | reboot_emergency = emergency; | ||
540 | machine_ops.emergency_restart(); | ||
541 | } | ||
542 | |||
485 | static void native_machine_restart(char *__unused) | 543 | static void native_machine_restart(char *__unused) |
486 | { | 544 | { |
487 | printk("machine restart\n"); | 545 | printk("machine restart\n"); |
488 | 546 | ||
489 | if (!reboot_force) | 547 | if (!reboot_force) |
490 | machine_shutdown(); | 548 | machine_shutdown(); |
491 | machine_emergency_restart(); | 549 | __machine_emergency_restart(0); |
492 | } | 550 | } |
493 | 551 | ||
494 | static void native_machine_halt(void) | 552 | static void native_machine_halt(void) |
@@ -532,7 +590,7 @@ void machine_shutdown(void) | |||
532 | 590 | ||
533 | void machine_emergency_restart(void) | 591 | void machine_emergency_restart(void) |
534 | { | 592 | { |
535 | machine_ops.emergency_restart(); | 593 | __machine_emergency_restart(1); |
536 | } | 594 | } |
537 | 595 | ||
538 | void machine_restart(char *cmd) | 596 | void machine_restart(char *cmd) |
@@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
592 | 650 | ||
593 | static void smp_send_nmi_allbutself(void) | 651 | static void smp_send_nmi_allbutself(void) |
594 | { | 652 | { |
595 | cpumask_t mask = cpu_online_map; | 653 | send_IPI_allbutself(NMI_VECTOR); |
596 | cpu_clear(safe_smp_processor_id(), mask); | ||
597 | if (!cpus_empty(mask)) | ||
598 | send_IPI_mask(mask, NMI_VECTOR); | ||
599 | } | 654 | } |
600 | 655 | ||
601 | static struct notifier_block crash_nmi_nb = { | 656 | static struct notifier_block crash_nmi_nb = { |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 08e02e8453c9..ae0d8042cf69 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -953,7 +953,7 @@ void __init setup_arch(char **cmdline_p) | |||
953 | ioapic_init_mappings(); | 953 | ioapic_init_mappings(); |
954 | 954 | ||
955 | /* need to wait for io_apic is mapped */ | 955 | /* need to wait for io_apic is mapped */ |
956 | nr_irqs = probe_nr_irqs(); | 956 | probe_nr_irqs_gsi(); |
957 | 957 | ||
958 | kvm_guest_init(); | 958 | kvm_guest_init(); |
959 | 959 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ae0c0d3bb770..0b63b08e7530 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void) | |||
152 | old_size = PERCPU_ENOUGH_ROOM; | 152 | old_size = PERCPU_ENOUGH_ROOM; |
153 | align = max_t(unsigned long, PAGE_SIZE, align); | 153 | align = max_t(unsigned long, PAGE_SIZE, align); |
154 | size = roundup(old_size, align); | 154 | size = roundup(old_size, align); |
155 | |||
156 | printk(KERN_INFO | ||
157 | "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", | ||
158 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); | ||
159 | |||
155 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", | 160 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", |
156 | size); | 161 | size); |
157 | 162 | ||
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void) | |||
168 | "cpu %d has no node %d or node-local memory\n", | 173 | "cpu %d has no node %d or node-local memory\n", |
169 | cpu, node); | 174 | cpu, node); |
170 | if (ptr) | 175 | if (ptr) |
171 | printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", | 176 | printk(KERN_DEBUG |
177 | "per cpu data for cpu%d at %016lx\n", | ||
172 | cpu, __pa(ptr)); | 178 | cpu, __pa(ptr)); |
173 | } | 179 | } |
174 | else { | 180 | else { |
175 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, | 181 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, |
176 | __pa(MAX_DMA_ADDRESS)); | 182 | __pa(MAX_DMA_ADDRESS)); |
177 | if (ptr) | 183 | if (ptr) |
178 | printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", | 184 | printk(KERN_DEBUG |
179 | cpu, node, __pa(ptr)); | 185 | "per cpu data for cpu%d on node%d " |
186 | "at %016lx\n", | ||
187 | cpu, node, __pa(ptr)); | ||
180 | } | 188 | } |
181 | #endif | 189 | #endif |
182 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 190 | per_cpu_offset(cpu) = ptr - __per_cpu_start; |
183 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 191 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
184 | } | 192 | } |
185 | 193 | ||
186 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", | ||
187 | NR_CPUS, nr_cpu_ids, nr_node_ids); | ||
188 | |||
189 | /* Setup percpu data maps */ | 194 | /* Setup percpu data maps */ |
190 | setup_per_cpu_maps(); | 195 | setup_per_cpu_maps(); |
191 | 196 | ||
@@ -282,7 +287,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) | |||
282 | else | 287 | else |
283 | cpu_clear(cpu, *mask); | 288 | cpu_clear(cpu, *mask); |
284 | 289 | ||
285 | cpulist_scnprintf(buf, sizeof(buf), *mask); | 290 | cpulist_scnprintf(buf, sizeof(buf), mask); |
286 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 291 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", |
287 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); | 292 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); |
288 | } | 293 | } |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 7e558db362c1..beea2649a240 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) | |||
118 | WARN_ON(1); | 118 | WARN_ON(1); |
119 | return; | 119 | return; |
120 | } | 120 | } |
121 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | 121 | send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); |
122 | } | 122 | } |
123 | 123 | ||
124 | void native_send_call_func_single_ipi(int cpu) | 124 | void native_send_call_func_single_ipi(int cpu) |
125 | { | 125 | { |
126 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); | 126 | send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); |
127 | } | 127 | } |
128 | 128 | ||
129 | void native_send_call_func_ipi(cpumask_t mask) | 129 | void native_send_call_func_ipi(const struct cpumask *mask) |
130 | { | 130 | { |
131 | cpumask_t allbutself; | 131 | cpumask_t allbutself; |
132 | 132 | ||
133 | allbutself = cpu_online_map; | 133 | allbutself = cpu_online_map; |
134 | cpu_clear(smp_processor_id(), allbutself); | 134 | cpu_clear(smp_processor_id(), allbutself); |
135 | 135 | ||
136 | if (cpus_equal(mask, allbutself) && | 136 | if (cpus_equal(*mask, allbutself) && |
137 | cpus_equal(cpu_online_map, cpu_callout_map)) | 137 | cpus_equal(cpu_online_map, cpu_callout_map)) |
138 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 138 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
139 | else | 139 | else |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f8500c969442..31869bf5fabd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings); | |||
102 | /* Last level cache ID of each logical CPU */ | 102 | /* Last level cache ID of each logical CPU */ |
103 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 103 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; |
104 | 104 | ||
105 | /* bitmap of online cpus */ | ||
106 | cpumask_t cpu_online_map __read_mostly; | ||
107 | EXPORT_SYMBOL(cpu_online_map); | ||
108 | |||
109 | cpumask_t cpu_callin_map; | 105 | cpumask_t cpu_callin_map; |
110 | cpumask_t cpu_callout_map; | 106 | cpumask_t cpu_callout_map; |
111 | cpumask_t cpu_possible_map; | ||
112 | EXPORT_SYMBOL(cpu_possible_map); | ||
113 | 107 | ||
114 | /* representing HT siblings of each logical CPU */ | 108 | /* representing HT siblings of each logical CPU */ |
115 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); | 109 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); |
@@ -1260,6 +1254,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1260 | check_nmi_watchdog(); | 1254 | check_nmi_watchdog(); |
1261 | } | 1255 | } |
1262 | 1256 | ||
1257 | static int __initdata setup_possible_cpus = -1; | ||
1258 | static int __init _setup_possible_cpus(char *str) | ||
1259 | { | ||
1260 | get_option(&str, &setup_possible_cpus); | ||
1261 | return 0; | ||
1262 | } | ||
1263 | early_param("possible_cpus", _setup_possible_cpus); | ||
1264 | |||
1265 | |||
1263 | /* | 1266 | /* |
1264 | * cpu_possible_map should be static, it cannot change as cpu's | 1267 | * cpu_possible_map should be static, it cannot change as cpu's |
1265 | * are onlined, or offlined. The reason is per-cpu data-structures | 1268 | * are onlined, or offlined. The reason is per-cpu data-structures |
@@ -1272,7 +1275,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1272 | * | 1275 | * |
1273 | * Three ways to find out the number of additional hotplug CPUs: | 1276 | * Three ways to find out the number of additional hotplug CPUs: |
1274 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | 1277 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. |
1275 | * - The user can overwrite it with additional_cpus=NUM | 1278 | * - The user can overwrite it with possible_cpus=NUM |
1276 | * - Otherwise don't reserve additional CPUs. | 1279 | * - Otherwise don't reserve additional CPUs. |
1277 | * We do this because additional CPUs waste a lot of memory. | 1280 | * We do this because additional CPUs waste a lot of memory. |
1278 | * -AK | 1281 | * -AK |
@@ -1285,9 +1288,17 @@ __init void prefill_possible_map(void) | |||
1285 | if (!num_processors) | 1288 | if (!num_processors) |
1286 | num_processors = 1; | 1289 | num_processors = 1; |
1287 | 1290 | ||
1288 | possible = num_processors + disabled_cpus; | 1291 | if (setup_possible_cpus == -1) |
1289 | if (possible > NR_CPUS) | 1292 | possible = num_processors + disabled_cpus; |
1290 | possible = NR_CPUS; | 1293 | else |
1294 | possible = setup_possible_cpus; | ||
1295 | |||
1296 | if (possible > CONFIG_NR_CPUS) { | ||
1297 | printk(KERN_WARNING | ||
1298 | "%d Processors exceeds NR_CPUS limit of %d\n", | ||
1299 | possible, CONFIG_NR_CPUS); | ||
1300 | possible = CONFIG_NR_CPUS; | ||
1301 | } | ||
1291 | 1302 | ||
1292 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1303 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
1293 | possible, max_t(int, possible - num_processors, 0)); | 1304 | possible, max_t(int, possible - num_processors, 0)); |
@@ -1352,7 +1363,7 @@ void cpu_disable_common(void) | |||
1352 | lock_vector_lock(); | 1363 | lock_vector_lock(); |
1353 | remove_cpu_from_maps(cpu); | 1364 | remove_cpu_from_maps(cpu); |
1354 | unlock_vector_lock(); | 1365 | unlock_vector_lock(); |
1355 | fixup_irqs(cpu_online_map); | 1366 | fixup_irqs(); |
1356 | } | 1367 | } |
1357 | 1368 | ||
1358 | int native_cpu_disable(void) | 1369 | int native_cpu_disable(void) |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index 8da059f949be..ce5054642247 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | |||
163 | * We have to send the IPI only to | 163 | * We have to send the IPI only to |
164 | * CPUs affected. | 164 | * CPUs affected. |
165 | */ | 165 | */ |
166 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); | 166 | send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); |
167 | 167 | ||
168 | while (!cpus_empty(flush_cpumask)) | 168 | while (!cpus_empty(flush_cpumask)) |
169 | /* nothing. lockup detection does not belong here */ | 169 | /* nothing. lockup detection does not belong here */ |
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 29887d7081a9..f8be6f1d2e48 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c | |||
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | |||
191 | * We have to send the IPI only to | 191 | * We have to send the IPI only to |
192 | * CPUs affected. | 192 | * CPUs affected. |
193 | */ | 193 | */ |
194 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); | 194 | send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); |
195 | 195 | ||
196 | while (!cpus_empty(f->flush_cpumask)) | 196 | while (!cpus_empty(f->flush_cpumask)) |
197 | cpu_relax(); | 197 | cpu_relax(); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 961e26a69d55..ce6650eb64e9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -72,9 +72,6 @@ | |||
72 | 72 | ||
73 | #include "cpu/mcheck/mce.h" | 73 | #include "cpu/mcheck/mce.h" |
74 | 74 | ||
75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
76 | EXPORT_SYMBOL_GPL(used_vectors); | ||
77 | |||
78 | asmlinkage int system_call(void); | 75 | asmlinkage int system_call(void); |
79 | 76 | ||
80 | /* Do we ignore FPU interrupts ? */ | 77 | /* Do we ignore FPU interrupts ? */ |
@@ -89,6 +86,9 @@ gate_desc idt_table[256] | |||
89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 86 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
90 | #endif | 87 | #endif |
91 | 88 | ||
89 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
90 | EXPORT_SYMBOL_GPL(used_vectors); | ||
91 | |||
92 | static int ignore_nmis; | 92 | static int ignore_nmis; |
93 | 93 | ||
94 | static inline void conditional_sti(struct pt_regs *regs) | 94 | static inline void conditional_sti(struct pt_regs *regs) |
@@ -946,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
946 | 946 | ||
947 | void __init trap_init(void) | 947 | void __init trap_init(void) |
948 | { | 948 | { |
949 | #ifdef CONFIG_X86_32 | ||
950 | int i; | 949 | int i; |
951 | #endif | ||
952 | 950 | ||
953 | #ifdef CONFIG_EISA | 951 | #ifdef CONFIG_EISA |
954 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | 952 | void __iomem *p = early_ioremap(0x0FFFD9, 4); |
@@ -1005,11 +1003,15 @@ void __init trap_init(void) | |||
1005 | } | 1003 | } |
1006 | 1004 | ||
1007 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 1005 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
1006 | #endif | ||
1008 | 1007 | ||
1009 | /* Reserve all the builtin and the syscall vector: */ | 1008 | /* Reserve all the builtin and the syscall vector: */ |
1010 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | 1009 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) |
1011 | set_bit(i, used_vectors); | 1010 | set_bit(i, used_vectors); |
1012 | 1011 | ||
1012 | #ifdef CONFIG_X86_64 | ||
1013 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
1014 | #else | ||
1013 | set_bit(SYSCALL_VECTOR, used_vectors); | 1015 | set_bit(SYSCALL_VECTOR, used_vectors); |
1014 | #endif | 1016 | #endif |
1015 | /* | 1017 | /* |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 254ee07f8635..c4c1f9e09402 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | 226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ |
227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | 227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); |
228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | 228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); |
229 | evt->cpumask = cpumask_of_cpu(cpu); | 229 | evt->cpumask = cpumask_of(cpu); |
230 | 230 | ||
231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | 231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", |
232 | evt->name, evt->mult, evt->shift); | 232 | evt->name, evt->mult, evt->shift); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 59ebd37ad79e..e665d1c623ca 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm) | |||
603 | 603 | ||
604 | static void __inject_pit_timer_intr(struct kvm *kvm) | 604 | static void __inject_pit_timer_intr(struct kvm *kvm) |
605 | { | 605 | { |
606 | struct kvm_vcpu *vcpu; | ||
607 | int i; | ||
608 | |||
606 | mutex_lock(&kvm->lock); | 609 | mutex_lock(&kvm->lock); |
607 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 610 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); |
608 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 611 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); |
609 | mutex_unlock(&kvm->lock); | 612 | mutex_unlock(&kvm->lock); |
613 | |||
614 | /* | ||
615 | * Provides NMI watchdog support via Virtual Wire mode. | ||
616 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
617 | * | ||
618 | * Note: Our Virtual Wire implementation is simplified, only | ||
619 | * propagating PIT interrupts to all VCPUs when they have set | ||
620 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
621 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
622 | */ | ||
623 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
624 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
625 | vcpu = kvm->vcpus[i]; | ||
626 | if (vcpu) | ||
627 | kvm_apic_nmi_wd_deliver(vcpu); | ||
628 | } | ||
610 | } | 629 | } |
611 | 630 | ||
612 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | 631 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 17e41e165f1a..179dcb0103fd 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -26,10 +26,40 @@ | |||
26 | * Port from Qemu. | 26 | * Port from Qemu. |
27 | */ | 27 | */ |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/bitops.h> | ||
29 | #include "irq.h" | 30 | #include "irq.h" |
30 | 31 | ||
31 | #include <linux/kvm_host.h> | 32 | #include <linux/kvm_host.h> |
32 | 33 | ||
34 | static void pic_lock(struct kvm_pic *s) | ||
35 | { | ||
36 | spin_lock(&s->lock); | ||
37 | } | ||
38 | |||
39 | static void pic_unlock(struct kvm_pic *s) | ||
40 | { | ||
41 | struct kvm *kvm = s->kvm; | ||
42 | unsigned acks = s->pending_acks; | ||
43 | bool wakeup = s->wakeup_needed; | ||
44 | struct kvm_vcpu *vcpu; | ||
45 | |||
46 | s->pending_acks = 0; | ||
47 | s->wakeup_needed = false; | ||
48 | |||
49 | spin_unlock(&s->lock); | ||
50 | |||
51 | while (acks) { | ||
52 | kvm_notify_acked_irq(kvm, __ffs(acks)); | ||
53 | acks &= acks - 1; | ||
54 | } | ||
55 | |||
56 | if (wakeup) { | ||
57 | vcpu = s->kvm->vcpus[0]; | ||
58 | if (vcpu) | ||
59 | kvm_vcpu_kick(vcpu); | ||
60 | } | ||
61 | } | ||
62 | |||
33 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 63 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
34 | { | 64 | { |
35 | s->isr &= ~(1 << irq); | 65 | s->isr &= ~(1 << irq); |
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s) | |||
136 | 166 | ||
137 | void kvm_pic_update_irq(struct kvm_pic *s) | 167 | void kvm_pic_update_irq(struct kvm_pic *s) |
138 | { | 168 | { |
169 | pic_lock(s); | ||
139 | pic_update_irq(s); | 170 | pic_update_irq(s); |
171 | pic_unlock(s); | ||
140 | } | 172 | } |
141 | 173 | ||
142 | void kvm_pic_set_irq(void *opaque, int irq, int level) | 174 | void kvm_pic_set_irq(void *opaque, int irq, int level) |
143 | { | 175 | { |
144 | struct kvm_pic *s = opaque; | 176 | struct kvm_pic *s = opaque; |
145 | 177 | ||
178 | pic_lock(s); | ||
146 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 179 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
147 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 180 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
148 | pic_update_irq(s); | 181 | pic_update_irq(s); |
149 | } | 182 | } |
183 | pic_unlock(s); | ||
150 | } | 184 | } |
151 | 185 | ||
152 | /* | 186 | /* |
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
172 | int irq, irq2, intno; | 206 | int irq, irq2, intno; |
173 | struct kvm_pic *s = pic_irqchip(kvm); | 207 | struct kvm_pic *s = pic_irqchip(kvm); |
174 | 208 | ||
209 | pic_lock(s); | ||
175 | irq = pic_get_irq(&s->pics[0]); | 210 | irq = pic_get_irq(&s->pics[0]); |
176 | if (irq >= 0) { | 211 | if (irq >= 0) { |
177 | pic_intack(&s->pics[0], irq); | 212 | pic_intack(&s->pics[0], irq); |
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
196 | intno = s->pics[0].irq_base + irq; | 231 | intno = s->pics[0].irq_base + irq; |
197 | } | 232 | } |
198 | pic_update_irq(s); | 233 | pic_update_irq(s); |
234 | pic_unlock(s); | ||
199 | kvm_notify_acked_irq(kvm, irq); | 235 | kvm_notify_acked_irq(kvm, irq); |
200 | 236 | ||
201 | return intno; | 237 | return intno; |
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
203 | 239 | ||
204 | void kvm_pic_reset(struct kvm_kpic_state *s) | 240 | void kvm_pic_reset(struct kvm_kpic_state *s) |
205 | { | 241 | { |
206 | int irq, irqbase; | 242 | int irq, irqbase, n; |
207 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 243 | struct kvm *kvm = s->pics_state->irq_request_opaque; |
208 | struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; | 244 | struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; |
209 | 245 | ||
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
214 | 250 | ||
215 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | 251 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { |
216 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | 252 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) |
217 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) | 253 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) { |
218 | kvm_notify_acked_irq(kvm, irq+irqbase); | 254 | n = irq + irqbase; |
255 | s->pics_state->pending_acks |= 1 << n; | ||
256 | } | ||
219 | } | 257 | } |
220 | s->last_irr = 0; | 258 | s->last_irr = 0; |
221 | s->irr = 0; | 259 | s->irr = 0; |
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this, | |||
406 | printk(KERN_ERR "PIC: non byte write\n"); | 444 | printk(KERN_ERR "PIC: non byte write\n"); |
407 | return; | 445 | return; |
408 | } | 446 | } |
447 | pic_lock(s); | ||
409 | switch (addr) { | 448 | switch (addr) { |
410 | case 0x20: | 449 | case 0x20: |
411 | case 0x21: | 450 | case 0x21: |
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this, | |||
418 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
419 | break; | 458 | break; |
420 | } | 459 | } |
460 | pic_unlock(s); | ||
421 | } | 461 | } |
422 | 462 | ||
423 | static void picdev_read(struct kvm_io_device *this, | 463 | static void picdev_read(struct kvm_io_device *this, |
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this, | |||
431 | printk(KERN_ERR "PIC: non byte read\n"); | 471 | printk(KERN_ERR "PIC: non byte read\n"); |
432 | return; | 472 | return; |
433 | } | 473 | } |
474 | pic_lock(s); | ||
434 | switch (addr) { | 475 | switch (addr) { |
435 | case 0x20: | 476 | case 0x20: |
436 | case 0x21: | 477 | case 0x21: |
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this, | |||
444 | break; | 485 | break; |
445 | } | 486 | } |
446 | *(unsigned char *)val = data; | 487 | *(unsigned char *)val = data; |
488 | pic_unlock(s); | ||
447 | } | 489 | } |
448 | 490 | ||
449 | /* | 491 | /* |
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level) | |||
459 | s->output = level; | 501 | s->output = level; |
460 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 502 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
461 | s->pics[0].isr_ack &= ~(1 << irq); | 503 | s->pics[0].isr_ack &= ~(1 << irq); |
462 | kvm_vcpu_kick(vcpu); | 504 | s->wakeup_needed = true; |
463 | } | 505 | } |
464 | } | 506 | } |
465 | 507 | ||
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
469 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 511 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); |
470 | if (!s) | 512 | if (!s) |
471 | return NULL; | 513 | return NULL; |
514 | spin_lock_init(&s->lock); | ||
515 | s->kvm = kvm; | ||
472 | s->pics[0].elcr_mask = 0xf8; | 516 | s->pics[0].elcr_mask = 0xf8; |
473 | s->pics[1].elcr_mask = 0xde; | 517 | s->pics[1].elcr_mask = 0xde; |
474 | s->irq_request = pic_irq_request; | 518 | s->irq_request = pic_irq_request; |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f17c8f5bbf31..2bf32a03ceec 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/mm_types.h> | 25 | #include <linux/mm_types.h> |
26 | #include <linux/hrtimer.h> | 26 | #include <linux/hrtimer.h> |
27 | #include <linux/kvm_host.h> | 27 | #include <linux/kvm_host.h> |
28 | #include <linux/spinlock.h> | ||
28 | 29 | ||
29 | #include "iodev.h" | 30 | #include "iodev.h" |
30 | #include "ioapic.h" | 31 | #include "ioapic.h" |
@@ -59,6 +60,10 @@ struct kvm_kpic_state { | |||
59 | }; | 60 | }; |
60 | 61 | ||
61 | struct kvm_pic { | 62 | struct kvm_pic { |
63 | spinlock_t lock; | ||
64 | bool wakeup_needed; | ||
65 | unsigned pending_acks; | ||
66 | struct kvm *kvm; | ||
62 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
63 | irq_request_func *irq_request; | 68 | irq_request_func *irq_request; |
64 | void *irq_request_opaque; | 69 | void *irq_request_opaque; |
@@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s); | |||
87 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | 92 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); |
88 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 93 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
89 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 94 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
95 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); | ||
90 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | 96 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); |
91 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); | 97 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); |
92 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); | 98 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 65ef0fc2c036..8e5ee99551f6 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/kvm_host.h> | 7 | #include <linux/kvm_host.h> |
8 | #include <asm/msr.h> | 8 | #include <asm/msr.h> |
9 | 9 | ||
10 | #include "svm.h" | 10 | #include <asm/svm.h> |
11 | 11 | ||
12 | static const u32 host_save_user_msrs[] = { | 12 | static const u32 host_save_user_msrs[] = { |
13 | #ifdef CONFIG_X86_64 | 13 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fc3cab48943..afac68c0815c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) | |||
130 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; | 130 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; |
131 | } | 131 | } |
132 | 132 | ||
133 | static inline int apic_lvt_nmi_mode(u32 lvt_val) | ||
134 | { | ||
135 | return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; | ||
136 | } | ||
137 | |||
133 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | 138 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { |
134 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ | 139 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ |
135 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ | 140 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ |
@@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
354 | 359 | ||
355 | case APIC_DM_NMI: | 360 | case APIC_DM_NMI: |
356 | kvm_inject_nmi(vcpu); | 361 | kvm_inject_nmi(vcpu); |
362 | kvm_vcpu_kick(vcpu); | ||
357 | break; | 363 | break; |
358 | 364 | ||
359 | case APIC_DM_INIT: | 365 | case APIC_DM_INIT: |
@@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
380 | } | 386 | } |
381 | break; | 387 | break; |
382 | 388 | ||
389 | case APIC_DM_EXTINT: | ||
390 | /* | ||
391 | * Should only be called by kvm_apic_local_deliver() with LVT0, | ||
392 | * before NMI watchdog was enabled. Already handled by | ||
393 | * kvm_apic_accept_pic_intr(). | ||
394 | */ | ||
395 | break; | ||
396 | |||
383 | default: | 397 | default: |
384 | printk(KERN_ERR "TODO: unsupported delivery mode %x\n", | 398 | printk(KERN_ERR "TODO: unsupported delivery mode %x\n", |
385 | delivery_mode); | 399 | delivery_mode); |
@@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
663 | apic->timer.period))); | 677 | apic->timer.period))); |
664 | } | 678 | } |
665 | 679 | ||
680 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | ||
681 | { | ||
682 | int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); | ||
683 | |||
684 | if (apic_lvt_nmi_mode(lvt0_val)) { | ||
685 | if (!nmi_wd_enabled) { | ||
686 | apic_debug("Receive NMI setting on APIC_LVT0 " | ||
687 | "for cpu %d\n", apic->vcpu->vcpu_id); | ||
688 | apic->vcpu->kvm->arch.vapics_in_nmi_mode++; | ||
689 | } | ||
690 | } else if (nmi_wd_enabled) | ||
691 | apic->vcpu->kvm->arch.vapics_in_nmi_mode--; | ||
692 | } | ||
693 | |||
666 | static void apic_mmio_write(struct kvm_io_device *this, | 694 | static void apic_mmio_write(struct kvm_io_device *this, |
667 | gpa_t address, int len, const void *data) | 695 | gpa_t address, int len, const void *data) |
668 | { | 696 | { |
@@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
743 | apic_set_reg(apic, APIC_ICR2, val & 0xff000000); | 771 | apic_set_reg(apic, APIC_ICR2, val & 0xff000000); |
744 | break; | 772 | break; |
745 | 773 | ||
774 | case APIC_LVT0: | ||
775 | apic_manage_nmi_watchdog(apic, val); | ||
746 | case APIC_LVTT: | 776 | case APIC_LVTT: |
747 | case APIC_LVTTHMR: | 777 | case APIC_LVTTHMR: |
748 | case APIC_LVTPC: | 778 | case APIC_LVTPC: |
749 | case APIC_LVT0: | ||
750 | case APIC_LVT1: | 779 | case APIC_LVT1: |
751 | case APIC_LVTERR: | 780 | case APIC_LVTERR: |
752 | /* TODO: Check vector */ | 781 | /* TODO: Check vector */ |
@@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
961 | return 0; | 990 | return 0; |
962 | } | 991 | } |
963 | 992 | ||
964 | static int __inject_apic_timer_irq(struct kvm_lapic *apic) | 993 | static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) |
994 | { | ||
995 | u32 reg = apic_get_reg(apic, lvt_type); | ||
996 | int vector, mode, trig_mode; | ||
997 | |||
998 | if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { | ||
999 | vector = reg & APIC_VECTOR_MASK; | ||
1000 | mode = reg & APIC_MODE_MASK; | ||
1001 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; | ||
1002 | return __apic_accept_irq(apic, mode, vector, 1, trig_mode); | ||
1003 | } | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) | ||
965 | { | 1008 | { |
966 | int vector; | 1009 | struct kvm_lapic *apic = vcpu->arch.apic; |
967 | 1010 | ||
968 | vector = apic_lvt_vector(apic, APIC_LVTT); | 1011 | if (apic) |
969 | return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); | 1012 | kvm_apic_local_deliver(apic, APIC_LVT0); |
970 | } | 1013 | } |
971 | 1014 | ||
972 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | 1015 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) |
@@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1061 | { | 1104 | { |
1062 | struct kvm_lapic *apic = vcpu->arch.apic; | 1105 | struct kvm_lapic *apic = vcpu->arch.apic; |
1063 | 1106 | ||
1064 | if (apic && apic_lvt_enabled(apic, APIC_LVTT) && | 1107 | if (apic && atomic_read(&apic->timer.pending) > 0) { |
1065 | atomic_read(&apic->timer.pending) > 0) { | 1108 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) |
1066 | if (__inject_apic_timer_irq(apic)) | ||
1067 | atomic_dec(&apic->timer.pending); | 1109 | atomic_dec(&apic->timer.pending); |
1068 | } | 1110 | } |
1069 | } | 1111 | } |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 410ddbc1aa2e..83f11c7474a1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -17,7 +17,6 @@ | |||
17 | * | 17 | * |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include "vmx.h" | ||
21 | #include "mmu.h" | 20 | #include "mmu.h" |
22 | 21 | ||
23 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
@@ -33,6 +32,7 @@ | |||
33 | #include <asm/page.h> | 32 | #include <asm/page.h> |
34 | #include <asm/cmpxchg.h> | 33 | #include <asm/cmpxchg.h> |
35 | #include <asm/io.h> | 34 | #include <asm/io.h> |
35 | #include <asm/vmx.h> | ||
36 | 36 | ||
37 | /* | 37 | /* |
38 | * When setting this variable to true it enables Two-Dimensional-Paging | 38 | * When setting this variable to true it enables Two-Dimensional-Paging |
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | |||
168 | static u64 __read_mostly shadow_user_mask; | 168 | static u64 __read_mostly shadow_user_mask; |
169 | static u64 __read_mostly shadow_accessed_mask; | 169 | static u64 __read_mostly shadow_accessed_mask; |
170 | static u64 __read_mostly shadow_dirty_mask; | 170 | static u64 __read_mostly shadow_dirty_mask; |
171 | static u64 __read_mostly shadow_mt_mask; | ||
171 | 172 | ||
172 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | 173 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) |
173 | { | 174 | { |
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte) | |||
183 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | 184 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); |
184 | 185 | ||
185 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 186 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
186 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | 187 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) |
187 | { | 188 | { |
188 | shadow_user_mask = user_mask; | 189 | shadow_user_mask = user_mask; |
189 | shadow_accessed_mask = accessed_mask; | 190 | shadow_accessed_mask = accessed_mask; |
190 | shadow_dirty_mask = dirty_mask; | 191 | shadow_dirty_mask = dirty_mask; |
191 | shadow_nx_mask = nx_mask; | 192 | shadow_nx_mask = nx_mask; |
192 | shadow_x_mask = x_mask; | 193 | shadow_x_mask = x_mask; |
194 | shadow_mt_mask = mt_mask; | ||
193 | } | 195 | } |
194 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 196 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
195 | 197 | ||
@@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
384 | { | 386 | { |
385 | int *write_count; | 387 | int *write_count; |
386 | 388 | ||
387 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | 389 | gfn = unalias_gfn(kvm, gfn); |
390 | write_count = slot_largepage_idx(gfn, | ||
391 | gfn_to_memslot_unaliased(kvm, gfn)); | ||
388 | *write_count += 1; | 392 | *write_count += 1; |
389 | } | 393 | } |
390 | 394 | ||
@@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
392 | { | 396 | { |
393 | int *write_count; | 397 | int *write_count; |
394 | 398 | ||
395 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | 399 | gfn = unalias_gfn(kvm, gfn); |
400 | write_count = slot_largepage_idx(gfn, | ||
401 | gfn_to_memslot_unaliased(kvm, gfn)); | ||
396 | *write_count -= 1; | 402 | *write_count -= 1; |
397 | WARN_ON(*write_count < 0); | 403 | WARN_ON(*write_count < 0); |
398 | } | 404 | } |
399 | 405 | ||
400 | static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) | 406 | static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) |
401 | { | 407 | { |
402 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | 408 | struct kvm_memory_slot *slot; |
403 | int *largepage_idx; | 409 | int *largepage_idx; |
404 | 410 | ||
411 | gfn = unalias_gfn(kvm, gfn); | ||
412 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
405 | if (slot) { | 413 | if (slot) { |
406 | largepage_idx = slot_largepage_idx(gfn, slot); | 414 | largepage_idx = slot_largepage_idx(gfn, slot); |
407 | return *largepage_idx; | 415 | return *largepage_idx; |
@@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
613 | return NULL; | 621 | return NULL; |
614 | } | 622 | } |
615 | 623 | ||
616 | static void rmap_write_protect(struct kvm *kvm, u64 gfn) | 624 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) |
617 | { | 625 | { |
618 | unsigned long *rmapp; | 626 | unsigned long *rmapp; |
619 | u64 *spte; | 627 | u64 *spte; |
@@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
659 | spte = rmap_next(kvm, rmapp, spte); | 667 | spte = rmap_next(kvm, rmapp, spte); |
660 | } | 668 | } |
661 | 669 | ||
662 | if (write_protected) | 670 | return write_protected; |
663 | kvm_flush_remote_tlbs(kvm); | ||
664 | } | 671 | } |
665 | 672 | ||
666 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 673 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) |
@@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
786 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 793 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
787 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 794 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
788 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 795 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
796 | INIT_LIST_HEAD(&sp->oos_link); | ||
789 | ASSERT(is_empty_shadow_page(sp->spt)); | 797 | ASSERT(is_empty_shadow_page(sp->spt)); |
790 | sp->slot_bitmap = 0; | 798 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
791 | sp->multimapped = 0; | 799 | sp->multimapped = 0; |
800 | sp->global = 1; | ||
792 | sp->parent_pte = parent_pte; | 801 | sp->parent_pte = parent_pte; |
793 | --vcpu->kvm->arch.n_free_mmu_pages; | 802 | --vcpu->kvm->arch.n_free_mmu_pages; |
794 | return sp; | 803 | return sp; |
@@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) | |||
900 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | 909 | struct kvm_mmu_page *sp = page_header(__pa(spte)); |
901 | 910 | ||
902 | index = spte - sp->spt; | 911 | index = spte - sp->spt; |
903 | __set_bit(index, sp->unsync_child_bitmap); | 912 | if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) |
904 | sp->unsync_children = 1; | 913 | sp->unsync_children++; |
914 | WARN_ON(!sp->unsync_children); | ||
905 | } | 915 | } |
906 | 916 | ||
907 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | 917 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) |
@@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
928 | 938 | ||
929 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 939 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
930 | { | 940 | { |
931 | sp->unsync_children = 1; | ||
932 | kvm_mmu_update_parents_unsync(sp); | 941 | kvm_mmu_update_parents_unsync(sp); |
933 | return 1; | 942 | return 1; |
934 | } | 943 | } |
@@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
959 | { | 968 | { |
960 | } | 969 | } |
961 | 970 | ||
971 | #define KVM_PAGE_ARRAY_NR 16 | ||
972 | |||
973 | struct kvm_mmu_pages { | ||
974 | struct mmu_page_and_offset { | ||
975 | struct kvm_mmu_page *sp; | ||
976 | unsigned int idx; | ||
977 | } page[KVM_PAGE_ARRAY_NR]; | ||
978 | unsigned int nr; | ||
979 | }; | ||
980 | |||
962 | #define for_each_unsync_children(bitmap, idx) \ | 981 | #define for_each_unsync_children(bitmap, idx) \ |
963 | for (idx = find_first_bit(bitmap, 512); \ | 982 | for (idx = find_first_bit(bitmap, 512); \ |
964 | idx < 512; \ | 983 | idx < 512; \ |
965 | idx = find_next_bit(bitmap, 512, idx+1)) | 984 | idx = find_next_bit(bitmap, 512, idx+1)) |
966 | 985 | ||
967 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | 986 | int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, |
968 | struct kvm_unsync_walk *walker) | 987 | int idx) |
969 | { | 988 | { |
970 | int i, ret; | 989 | int i; |
971 | 990 | ||
972 | if (!sp->unsync_children) | 991 | if (sp->unsync) |
973 | return 0; | 992 | for (i=0; i < pvec->nr; i++) |
993 | if (pvec->page[i].sp == sp) | ||
994 | return 0; | ||
995 | |||
996 | pvec->page[pvec->nr].sp = sp; | ||
997 | pvec->page[pvec->nr].idx = idx; | ||
998 | pvec->nr++; | ||
999 | return (pvec->nr == KVM_PAGE_ARRAY_NR); | ||
1000 | } | ||
1001 | |||
1002 | static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | ||
1003 | struct kvm_mmu_pages *pvec) | ||
1004 | { | ||
1005 | int i, ret, nr_unsync_leaf = 0; | ||
974 | 1006 | ||
975 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1007 | for_each_unsync_children(sp->unsync_child_bitmap, i) { |
976 | u64 ent = sp->spt[i]; | 1008 | u64 ent = sp->spt[i]; |
977 | 1009 | ||
978 | if (is_shadow_present_pte(ent)) { | 1010 | if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { |
979 | struct kvm_mmu_page *child; | 1011 | struct kvm_mmu_page *child; |
980 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 1012 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
981 | 1013 | ||
982 | if (child->unsync_children) { | 1014 | if (child->unsync_children) { |
983 | ret = mmu_unsync_walk(child, walker); | 1015 | if (mmu_pages_add(pvec, child, i)) |
984 | if (ret) | 1016 | return -ENOSPC; |
1017 | |||
1018 | ret = __mmu_unsync_walk(child, pvec); | ||
1019 | if (!ret) | ||
1020 | __clear_bit(i, sp->unsync_child_bitmap); | ||
1021 | else if (ret > 0) | ||
1022 | nr_unsync_leaf += ret; | ||
1023 | else | ||
985 | return ret; | 1024 | return ret; |
986 | __clear_bit(i, sp->unsync_child_bitmap); | ||
987 | } | 1025 | } |
988 | 1026 | ||
989 | if (child->unsync) { | 1027 | if (child->unsync) { |
990 | ret = walker->entry(child, walker); | 1028 | nr_unsync_leaf++; |
991 | __clear_bit(i, sp->unsync_child_bitmap); | 1029 | if (mmu_pages_add(pvec, child, i)) |
992 | if (ret) | 1030 | return -ENOSPC; |
993 | return ret; | ||
994 | } | 1031 | } |
995 | } | 1032 | } |
996 | } | 1033 | } |
@@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
998 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | 1035 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) |
999 | sp->unsync_children = 0; | 1036 | sp->unsync_children = 0; |
1000 | 1037 | ||
1001 | return 0; | 1038 | return nr_unsync_leaf; |
1039 | } | ||
1040 | |||
1041 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | ||
1042 | struct kvm_mmu_pages *pvec) | ||
1043 | { | ||
1044 | if (!sp->unsync_children) | ||
1045 | return 0; | ||
1046 | |||
1047 | mmu_pages_add(pvec, sp, 0); | ||
1048 | return __mmu_unsync_walk(sp, pvec); | ||
1002 | } | 1049 | } |
1003 | 1050 | ||
1004 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | 1051 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) |
@@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
1021 | return NULL; | 1068 | return NULL; |
1022 | } | 1069 | } |
1023 | 1070 | ||
1071 | static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
1072 | { | ||
1073 | list_del(&sp->oos_link); | ||
1074 | --kvm->stat.mmu_unsync_global; | ||
1075 | } | ||
1076 | |||
1024 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1077 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1025 | { | 1078 | { |
1026 | WARN_ON(!sp->unsync); | 1079 | WARN_ON(!sp->unsync); |
1027 | sp->unsync = 0; | 1080 | sp->unsync = 0; |
1081 | if (sp->global) | ||
1082 | kvm_unlink_unsync_global(kvm, sp); | ||
1028 | --kvm->stat.mmu_unsync; | 1083 | --kvm->stat.mmu_unsync; |
1029 | } | 1084 | } |
1030 | 1085 | ||
@@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1037 | return 1; | 1092 | return 1; |
1038 | } | 1093 | } |
1039 | 1094 | ||
1040 | rmap_write_protect(vcpu->kvm, sp->gfn); | 1095 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
1096 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1041 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1097 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1042 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | 1098 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { |
1043 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1099 | kvm_mmu_zap_page(vcpu->kvm, sp); |
@@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1048 | return 0; | 1104 | return 0; |
1049 | } | 1105 | } |
1050 | 1106 | ||
1051 | struct sync_walker { | 1107 | struct mmu_page_path { |
1052 | struct kvm_vcpu *vcpu; | 1108 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; |
1053 | struct kvm_unsync_walk walker; | 1109 | unsigned int idx[PT64_ROOT_LEVEL-1]; |
1054 | }; | 1110 | }; |
1055 | 1111 | ||
1056 | static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | 1112 | #define for_each_sp(pvec, sp, parents, i) \ |
1113 | for (i = mmu_pages_next(&pvec, &parents, -1), \ | ||
1114 | sp = pvec.page[i].sp; \ | ||
1115 | i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ | ||
1116 | i = mmu_pages_next(&pvec, &parents, i)) | ||
1117 | |||
1118 | int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, | ||
1119 | int i) | ||
1057 | { | 1120 | { |
1058 | struct sync_walker *sync_walk = container_of(walk, struct sync_walker, | 1121 | int n; |
1059 | walker); | ||
1060 | struct kvm_vcpu *vcpu = sync_walk->vcpu; | ||
1061 | 1122 | ||
1062 | kvm_sync_page(vcpu, sp); | 1123 | for (n = i+1; n < pvec->nr; n++) { |
1063 | return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); | 1124 | struct kvm_mmu_page *sp = pvec->page[n].sp; |
1125 | |||
1126 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) { | ||
1127 | parents->idx[0] = pvec->page[n].idx; | ||
1128 | return n; | ||
1129 | } | ||
1130 | |||
1131 | parents->parent[sp->role.level-2] = sp; | ||
1132 | parents->idx[sp->role.level-1] = pvec->page[n].idx; | ||
1133 | } | ||
1134 | |||
1135 | return n; | ||
1064 | } | 1136 | } |
1065 | 1137 | ||
1066 | static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1138 | void mmu_pages_clear_parents(struct mmu_page_path *parents) |
1067 | { | 1139 | { |
1068 | struct sync_walker walker = { | 1140 | struct kvm_mmu_page *sp; |
1069 | .walker = { .entry = mmu_sync_fn, }, | 1141 | unsigned int level = 0; |
1070 | .vcpu = vcpu, | 1142 | |
1071 | }; | 1143 | do { |
1144 | unsigned int idx = parents->idx[level]; | ||
1145 | |||
1146 | sp = parents->parent[level]; | ||
1147 | if (!sp) | ||
1148 | return; | ||
1149 | |||
1150 | --sp->unsync_children; | ||
1151 | WARN_ON((int)sp->unsync_children < 0); | ||
1152 | __clear_bit(idx, sp->unsync_child_bitmap); | ||
1153 | level++; | ||
1154 | } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); | ||
1155 | } | ||
1156 | |||
1157 | static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, | ||
1158 | struct mmu_page_path *parents, | ||
1159 | struct kvm_mmu_pages *pvec) | ||
1160 | { | ||
1161 | parents->parent[parent->role.level-1] = NULL; | ||
1162 | pvec->nr = 0; | ||
1163 | } | ||
1164 | |||
1165 | static void mmu_sync_children(struct kvm_vcpu *vcpu, | ||
1166 | struct kvm_mmu_page *parent) | ||
1167 | { | ||
1168 | int i; | ||
1169 | struct kvm_mmu_page *sp; | ||
1170 | struct mmu_page_path parents; | ||
1171 | struct kvm_mmu_pages pages; | ||
1172 | |||
1173 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
1174 | while (mmu_unsync_walk(parent, &pages)) { | ||
1175 | int protected = 0; | ||
1072 | 1176 | ||
1073 | while (mmu_unsync_walk(sp, &walker.walker)) | 1177 | for_each_sp(pages, sp, parents, i) |
1178 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); | ||
1179 | |||
1180 | if (protected) | ||
1181 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1182 | |||
1183 | for_each_sp(pages, sp, parents, i) { | ||
1184 | kvm_sync_page(vcpu, sp); | ||
1185 | mmu_pages_clear_parents(&parents); | ||
1186 | } | ||
1074 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 1187 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
1188 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
1189 | } | ||
1075 | } | 1190 | } |
1076 | 1191 | ||
1077 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1192 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
@@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1129 | sp->role = role; | 1244 | sp->role = role; |
1130 | hlist_add_head(&sp->hash_link, bucket); | 1245 | hlist_add_head(&sp->hash_link, bucket); |
1131 | if (!metaphysical) { | 1246 | if (!metaphysical) { |
1132 | rmap_write_protect(vcpu->kvm, gfn); | 1247 | if (rmap_write_protect(vcpu->kvm, gfn)) |
1248 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1133 | account_shadowed(vcpu->kvm, gfn); | 1249 | account_shadowed(vcpu->kvm, gfn); |
1134 | } | 1250 | } |
1135 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1251 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
@@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker, | |||
1153 | if (level == PT32E_ROOT_LEVEL) { | 1269 | if (level == PT32E_ROOT_LEVEL) { |
1154 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | 1270 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; |
1155 | shadow_addr &= PT64_BASE_ADDR_MASK; | 1271 | shadow_addr &= PT64_BASE_ADDR_MASK; |
1272 | if (!shadow_addr) | ||
1273 | return 1; | ||
1156 | --level; | 1274 | --level; |
1157 | } | 1275 | } |
1158 | 1276 | ||
@@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1237 | } | 1355 | } |
1238 | } | 1356 | } |
1239 | 1357 | ||
1240 | struct zap_walker { | 1358 | static int mmu_zap_unsync_children(struct kvm *kvm, |
1241 | struct kvm_unsync_walk walker; | 1359 | struct kvm_mmu_page *parent) |
1242 | struct kvm *kvm; | ||
1243 | int zapped; | ||
1244 | }; | ||
1245 | |||
1246 | static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | ||
1247 | { | 1360 | { |
1248 | struct zap_walker *zap_walk = container_of(walk, struct zap_walker, | 1361 | int i, zapped = 0; |
1249 | walker); | 1362 | struct mmu_page_path parents; |
1250 | kvm_mmu_zap_page(zap_walk->kvm, sp); | 1363 | struct kvm_mmu_pages pages; |
1251 | zap_walk->zapped = 1; | ||
1252 | return 0; | ||
1253 | } | ||
1254 | 1364 | ||
1255 | static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) | 1365 | if (parent->role.level == PT_PAGE_TABLE_LEVEL) |
1256 | { | ||
1257 | struct zap_walker walker = { | ||
1258 | .walker = { .entry = mmu_zap_fn, }, | ||
1259 | .kvm = kvm, | ||
1260 | .zapped = 0, | ||
1261 | }; | ||
1262 | |||
1263 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) | ||
1264 | return 0; | 1366 | return 0; |
1265 | mmu_unsync_walk(sp, &walker.walker); | 1367 | |
1266 | return walker.zapped; | 1368 | kvm_mmu_pages_init(parent, &parents, &pages); |
1369 | while (mmu_unsync_walk(parent, &pages)) { | ||
1370 | struct kvm_mmu_page *sp; | ||
1371 | |||
1372 | for_each_sp(pages, sp, parents, i) { | ||
1373 | kvm_mmu_zap_page(kvm, sp); | ||
1374 | mmu_pages_clear_parents(&parents); | ||
1375 | } | ||
1376 | zapped += pages.nr; | ||
1377 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
1378 | } | ||
1379 | |||
1380 | return zapped; | ||
1267 | } | 1381 | } |
1268 | 1382 | ||
1269 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1383 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
@@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | |||
1362 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); | 1476 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); |
1363 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | 1477 | struct kvm_mmu_page *sp = page_header(__pa(pte)); |
1364 | 1478 | ||
1365 | __set_bit(slot, &sp->slot_bitmap); | 1479 | __set_bit(slot, sp->slot_bitmap); |
1366 | } | 1480 | } |
1367 | 1481 | ||
1368 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) | 1482 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) |
@@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
1393 | return page; | 1507 | return page; |
1394 | } | 1508 | } |
1395 | 1509 | ||
1510 | /* | ||
1511 | * The function is based on mtrr_type_lookup() in | ||
1512 | * arch/x86/kernel/cpu/mtrr/generic.c | ||
1513 | */ | ||
1514 | static int get_mtrr_type(struct mtrr_state_type *mtrr_state, | ||
1515 | u64 start, u64 end) | ||
1516 | { | ||
1517 | int i; | ||
1518 | u64 base, mask; | ||
1519 | u8 prev_match, curr_match; | ||
1520 | int num_var_ranges = KVM_NR_VAR_MTRR; | ||
1521 | |||
1522 | if (!mtrr_state->enabled) | ||
1523 | return 0xFF; | ||
1524 | |||
1525 | /* Make end inclusive end, instead of exclusive */ | ||
1526 | end--; | ||
1527 | |||
1528 | /* Look in fixed ranges. Just return the type as per start */ | ||
1529 | if (mtrr_state->have_fixed && (start < 0x100000)) { | ||
1530 | int idx; | ||
1531 | |||
1532 | if (start < 0x80000) { | ||
1533 | idx = 0; | ||
1534 | idx += (start >> 16); | ||
1535 | return mtrr_state->fixed_ranges[idx]; | ||
1536 | } else if (start < 0xC0000) { | ||
1537 | idx = 1 * 8; | ||
1538 | idx += ((start - 0x80000) >> 14); | ||
1539 | return mtrr_state->fixed_ranges[idx]; | ||
1540 | } else if (start < 0x1000000) { | ||
1541 | idx = 3 * 8; | ||
1542 | idx += ((start - 0xC0000) >> 12); | ||
1543 | return mtrr_state->fixed_ranges[idx]; | ||
1544 | } | ||
1545 | } | ||
1546 | |||
1547 | /* | ||
1548 | * Look in variable ranges | ||
1549 | * Look of multiple ranges matching this address and pick type | ||
1550 | * as per MTRR precedence | ||
1551 | */ | ||
1552 | if (!(mtrr_state->enabled & 2)) | ||
1553 | return mtrr_state->def_type; | ||
1554 | |||
1555 | prev_match = 0xFF; | ||
1556 | for (i = 0; i < num_var_ranges; ++i) { | ||
1557 | unsigned short start_state, end_state; | ||
1558 | |||
1559 | if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) | ||
1560 | continue; | ||
1561 | |||
1562 | base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + | ||
1563 | (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); | ||
1564 | mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + | ||
1565 | (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); | ||
1566 | |||
1567 | start_state = ((start & mask) == (base & mask)); | ||
1568 | end_state = ((end & mask) == (base & mask)); | ||
1569 | if (start_state != end_state) | ||
1570 | return 0xFE; | ||
1571 | |||
1572 | if ((start & mask) != (base & mask)) | ||
1573 | continue; | ||
1574 | |||
1575 | curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; | ||
1576 | if (prev_match == 0xFF) { | ||
1577 | prev_match = curr_match; | ||
1578 | continue; | ||
1579 | } | ||
1580 | |||
1581 | if (prev_match == MTRR_TYPE_UNCACHABLE || | ||
1582 | curr_match == MTRR_TYPE_UNCACHABLE) | ||
1583 | return MTRR_TYPE_UNCACHABLE; | ||
1584 | |||
1585 | if ((prev_match == MTRR_TYPE_WRBACK && | ||
1586 | curr_match == MTRR_TYPE_WRTHROUGH) || | ||
1587 | (prev_match == MTRR_TYPE_WRTHROUGH && | ||
1588 | curr_match == MTRR_TYPE_WRBACK)) { | ||
1589 | prev_match = MTRR_TYPE_WRTHROUGH; | ||
1590 | curr_match = MTRR_TYPE_WRTHROUGH; | ||
1591 | } | ||
1592 | |||
1593 | if (prev_match != curr_match) | ||
1594 | return MTRR_TYPE_UNCACHABLE; | ||
1595 | } | ||
1596 | |||
1597 | if (prev_match != 0xFF) | ||
1598 | return prev_match; | ||
1599 | |||
1600 | return mtrr_state->def_type; | ||
1601 | } | ||
1602 | |||
1603 | static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1604 | { | ||
1605 | u8 mtrr; | ||
1606 | |||
1607 | mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, | ||
1608 | (gfn << PAGE_SHIFT) + PAGE_SIZE); | ||
1609 | if (mtrr == 0xfe || mtrr == 0xff) | ||
1610 | mtrr = MTRR_TYPE_WRBACK; | ||
1611 | return mtrr; | ||
1612 | } | ||
1613 | |||
1396 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1614 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1397 | { | 1615 | { |
1398 | unsigned index; | 1616 | unsigned index; |
@@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1409 | if (s->role.word != sp->role.word) | 1627 | if (s->role.word != sp->role.word) |
1410 | return 1; | 1628 | return 1; |
1411 | } | 1629 | } |
1412 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1413 | ++vcpu->kvm->stat.mmu_unsync; | 1630 | ++vcpu->kvm->stat.mmu_unsync; |
1414 | sp->unsync = 1; | 1631 | sp->unsync = 1; |
1632 | |||
1633 | if (sp->global) { | ||
1634 | list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); | ||
1635 | ++vcpu->kvm->stat.mmu_unsync_global; | ||
1636 | } else | ||
1637 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1638 | |||
1415 | mmu_convert_notrap(sp); | 1639 | mmu_convert_notrap(sp); |
1416 | return 0; | 1640 | return 0; |
1417 | } | 1641 | } |
@@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
1437 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1661 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
1438 | unsigned pte_access, int user_fault, | 1662 | unsigned pte_access, int user_fault, |
1439 | int write_fault, int dirty, int largepage, | 1663 | int write_fault, int dirty, int largepage, |
1440 | gfn_t gfn, pfn_t pfn, bool speculative, | 1664 | int global, gfn_t gfn, pfn_t pfn, bool speculative, |
1441 | bool can_unsync) | 1665 | bool can_unsync) |
1442 | { | 1666 | { |
1443 | u64 spte; | 1667 | u64 spte; |
1444 | int ret = 0; | 1668 | int ret = 0; |
1669 | u64 mt_mask = shadow_mt_mask; | ||
1670 | struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); | ||
1671 | |||
1672 | if (!(vcpu->arch.cr4 & X86_CR4_PGE)) | ||
1673 | global = 0; | ||
1674 | if (!global && sp->global) { | ||
1675 | sp->global = 0; | ||
1676 | if (sp->unsync) { | ||
1677 | kvm_unlink_unsync_global(vcpu->kvm, sp); | ||
1678 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1679 | } | ||
1680 | } | ||
1681 | |||
1445 | /* | 1682 | /* |
1446 | * We don't set the accessed bit, since we sometimes want to see | 1683 | * We don't set the accessed bit, since we sometimes want to see |
1447 | * whether the guest actually used the pte (in order to detect | 1684 | * whether the guest actually used the pte (in order to detect |
@@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1460 | spte |= shadow_user_mask; | 1697 | spte |= shadow_user_mask; |
1461 | if (largepage) | 1698 | if (largepage) |
1462 | spte |= PT_PAGE_SIZE_MASK; | 1699 | spte |= PT_PAGE_SIZE_MASK; |
1700 | if (mt_mask) { | ||
1701 | mt_mask = get_memory_type(vcpu, gfn) << | ||
1702 | kvm_x86_ops->get_mt_mask_shift(); | ||
1703 | spte |= mt_mask; | ||
1704 | } | ||
1463 | 1705 | ||
1464 | spte |= (u64)pfn << PAGE_SHIFT; | 1706 | spte |= (u64)pfn << PAGE_SHIFT; |
1465 | 1707 | ||
@@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1474 | 1716 | ||
1475 | spte |= PT_WRITABLE_MASK; | 1717 | spte |= PT_WRITABLE_MASK; |
1476 | 1718 | ||
1719 | /* | ||
1720 | * Optimization: for pte sync, if spte was writable the hash | ||
1721 | * lookup is unnecessary (and expensive). Write protection | ||
1722 | * is responsibility of mmu_get_page / kvm_sync_page. | ||
1723 | * Same reasoning can be applied to dirty page accounting. | ||
1724 | */ | ||
1725 | if (!can_unsync && is_writeble_pte(*shadow_pte)) | ||
1726 | goto set_pte; | ||
1727 | |||
1477 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 1728 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
1478 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 1729 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
1479 | __func__, gfn); | 1730 | __func__, gfn); |
@@ -1495,8 +1746,8 @@ set_pte: | |||
1495 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1746 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
1496 | unsigned pt_access, unsigned pte_access, | 1747 | unsigned pt_access, unsigned pte_access, |
1497 | int user_fault, int write_fault, int dirty, | 1748 | int user_fault, int write_fault, int dirty, |
1498 | int *ptwrite, int largepage, gfn_t gfn, | 1749 | int *ptwrite, int largepage, int global, |
1499 | pfn_t pfn, bool speculative) | 1750 | gfn_t gfn, pfn_t pfn, bool speculative) |
1500 | { | 1751 | { |
1501 | int was_rmapped = 0; | 1752 | int was_rmapped = 0; |
1502 | int was_writeble = is_writeble_pte(*shadow_pte); | 1753 | int was_writeble = is_writeble_pte(*shadow_pte); |
@@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1529 | } | 1780 | } |
1530 | } | 1781 | } |
1531 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, | 1782 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, |
1532 | dirty, largepage, gfn, pfn, speculative, true)) { | 1783 | dirty, largepage, global, gfn, pfn, speculative, true)) { |
1533 | if (write_fault) | 1784 | if (write_fault) |
1534 | *ptwrite = 1; | 1785 | *ptwrite = 1; |
1535 | kvm_x86_ops->tlb_flush(vcpu); | 1786 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, | |||
1586 | || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { | 1837 | || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { |
1587 | mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, | 1838 | mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, |
1588 | 0, walk->write, 1, &walk->pt_write, | 1839 | 0, walk->write, 1, &walk->pt_write, |
1589 | walk->largepage, gfn, walk->pfn, false); | 1840 | walk->largepage, 0, gfn, walk->pfn, false); |
1590 | ++vcpu->stat.pf_fixed; | 1841 | ++vcpu->stat.pf_fixed; |
1591 | return 1; | 1842 | return 1; |
1592 | } | 1843 | } |
@@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
1773 | } | 2024 | } |
1774 | } | 2025 | } |
1775 | 2026 | ||
2027 | static void mmu_sync_global(struct kvm_vcpu *vcpu) | ||
2028 | { | ||
2029 | struct kvm *kvm = vcpu->kvm; | ||
2030 | struct kvm_mmu_page *sp, *n; | ||
2031 | |||
2032 | list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) | ||
2033 | kvm_sync_page(vcpu, sp); | ||
2034 | } | ||
2035 | |||
1776 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2036 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
1777 | { | 2037 | { |
1778 | spin_lock(&vcpu->kvm->mmu_lock); | 2038 | spin_lock(&vcpu->kvm->mmu_lock); |
@@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
1780 | spin_unlock(&vcpu->kvm->mmu_lock); | 2040 | spin_unlock(&vcpu->kvm->mmu_lock); |
1781 | } | 2041 | } |
1782 | 2042 | ||
2043 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) | ||
2044 | { | ||
2045 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2046 | mmu_sync_global(vcpu); | ||
2047 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2048 | } | ||
2049 | |||
1783 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2050 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
1784 | { | 2051 | { |
1785 | return vaddr; | 2052 | return vaddr; |
@@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
2178 | } | 2445 | } |
2179 | 2446 | ||
2180 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2447 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2181 | const u8 *new, int bytes) | 2448 | const u8 *new, int bytes, |
2449 | bool guest_initiated) | ||
2182 | { | 2450 | { |
2183 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2451 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2184 | struct kvm_mmu_page *sp; | 2452 | struct kvm_mmu_page *sp; |
@@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2204 | kvm_mmu_free_some_pages(vcpu); | 2472 | kvm_mmu_free_some_pages(vcpu); |
2205 | ++vcpu->kvm->stat.mmu_pte_write; | 2473 | ++vcpu->kvm->stat.mmu_pte_write; |
2206 | kvm_mmu_audit(vcpu, "pre pte write"); | 2474 | kvm_mmu_audit(vcpu, "pre pte write"); |
2207 | if (gfn == vcpu->arch.last_pt_write_gfn | 2475 | if (guest_initiated) { |
2208 | && !last_updated_pte_accessed(vcpu)) { | 2476 | if (gfn == vcpu->arch.last_pt_write_gfn |
2209 | ++vcpu->arch.last_pt_write_count; | 2477 | && !last_updated_pte_accessed(vcpu)) { |
2210 | if (vcpu->arch.last_pt_write_count >= 3) | 2478 | ++vcpu->arch.last_pt_write_count; |
2211 | flooded = 1; | 2479 | if (vcpu->arch.last_pt_write_count >= 3) |
2212 | } else { | 2480 | flooded = 1; |
2213 | vcpu->arch.last_pt_write_gfn = gfn; | 2481 | } else { |
2214 | vcpu->arch.last_pt_write_count = 1; | 2482 | vcpu->arch.last_pt_write_gfn = gfn; |
2215 | vcpu->arch.last_pte_updated = NULL; | 2483 | vcpu->arch.last_pt_write_count = 1; |
2484 | vcpu->arch.last_pte_updated = NULL; | ||
2485 | } | ||
2216 | } | 2486 | } |
2217 | index = kvm_page_table_hashfn(gfn); | 2487 | index = kvm_page_table_hashfn(gfn); |
2218 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2488 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
@@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | |||
2352 | 2622 | ||
2353 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | 2623 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) |
2354 | { | 2624 | { |
2355 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2356 | vcpu->arch.mmu.invlpg(vcpu, gva); | 2625 | vcpu->arch.mmu.invlpg(vcpu, gva); |
2357 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2358 | kvm_mmu_flush_tlb(vcpu); | 2626 | kvm_mmu_flush_tlb(vcpu); |
2359 | ++vcpu->stat.invlpg; | 2627 | ++vcpu->stat.invlpg; |
2360 | } | 2628 | } |
@@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2451 | int i; | 2719 | int i; |
2452 | u64 *pt; | 2720 | u64 *pt; |
2453 | 2721 | ||
2454 | if (!test_bit(slot, &sp->slot_bitmap)) | 2722 | if (!test_bit(slot, sp->slot_bitmap)) |
2455 | continue; | 2723 | continue; |
2456 | 2724 | ||
2457 | pt = sp->spt; | 2725 | pt = sp->spt; |
@@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) | |||
2860 | if (sp->role.metaphysical) | 3128 | if (sp->role.metaphysical) |
2861 | continue; | 3129 | continue; |
2862 | 3130 | ||
2863 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); | ||
2864 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); | 3131 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); |
3132 | slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); | ||
2865 | rmapp = &slot->rmap[gfn - slot->base_gfn]; | 3133 | rmapp = &slot->rmap[gfn - slot->base_gfn]; |
2866 | if (*rmapp) | 3134 | if (*rmapp) |
2867 | printk(KERN_ERR "%s: (%s) shadow page has writable" | 3135 | printk(KERN_ERR "%s: (%s) shadow page has writable" |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 84eee43bbe74..9fd78b6e17ad 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -82,6 +82,7 @@ struct shadow_walker { | |||
82 | int *ptwrite; | 82 | int *ptwrite; |
83 | pfn_t pfn; | 83 | pfn_t pfn; |
84 | u64 *sptep; | 84 | u64 *sptep; |
85 | gpa_t pte_gpa; | ||
85 | }; | 86 | }; |
86 | 87 | ||
87 | static gfn_t gpte_to_gfn(pt_element_t gpte) | 88 | static gfn_t gpte_to_gfn(pt_element_t gpte) |
@@ -222,7 +223,7 @@ walk: | |||
222 | if (ret) | 223 | if (ret) |
223 | goto walk; | 224 | goto walk; |
224 | pte |= PT_DIRTY_MASK; | 225 | pte |= PT_DIRTY_MASK; |
225 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); | 226 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); |
226 | walker->ptes[walker->level - 1] = pte; | 227 | walker->ptes[walker->level - 1] = pte; |
227 | } | 228 | } |
228 | 229 | ||
@@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
274 | return; | 275 | return; |
275 | kvm_get_pfn(pfn); | 276 | kvm_get_pfn(pfn); |
276 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 277 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
277 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), | 278 | gpte & PT_DIRTY_MASK, NULL, largepage, |
279 | gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), | ||
278 | pfn, true); | 280 | pfn, true); |
279 | } | 281 | } |
280 | 282 | ||
@@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, | |||
301 | mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, | 303 | mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, |
302 | sw->user_fault, sw->write_fault, | 304 | sw->user_fault, sw->write_fault, |
303 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 305 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
304 | sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, | 306 | sw->ptwrite, sw->largepage, |
305 | false); | 307 | gw->ptes[gw->level-1] & PT_GLOBAL_MASK, |
308 | gw->gfn, sw->pfn, false); | ||
306 | sw->sptep = sptep; | 309 | sw->sptep = sptep; |
307 | return 1; | 310 | return 1; |
308 | } | 311 | } |
@@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, | |||
466 | struct kvm_vcpu *vcpu, u64 addr, | 469 | struct kvm_vcpu *vcpu, u64 addr, |
467 | u64 *sptep, int level) | 470 | u64 *sptep, int level) |
468 | { | 471 | { |
472 | struct shadow_walker *sw = | ||
473 | container_of(_sw, struct shadow_walker, walker); | ||
469 | 474 | ||
470 | if (level == PT_PAGE_TABLE_LEVEL) { | 475 | /* FIXME: properly handle invlpg on large guest pages */ |
471 | if (is_shadow_present_pte(*sptep)) | 476 | if (level == PT_PAGE_TABLE_LEVEL || |
477 | ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { | ||
478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
479 | |||
480 | sw->pte_gpa = (sp->gfn << PAGE_SHIFT); | ||
481 | sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
482 | |||
483 | if (is_shadow_present_pte(*sptep)) { | ||
472 | rmap_remove(vcpu->kvm, sptep); | 484 | rmap_remove(vcpu->kvm, sptep); |
485 | if (is_large_pte(*sptep)) | ||
486 | --vcpu->kvm->stat.lpages; | ||
487 | } | ||
473 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); | 488 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); |
474 | return 1; | 489 | return 1; |
475 | } | 490 | } |
@@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, | |||
480 | 495 | ||
481 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 496 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
482 | { | 497 | { |
498 | pt_element_t gpte; | ||
483 | struct shadow_walker walker = { | 499 | struct shadow_walker walker = { |
484 | .walker = { .entry = FNAME(shadow_invlpg_entry), }, | 500 | .walker = { .entry = FNAME(shadow_invlpg_entry), }, |
501 | .pte_gpa = -1, | ||
485 | }; | 502 | }; |
486 | 503 | ||
504 | spin_lock(&vcpu->kvm->mmu_lock); | ||
487 | walk_shadow(&walker.walker, vcpu, gva); | 505 | walk_shadow(&walker.walker, vcpu, gva); |
506 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
507 | if (walker.pte_gpa == -1) | ||
508 | return; | ||
509 | if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, | ||
510 | sizeof(pt_element_t))) | ||
511 | return; | ||
512 | if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { | ||
513 | if (mmu_topup_memory_caches(vcpu)) | ||
514 | return; | ||
515 | kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, | ||
516 | sizeof(pt_element_t), 0); | ||
517 | } | ||
488 | } | 518 | } |
489 | 519 | ||
490 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 520 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
@@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
580 | nr_present++; | 610 | nr_present++; |
581 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 611 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
582 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 612 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
583 | is_dirty_pte(gpte), 0, gfn, | 613 | is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, |
584 | spte_to_pfn(sp->spt[i]), true, false); | 614 | spte_to_pfn(sp->spt[i]), true, false); |
585 | } | 615 | } |
586 | 616 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9c4ce657d963..1452851ae258 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -28,6 +28,8 @@ | |||
28 | 28 | ||
29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
30 | 30 | ||
31 | #include <asm/virtext.h> | ||
32 | |||
31 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 33 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
32 | 34 | ||
33 | MODULE_AUTHOR("Qumranet"); | 35 | MODULE_AUTHOR("Qumranet"); |
@@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
245 | 247 | ||
246 | static int has_svm(void) | 248 | static int has_svm(void) |
247 | { | 249 | { |
248 | uint32_t eax, ebx, ecx, edx; | 250 | const char *msg; |
249 | |||
250 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
251 | printk(KERN_INFO "has_svm: not amd\n"); | ||
252 | return 0; | ||
253 | } | ||
254 | 251 | ||
255 | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | 252 | if (!cpu_has_svm(&msg)) { |
256 | if (eax < SVM_CPUID_FUNC) { | 253 | printk(KERN_INFO "has_svn: %s\n", msg); |
257 | printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); | ||
258 | return 0; | 254 | return 0; |
259 | } | 255 | } |
260 | 256 | ||
261 | cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | ||
262 | if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { | ||
263 | printk(KERN_DEBUG "has_svm: svm not available\n"); | ||
264 | return 0; | ||
265 | } | ||
266 | return 1; | 257 | return 1; |
267 | } | 258 | } |
268 | 259 | ||
269 | static void svm_hardware_disable(void *garbage) | 260 | static void svm_hardware_disable(void *garbage) |
270 | { | 261 | { |
271 | uint64_t efer; | 262 | cpu_svm_disable(); |
272 | |||
273 | wrmsrl(MSR_VM_HSAVE_PA, 0); | ||
274 | rdmsrl(MSR_EFER, efer); | ||
275 | wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); | ||
276 | } | 263 | } |
277 | 264 | ||
278 | static void svm_hardware_enable(void *garbage) | 265 | static void svm_hardware_enable(void *garbage) |
@@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
772 | var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; | 759 | var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; |
773 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 760 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
774 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 761 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
762 | |||
763 | /* | ||
764 | * SVM always stores 0 for the 'G' bit in the CS selector in | ||
765 | * the VMCB on a VMEXIT. This hurts cross-vendor migration: | ||
766 | * Intel's VMENTRY has a check on the 'G' bit. | ||
767 | */ | ||
768 | if (seg == VCPU_SREG_CS) | ||
769 | var->g = s->limit > 0xfffff; | ||
770 | |||
771 | /* | ||
772 | * Work around a bug where the busy flag in the tr selector | ||
773 | * isn't exposed | ||
774 | */ | ||
775 | if (seg == VCPU_SREG_TR) | ||
776 | var->type |= 0x2; | ||
777 | |||
775 | var->unusable = !var->present; | 778 | var->unusable = !var->present; |
776 | } | 779 | } |
777 | 780 | ||
@@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1099 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; | 1102 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; |
1100 | down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; | 1103 | down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; |
1101 | 1104 | ||
1105 | skip_emulated_instruction(&svm->vcpu); | ||
1102 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1106 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); |
1103 | } | 1107 | } |
1104 | 1108 | ||
@@ -1912,6 +1916,11 @@ static int get_npt_level(void) | |||
1912 | #endif | 1916 | #endif |
1913 | } | 1917 | } |
1914 | 1918 | ||
1919 | static int svm_get_mt_mask_shift(void) | ||
1920 | { | ||
1921 | return 0; | ||
1922 | } | ||
1923 | |||
1915 | static struct kvm_x86_ops svm_x86_ops = { | 1924 | static struct kvm_x86_ops svm_x86_ops = { |
1916 | .cpu_has_kvm_support = has_svm, | 1925 | .cpu_has_kvm_support = has_svm, |
1917 | .disabled_by_bios = is_disabled, | 1926 | .disabled_by_bios = is_disabled, |
@@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
1967 | 1976 | ||
1968 | .set_tss_addr = svm_set_tss_addr, | 1977 | .set_tss_addr = svm_set_tss_addr, |
1969 | .get_tdp_level = get_npt_level, | 1978 | .get_tdp_level = get_npt_level, |
1979 | .get_mt_mask_shift = svm_get_mt_mask_shift, | ||
1970 | }; | 1980 | }; |
1971 | 1981 | ||
1972 | static int __init svm_init(void) | 1982 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a4018b01e1f9..6259d7467648 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -16,7 +16,6 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include "irq.h" | 18 | #include "irq.h" |
19 | #include "vmx.h" | ||
20 | #include "mmu.h" | 19 | #include "mmu.h" |
21 | 20 | ||
22 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
@@ -31,6 +30,8 @@ | |||
31 | 30 | ||
32 | #include <asm/io.h> | 31 | #include <asm/io.h> |
33 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
33 | #include <asm/vmx.h> | ||
34 | #include <asm/virtext.h> | ||
34 | 35 | ||
35 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 36 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
36 | 37 | ||
@@ -90,6 +91,11 @@ struct vcpu_vmx { | |||
90 | } rmode; | 91 | } rmode; |
91 | int vpid; | 92 | int vpid; |
92 | bool emulation_required; | 93 | bool emulation_required; |
94 | |||
95 | /* Support for vnmi-less CPUs */ | ||
96 | int soft_vnmi_blocked; | ||
97 | ktime_t entry_time; | ||
98 | s64 vnmi_blocked_time; | ||
93 | }; | 99 | }; |
94 | 100 | ||
95 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 101 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -122,7 +128,7 @@ static struct vmcs_config { | |||
122 | u32 vmentry_ctrl; | 128 | u32 vmentry_ctrl; |
123 | } vmcs_config; | 129 | } vmcs_config; |
124 | 130 | ||
125 | struct vmx_capability { | 131 | static struct vmx_capability { |
126 | u32 ept; | 132 | u32 ept; |
127 | u32 vpid; | 133 | u32 vpid; |
128 | } vmx_capability; | 134 | } vmx_capability; |
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
957 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); | 963 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); |
958 | 964 | ||
959 | break; | 965 | break; |
966 | case MSR_IA32_CR_PAT: | ||
967 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | ||
968 | vmcs_write64(GUEST_IA32_PAT, data); | ||
969 | vcpu->arch.pat = data; | ||
970 | break; | ||
971 | } | ||
972 | /* Otherwise falls through to kvm_set_msr_common */ | ||
960 | default: | 973 | default: |
961 | vmx_load_host_state(vmx); | 974 | vmx_load_host_state(vmx); |
962 | msr = find_msr_entry(vmx, msr_index); | 975 | msr = find_msr_entry(vmx, msr_index); |
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) | |||
1032 | 1045 | ||
1033 | static __init int cpu_has_kvm_support(void) | 1046 | static __init int cpu_has_kvm_support(void) |
1034 | { | 1047 | { |
1035 | unsigned long ecx = cpuid_ecx(1); | 1048 | return cpu_has_vmx(); |
1036 | return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ | ||
1037 | } | 1049 | } |
1038 | 1050 | ||
1039 | static __init int vmx_disabled_by_bios(void) | 1051 | static __init int vmx_disabled_by_bios(void) |
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void) | |||
1079 | __vcpu_clear(vmx); | 1091 | __vcpu_clear(vmx); |
1080 | } | 1092 | } |
1081 | 1093 | ||
1082 | static void hardware_disable(void *garbage) | 1094 | |
1095 | /* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() | ||
1096 | * tricks. | ||
1097 | */ | ||
1098 | static void kvm_cpu_vmxoff(void) | ||
1083 | { | 1099 | { |
1084 | vmclear_local_vcpus(); | ||
1085 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 1100 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
1086 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | 1101 | write_cr4(read_cr4() & ~X86_CR4_VMXE); |
1087 | } | 1102 | } |
1088 | 1103 | ||
1104 | static void hardware_disable(void *garbage) | ||
1105 | { | ||
1106 | vmclear_local_vcpus(); | ||
1107 | kvm_cpu_vmxoff(); | ||
1108 | } | ||
1109 | |||
1089 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1110 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
1090 | u32 msr, u32 *result) | 1111 | u32 msr, u32 *result) |
1091 | { | 1112 | { |
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1176 | #ifdef CONFIG_X86_64 | 1197 | #ifdef CONFIG_X86_64 |
1177 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 1198 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
1178 | #endif | 1199 | #endif |
1179 | opt = 0; | 1200 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; |
1180 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | 1201 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, |
1181 | &_vmexit_control) < 0) | 1202 | &_vmexit_control) < 0) |
1182 | return -EIO; | 1203 | return -EIO; |
1183 | 1204 | ||
1184 | min = opt = 0; | 1205 | min = 0; |
1206 | opt = VM_ENTRY_LOAD_IA32_PAT; | ||
1185 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | 1207 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, |
1186 | &_vmentry_control) < 0) | 1208 | &_vmentry_control) < 0) |
1187 | return -EIO; | 1209 | return -EIO; |
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | |||
2087 | */ | 2109 | */ |
2088 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | 2110 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2089 | { | 2111 | { |
2090 | u32 host_sysenter_cs; | 2112 | u32 host_sysenter_cs, msr_low, msr_high; |
2091 | u32 junk; | 2113 | u32 junk; |
2114 | u64 host_pat; | ||
2092 | unsigned long a; | 2115 | unsigned long a; |
2093 | struct descriptor_table dt; | 2116 | struct descriptor_table dt; |
2094 | int i; | 2117 | int i; |
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2176 | rdmsrl(MSR_IA32_SYSENTER_EIP, a); | 2199 | rdmsrl(MSR_IA32_SYSENTER_EIP, a); |
2177 | vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ | 2200 | vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ |
2178 | 2201 | ||
2202 | if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { | ||
2203 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | ||
2204 | host_pat = msr_low | ((u64) msr_high << 32); | ||
2205 | vmcs_write64(HOST_IA32_PAT, host_pat); | ||
2206 | } | ||
2207 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | ||
2208 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | ||
2209 | host_pat = msr_low | ((u64) msr_high << 32); | ||
2210 | /* Write the default value follow host pat */ | ||
2211 | vmcs_write64(GUEST_IA32_PAT, host_pat); | ||
2212 | /* Keep arch.pat sync with GUEST_IA32_PAT */ | ||
2213 | vmx->vcpu.arch.pat = host_pat; | ||
2214 | } | ||
2215 | |||
2179 | for (i = 0; i < NR_VMX_MSR; ++i) { | 2216 | for (i = 0; i < NR_VMX_MSR; ++i) { |
2180 | u32 index = vmx_msr_index[i]; | 2217 | u32 index = vmx_msr_index[i]; |
2181 | u32 data_low, data_high; | 2218 | u32 data_low, data_high; |
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2230 | 2267 | ||
2231 | vmx->vcpu.arch.rmode.active = 0; | 2268 | vmx->vcpu.arch.rmode.active = 0; |
2232 | 2269 | ||
2270 | vmx->soft_vnmi_blocked = 0; | ||
2271 | |||
2233 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 2272 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
2234 | kvm_set_cr8(&vmx->vcpu, 0); | 2273 | kvm_set_cr8(&vmx->vcpu, 0); |
2235 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 2274 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
@@ -2335,6 +2374,29 @@ out: | |||
2335 | return ret; | 2374 | return ret; |
2336 | } | 2375 | } |
2337 | 2376 | ||
2377 | static void enable_irq_window(struct kvm_vcpu *vcpu) | ||
2378 | { | ||
2379 | u32 cpu_based_vm_exec_control; | ||
2380 | |||
2381 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2382 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2383 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2384 | } | ||
2385 | |||
2386 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | ||
2387 | { | ||
2388 | u32 cpu_based_vm_exec_control; | ||
2389 | |||
2390 | if (!cpu_has_virtual_nmis()) { | ||
2391 | enable_irq_window(vcpu); | ||
2392 | return; | ||
2393 | } | ||
2394 | |||
2395 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2396 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | ||
2397 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2398 | } | ||
2399 | |||
2338 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | 2400 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) |
2339 | { | 2401 | { |
2340 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2402 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
2358 | 2420 | ||
2359 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 2421 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
2360 | { | 2422 | { |
2423 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2424 | |||
2425 | if (!cpu_has_virtual_nmis()) { | ||
2426 | /* | ||
2427 | * Tracking the NMI-blocked state in software is built upon | ||
2428 | * finding the next open IRQ window. This, in turn, depends on | ||
2429 | * well-behaving guests: They have to keep IRQs disabled at | ||
2430 | * least as long as the NMI handler runs. Otherwise we may | ||
2431 | * cause NMI nesting, maybe breaking the guest. But as this is | ||
2432 | * highly unlikely, we can live with the residual risk. | ||
2433 | */ | ||
2434 | vmx->soft_vnmi_blocked = 1; | ||
2435 | vmx->vnmi_blocked_time = 0; | ||
2436 | } | ||
2437 | |||
2438 | ++vcpu->stat.nmi_injections; | ||
2439 | if (vcpu->arch.rmode.active) { | ||
2440 | vmx->rmode.irq.pending = true; | ||
2441 | vmx->rmode.irq.vector = NMI_VECTOR; | ||
2442 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | ||
2443 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
2444 | NMI_VECTOR | INTR_TYPE_SOFT_INTR | | ||
2445 | INTR_INFO_VALID_MASK); | ||
2446 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | ||
2447 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); | ||
2448 | return; | ||
2449 | } | ||
2361 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2450 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
2362 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2451 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
2363 | } | 2452 | } |
2364 | 2453 | ||
2454 | static void vmx_update_window_states(struct kvm_vcpu *vcpu) | ||
2455 | { | ||
2456 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
2457 | |||
2458 | vcpu->arch.nmi_window_open = | ||
2459 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
2460 | GUEST_INTR_STATE_MOV_SS | | ||
2461 | GUEST_INTR_STATE_NMI)); | ||
2462 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
2463 | vcpu->arch.nmi_window_open = 0; | ||
2464 | |||
2465 | vcpu->arch.interrupt_window_open = | ||
2466 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2467 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
2468 | GUEST_INTR_STATE_MOV_SS))); | ||
2469 | } | ||
2470 | |||
2365 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 2471 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) |
2366 | { | 2472 | { |
2367 | int word_index = __ffs(vcpu->arch.irq_summary); | 2473 | int word_index = __ffs(vcpu->arch.irq_summary); |
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | |||
2374 | kvm_queue_interrupt(vcpu, irq); | 2480 | kvm_queue_interrupt(vcpu, irq); |
2375 | } | 2481 | } |
2376 | 2482 | ||
2377 | |||
2378 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 2483 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, |
2379 | struct kvm_run *kvm_run) | 2484 | struct kvm_run *kvm_run) |
2380 | { | 2485 | { |
2381 | u32 cpu_based_vm_exec_control; | 2486 | vmx_update_window_states(vcpu); |
2382 | |||
2383 | vcpu->arch.interrupt_window_open = | ||
2384 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2385 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); | ||
2386 | 2487 | ||
2387 | if (vcpu->arch.interrupt_window_open && | 2488 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { |
2388 | vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) | 2489 | if (vcpu->arch.interrupt.pending) { |
2389 | kvm_do_inject_irq(vcpu); | 2490 | enable_nmi_window(vcpu); |
2491 | } else if (vcpu->arch.nmi_window_open) { | ||
2492 | vcpu->arch.nmi_pending = false; | ||
2493 | vcpu->arch.nmi_injected = true; | ||
2494 | } else { | ||
2495 | enable_nmi_window(vcpu); | ||
2496 | return; | ||
2497 | } | ||
2498 | } | ||
2499 | if (vcpu->arch.nmi_injected) { | ||
2500 | vmx_inject_nmi(vcpu); | ||
2501 | if (vcpu->arch.nmi_pending) | ||
2502 | enable_nmi_window(vcpu); | ||
2503 | else if (vcpu->arch.irq_summary | ||
2504 | || kvm_run->request_interrupt_window) | ||
2505 | enable_irq_window(vcpu); | ||
2506 | return; | ||
2507 | } | ||
2390 | 2508 | ||
2391 | if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) | 2509 | if (vcpu->arch.interrupt_window_open) { |
2392 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | 2510 | if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) |
2511 | kvm_do_inject_irq(vcpu); | ||
2393 | 2512 | ||
2394 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2513 | if (vcpu->arch.interrupt.pending) |
2514 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
2515 | } | ||
2395 | if (!vcpu->arch.interrupt_window_open && | 2516 | if (!vcpu->arch.interrupt_window_open && |
2396 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) | 2517 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) |
2397 | /* | 2518 | enable_irq_window(vcpu); |
2398 | * Interrupts blocked. Wait for unblock. | ||
2399 | */ | ||
2400 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2401 | else | ||
2402 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2403 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2404 | } | 2519 | } |
2405 | 2520 | ||
2406 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2521 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) |
2407 | { | 2522 | { |
2408 | int ret; | 2523 | int ret; |
2409 | struct kvm_userspace_memory_region tss_mem = { | 2524 | struct kvm_userspace_memory_region tss_mem = { |
2410 | .slot = 8, | 2525 | .slot = TSS_PRIVATE_MEMSLOT, |
2411 | .guest_phys_addr = addr, | 2526 | .guest_phys_addr = addr, |
2412 | .memory_size = PAGE_SIZE * 3, | 2527 | .memory_size = PAGE_SIZE * 3, |
2413 | .flags = 0, | 2528 | .flags = 0, |
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2492 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | 2607 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); |
2493 | } | 2608 | } |
2494 | 2609 | ||
2495 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ | 2610 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2496 | return 1; /* already handled by vmx_vcpu_run() */ | 2611 | return 1; /* already handled by vmx_vcpu_run() */ |
2497 | 2612 | ||
2498 | if (is_no_device(intr_info)) { | 2613 | if (is_no_device(intr_info)) { |
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2581 | rep = (exit_qualification & 32) != 0; | 2696 | rep = (exit_qualification & 32) != 0; |
2582 | port = exit_qualification >> 16; | 2697 | port = exit_qualification >> 16; |
2583 | 2698 | ||
2699 | skip_emulated_instruction(vcpu); | ||
2584 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2700 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); |
2585 | } | 2701 | } |
2586 | 2702 | ||
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2767 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2883 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2768 | 2884 | ||
2769 | KVMTRACE_0D(PEND_INTR, vcpu, handler); | 2885 | KVMTRACE_0D(PEND_INTR, vcpu, handler); |
2886 | ++vcpu->stat.irq_window_exits; | ||
2770 | 2887 | ||
2771 | /* | 2888 | /* |
2772 | * If the user space waits to inject interrupts, exit as soon as | 2889 | * If the user space waits to inject interrupts, exit as soon as |
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2775 | if (kvm_run->request_interrupt_window && | 2892 | if (kvm_run->request_interrupt_window && |
2776 | !vcpu->arch.irq_summary) { | 2893 | !vcpu->arch.irq_summary) { |
2777 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 2894 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
2778 | ++vcpu->stat.irq_window_exits; | ||
2779 | return 0; | 2895 | return 0; |
2780 | } | 2896 | } |
2781 | return 1; | 2897 | return 1; |
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2832 | 2948 | ||
2833 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2949 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2834 | { | 2950 | { |
2951 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2835 | unsigned long exit_qualification; | 2952 | unsigned long exit_qualification; |
2836 | u16 tss_selector; | 2953 | u16 tss_selector; |
2837 | int reason; | 2954 | int reason; |
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2839 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 2956 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2840 | 2957 | ||
2841 | reason = (u32)exit_qualification >> 30; | 2958 | reason = (u32)exit_qualification >> 30; |
2959 | if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && | ||
2960 | (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | ||
2961 | (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) | ||
2962 | == INTR_TYPE_NMI_INTR) { | ||
2963 | vcpu->arch.nmi_injected = false; | ||
2964 | if (cpu_has_virtual_nmis()) | ||
2965 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2966 | GUEST_INTR_STATE_NMI); | ||
2967 | } | ||
2842 | tss_selector = exit_qualification; | 2968 | tss_selector = exit_qualification; |
2843 | 2969 | ||
2844 | return kvm_task_switch(vcpu, tss_selector, reason); | 2970 | return kvm_task_switch(vcpu, tss_selector, reason); |
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
2927 | while (!guest_state_valid(vcpu)) { | 3053 | while (!guest_state_valid(vcpu)) { |
2928 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3054 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
2929 | 3055 | ||
2930 | switch (err) { | 3056 | if (err == EMULATE_DO_MMIO) |
2931 | case EMULATE_DONE: | 3057 | break; |
2932 | break; | 3058 | |
2933 | case EMULATE_DO_MMIO: | 3059 | if (err != EMULATE_DONE) { |
2934 | kvm_report_emulation_failure(vcpu, "mmio"); | 3060 | kvm_report_emulation_failure(vcpu, "emulation failure"); |
2935 | /* TODO: Handle MMIO */ | 3061 | return; |
2936 | return; | ||
2937 | default: | ||
2938 | kvm_report_emulation_failure(vcpu, "emulation failure"); | ||
2939 | return; | ||
2940 | } | 3062 | } |
2941 | 3063 | ||
2942 | if (signal_pending(current)) | 3064 | if (signal_pending(current)) |
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
2948 | local_irq_disable(); | 3070 | local_irq_disable(); |
2949 | preempt_disable(); | 3071 | preempt_disable(); |
2950 | 3072 | ||
2951 | /* Guest state should be valid now, no more emulation should be needed */ | 3073 | /* Guest state should be valid now except if we need to |
2952 | vmx->emulation_required = 0; | 3074 | * emulate an MMIO */ |
3075 | if (guest_state_valid(vcpu)) | ||
3076 | vmx->emulation_required = 0; | ||
2953 | } | 3077 | } |
2954 | 3078 | ||
2955 | /* | 3079 | /* |
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2996 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), | 3120 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), |
2997 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); | 3121 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); |
2998 | 3122 | ||
3123 | /* If we need to emulate an MMIO from handle_invalid_guest_state | ||
3124 | * we just return 0 */ | ||
3125 | if (vmx->emulation_required && emulate_invalid_guest_state) | ||
3126 | return 0; | ||
3127 | |||
2999 | /* Access CR3 don't cause VMExit in paging mode, so we need | 3128 | /* Access CR3 don't cause VMExit in paging mode, so we need |
3000 | * to sync with guest real CR3. */ | 3129 | * to sync with guest real CR3. */ |
3001 | if (vm_need_ept() && is_paging(vcpu)) { | 3130 | if (vm_need_ept() && is_paging(vcpu)) { |
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3012 | 3141 | ||
3013 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && | 3142 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && |
3014 | (exit_reason != EXIT_REASON_EXCEPTION_NMI && | 3143 | (exit_reason != EXIT_REASON_EXCEPTION_NMI && |
3015 | exit_reason != EXIT_REASON_EPT_VIOLATION)) | 3144 | exit_reason != EXIT_REASON_EPT_VIOLATION && |
3016 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " | 3145 | exit_reason != EXIT_REASON_TASK_SWITCH)) |
3017 | "exit reason is 0x%x\n", __func__, exit_reason); | 3146 | printk(KERN_WARNING "%s: unexpected, valid vectoring info " |
3147 | "(0x%x) and exit reason is 0x%x\n", | ||
3148 | __func__, vectoring_info, exit_reason); | ||
3149 | |||
3150 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { | ||
3151 | if (vcpu->arch.interrupt_window_open) { | ||
3152 | vmx->soft_vnmi_blocked = 0; | ||
3153 | vcpu->arch.nmi_window_open = 1; | ||
3154 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | ||
3155 | vcpu->arch.nmi_pending) { | ||
3156 | /* | ||
3157 | * This CPU don't support us in finding the end of an | ||
3158 | * NMI-blocked window if the guest runs with IRQs | ||
3159 | * disabled. So we pull the trigger after 1 s of | ||
3160 | * futile waiting, but inform the user about this. | ||
3161 | */ | ||
3162 | printk(KERN_WARNING "%s: Breaking out of NMI-blocked " | ||
3163 | "state on VCPU %d after 1 s timeout\n", | ||
3164 | __func__, vcpu->vcpu_id); | ||
3165 | vmx->soft_vnmi_blocked = 0; | ||
3166 | vmx->vcpu.arch.nmi_window_open = 1; | ||
3167 | } | ||
3168 | } | ||
3169 | |||
3018 | if (exit_reason < kvm_vmx_max_exit_handlers | 3170 | if (exit_reason < kvm_vmx_max_exit_handlers |
3019 | && kvm_vmx_exit_handlers[exit_reason]) | 3171 | && kvm_vmx_exit_handlers[exit_reason]) |
3020 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3172 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); |
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) | |||
3042 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); | 3194 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); |
3043 | } | 3195 | } |
3044 | 3196 | ||
3045 | static void enable_irq_window(struct kvm_vcpu *vcpu) | ||
3046 | { | ||
3047 | u32 cpu_based_vm_exec_control; | ||
3048 | |||
3049 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
3050 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
3051 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
3052 | } | ||
3053 | |||
3054 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | ||
3055 | { | ||
3056 | u32 cpu_based_vm_exec_control; | ||
3057 | |||
3058 | if (!cpu_has_virtual_nmis()) | ||
3059 | return; | ||
3060 | |||
3061 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
3062 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | ||
3063 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
3064 | } | ||
3065 | |||
3066 | static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) | ||
3067 | { | ||
3068 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
3069 | return !(guest_intr & (GUEST_INTR_STATE_NMI | | ||
3070 | GUEST_INTR_STATE_MOV_SS | | ||
3071 | GUEST_INTR_STATE_STI)); | ||
3072 | } | ||
3073 | |||
3074 | static int vmx_irq_enabled(struct kvm_vcpu *vcpu) | ||
3075 | { | ||
3076 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
3077 | return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | | ||
3078 | GUEST_INTR_STATE_STI)) && | ||
3079 | (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); | ||
3080 | } | ||
3081 | |||
3082 | static void enable_intr_window(struct kvm_vcpu *vcpu) | ||
3083 | { | ||
3084 | if (vcpu->arch.nmi_pending) | ||
3085 | enable_nmi_window(vcpu); | ||
3086 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
3087 | enable_irq_window(vcpu); | ||
3088 | } | ||
3089 | |||
3090 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 3197 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
3091 | { | 3198 | { |
3092 | u32 exit_intr_info; | 3199 | u32 exit_intr_info; |
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3109 | if (unblock_nmi && vector != DF_VECTOR) | 3216 | if (unblock_nmi && vector != DF_VECTOR) |
3110 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3217 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3111 | GUEST_INTR_STATE_NMI); | 3218 | GUEST_INTR_STATE_NMI); |
3112 | } | 3219 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
3220 | vmx->vnmi_blocked_time += | ||
3221 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | ||
3113 | 3222 | ||
3114 | idt_vectoring_info = vmx->idt_vectoring_info; | 3223 | idt_vectoring_info = vmx->idt_vectoring_info; |
3115 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3224 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
3147 | { | 3256 | { |
3148 | update_tpr_threshold(vcpu); | 3257 | update_tpr_threshold(vcpu); |
3149 | 3258 | ||
3150 | if (cpu_has_virtual_nmis()) { | 3259 | vmx_update_window_states(vcpu); |
3151 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { | 3260 | |
3152 | if (vcpu->arch.interrupt.pending) { | 3261 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { |
3153 | enable_nmi_window(vcpu); | 3262 | if (vcpu->arch.interrupt.pending) { |
3154 | } else if (vmx_nmi_enabled(vcpu)) { | 3263 | enable_nmi_window(vcpu); |
3155 | vcpu->arch.nmi_pending = false; | 3264 | } else if (vcpu->arch.nmi_window_open) { |
3156 | vcpu->arch.nmi_injected = true; | 3265 | vcpu->arch.nmi_pending = false; |
3157 | } else { | 3266 | vcpu->arch.nmi_injected = true; |
3158 | enable_intr_window(vcpu); | 3267 | } else { |
3159 | return; | 3268 | enable_nmi_window(vcpu); |
3160 | } | ||
3161 | } | ||
3162 | if (vcpu->arch.nmi_injected) { | ||
3163 | vmx_inject_nmi(vcpu); | ||
3164 | enable_intr_window(vcpu); | ||
3165 | return; | 3269 | return; |
3166 | } | 3270 | } |
3167 | } | 3271 | } |
3272 | if (vcpu->arch.nmi_injected) { | ||
3273 | vmx_inject_nmi(vcpu); | ||
3274 | if (vcpu->arch.nmi_pending) | ||
3275 | enable_nmi_window(vcpu); | ||
3276 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
3277 | enable_irq_window(vcpu); | ||
3278 | return; | ||
3279 | } | ||
3168 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { | 3280 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { |
3169 | if (vmx_irq_enabled(vcpu)) | 3281 | if (vcpu->arch.interrupt_window_open) |
3170 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); | 3282 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); |
3171 | else | 3283 | else |
3172 | enable_irq_window(vcpu); | 3284 | enable_irq_window(vcpu); |
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
3174 | if (vcpu->arch.interrupt.pending) { | 3286 | if (vcpu->arch.interrupt.pending) { |
3175 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | 3287 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); |
3176 | kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); | 3288 | kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); |
3289 | if (kvm_cpu_has_interrupt(vcpu)) | ||
3290 | enable_irq_window(vcpu); | ||
3177 | } | 3291 | } |
3178 | } | 3292 | } |
3179 | 3293 | ||
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3213 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3327 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3214 | u32 intr_info; | 3328 | u32 intr_info; |
3215 | 3329 | ||
3330 | /* Record the guest's net vcpu time for enforced NMI injections. */ | ||
3331 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | ||
3332 | vmx->entry_time = ktime_get(); | ||
3333 | |||
3216 | /* Handle invalid guest state instead of entering VMX */ | 3334 | /* Handle invalid guest state instead of entering VMX */ |
3217 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3335 | if (vmx->emulation_required && emulate_invalid_guest_state) { |
3218 | handle_invalid_guest_state(vcpu, kvm_run); | 3336 | handle_invalid_guest_state(vcpu, kvm_run); |
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3327 | if (vmx->rmode.irq.pending) | 3445 | if (vmx->rmode.irq.pending) |
3328 | fixup_rmode_irq(vmx); | 3446 | fixup_rmode_irq(vmx); |
3329 | 3447 | ||
3330 | vcpu->arch.interrupt_window_open = | 3448 | vmx_update_window_states(vcpu); |
3331 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
3332 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; | ||
3333 | 3449 | ||
3334 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 3450 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
3335 | vmx->launched = 1; | 3451 | vmx->launched = 1; |
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3337 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3453 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
3338 | 3454 | ||
3339 | /* We need to handle NMIs before interrupts are enabled */ | 3455 | /* We need to handle NMIs before interrupts are enabled */ |
3340 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && | 3456 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
3341 | (intr_info & INTR_INFO_VALID_MASK)) { | 3457 | (intr_info & INTR_INFO_VALID_MASK)) { |
3342 | KVMTRACE_0D(NMI, vcpu, handler); | 3458 | KVMTRACE_0D(NMI, vcpu, handler); |
3343 | asm("int $2"); | 3459 | asm("int $2"); |
@@ -3455,6 +3571,11 @@ static int get_ept_level(void) | |||
3455 | return VMX_EPT_DEFAULT_GAW + 1; | 3571 | return VMX_EPT_DEFAULT_GAW + 1; |
3456 | } | 3572 | } |
3457 | 3573 | ||
3574 | static int vmx_get_mt_mask_shift(void) | ||
3575 | { | ||
3576 | return VMX_EPT_MT_EPTE_SHIFT; | ||
3577 | } | ||
3578 | |||
3458 | static struct kvm_x86_ops vmx_x86_ops = { | 3579 | static struct kvm_x86_ops vmx_x86_ops = { |
3459 | .cpu_has_kvm_support = cpu_has_kvm_support, | 3580 | .cpu_has_kvm_support = cpu_has_kvm_support, |
3460 | .disabled_by_bios = vmx_disabled_by_bios, | 3581 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3510 | 3631 | ||
3511 | .set_tss_addr = vmx_set_tss_addr, | 3632 | .set_tss_addr = vmx_set_tss_addr, |
3512 | .get_tdp_level = get_ept_level, | 3633 | .get_tdp_level = get_ept_level, |
3634 | .get_mt_mask_shift = vmx_get_mt_mask_shift, | ||
3513 | }; | 3635 | }; |
3514 | 3636 | ||
3515 | static int __init vmx_init(void) | 3637 | static int __init vmx_init(void) |
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void) | |||
3566 | bypass_guest_pf = 0; | 3688 | bypass_guest_pf = 0; |
3567 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | 3689 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | |
3568 | VMX_EPT_WRITABLE_MASK | | 3690 | VMX_EPT_WRITABLE_MASK | |
3569 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | | ||
3570 | VMX_EPT_IGMT_BIT); | 3691 | VMX_EPT_IGMT_BIT); |
3571 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 3692 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
3572 | VMX_EPT_EXECUTABLE_MASK); | 3693 | VMX_EPT_EXECUTABLE_MASK, |
3694 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3573 | kvm_enable_tdp(); | 3695 | kvm_enable_tdp(); |
3574 | } else | 3696 | } else |
3575 | kvm_disable_tdp(); | 3697 | kvm_disable_tdp(); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1f8ff2f1fa2..0e6aa8141dcd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
40 | #include <asm/msr.h> | 40 | #include <asm/msr.h> |
41 | #include <asm/desc.h> | 41 | #include <asm/desc.h> |
42 | #include <asm/mtrr.h> | ||
42 | 43 | ||
43 | #define MAX_IO_MSRS 256 | 44 | #define MAX_IO_MSRS 256 |
44 | #define CR0_RESERVED_BITS \ | 45 | #define CR0_RESERVED_BITS \ |
@@ -86,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
86 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 87 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
87 | { "hypercalls", VCPU_STAT(hypercalls) }, | 88 | { "hypercalls", VCPU_STAT(hypercalls) }, |
88 | { "request_irq", VCPU_STAT(request_irq_exits) }, | 89 | { "request_irq", VCPU_STAT(request_irq_exits) }, |
90 | { "request_nmi", VCPU_STAT(request_nmi_exits) }, | ||
89 | { "irq_exits", VCPU_STAT(irq_exits) }, | 91 | { "irq_exits", VCPU_STAT(irq_exits) }, |
90 | { "host_state_reload", VCPU_STAT(host_state_reload) }, | 92 | { "host_state_reload", VCPU_STAT(host_state_reload) }, |
91 | { "efer_reload", VCPU_STAT(efer_reload) }, | 93 | { "efer_reload", VCPU_STAT(efer_reload) }, |
@@ -93,6 +95,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
93 | { "insn_emulation", VCPU_STAT(insn_emulation) }, | 95 | { "insn_emulation", VCPU_STAT(insn_emulation) }, |
94 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, | 96 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, |
95 | { "irq_injections", VCPU_STAT(irq_injections) }, | 97 | { "irq_injections", VCPU_STAT(irq_injections) }, |
98 | { "nmi_injections", VCPU_STAT(nmi_injections) }, | ||
96 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, | 99 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, |
97 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, | 100 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, |
98 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, | 101 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, |
@@ -101,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
101 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 104 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
102 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 105 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
103 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | 106 | { "mmu_unsync", VM_STAT(mmu_unsync) }, |
107 | { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, | ||
104 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 108 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
105 | { "largepages", VM_STAT(lpages) }, | 109 | { "largepages", VM_STAT(lpages) }, |
106 | { NULL } | 110 | { NULL } |
@@ -312,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
312 | kvm_x86_ops->set_cr0(vcpu, cr0); | 316 | kvm_x86_ops->set_cr0(vcpu, cr0); |
313 | vcpu->arch.cr0 = cr0; | 317 | vcpu->arch.cr0 = cr0; |
314 | 318 | ||
319 | kvm_mmu_sync_global(vcpu); | ||
315 | kvm_mmu_reset_context(vcpu); | 320 | kvm_mmu_reset_context(vcpu); |
316 | return; | 321 | return; |
317 | } | 322 | } |
@@ -355,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
355 | } | 360 | } |
356 | kvm_x86_ops->set_cr4(vcpu, cr4); | 361 | kvm_x86_ops->set_cr4(vcpu, cr4); |
357 | vcpu->arch.cr4 = cr4; | 362 | vcpu->arch.cr4 = cr4; |
363 | kvm_mmu_sync_global(vcpu); | ||
358 | kvm_mmu_reset_context(vcpu); | 364 | kvm_mmu_reset_context(vcpu); |
359 | } | 365 | } |
360 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 366 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -449,7 +455,7 @@ static u32 msrs_to_save[] = { | |||
449 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 455 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
450 | #endif | 456 | #endif |
451 | MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 457 | MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
452 | MSR_IA32_PERF_STATUS, | 458 | MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT |
453 | }; | 459 | }; |
454 | 460 | ||
455 | static unsigned num_msrs_to_save; | 461 | static unsigned num_msrs_to_save; |
@@ -648,10 +654,38 @@ static bool msr_mtrr_valid(unsigned msr) | |||
648 | 654 | ||
649 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 655 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
650 | { | 656 | { |
657 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | ||
658 | |||
651 | if (!msr_mtrr_valid(msr)) | 659 | if (!msr_mtrr_valid(msr)) |
652 | return 1; | 660 | return 1; |
653 | 661 | ||
654 | vcpu->arch.mtrr[msr - 0x200] = data; | 662 | if (msr == MSR_MTRRdefType) { |
663 | vcpu->arch.mtrr_state.def_type = data; | ||
664 | vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; | ||
665 | } else if (msr == MSR_MTRRfix64K_00000) | ||
666 | p[0] = data; | ||
667 | else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) | ||
668 | p[1 + msr - MSR_MTRRfix16K_80000] = data; | ||
669 | else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) | ||
670 | p[3 + msr - MSR_MTRRfix4K_C0000] = data; | ||
671 | else if (msr == MSR_IA32_CR_PAT) | ||
672 | vcpu->arch.pat = data; | ||
673 | else { /* Variable MTRRs */ | ||
674 | int idx, is_mtrr_mask; | ||
675 | u64 *pt; | ||
676 | |||
677 | idx = (msr - 0x200) / 2; | ||
678 | is_mtrr_mask = msr - 0x200 - 2 * idx; | ||
679 | if (!is_mtrr_mask) | ||
680 | pt = | ||
681 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; | ||
682 | else | ||
683 | pt = | ||
684 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; | ||
685 | *pt = data; | ||
686 | } | ||
687 | |||
688 | kvm_mmu_reset_context(vcpu); | ||
655 | return 0; | 689 | return 0; |
656 | } | 690 | } |
657 | 691 | ||
@@ -747,10 +781,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
747 | 781 | ||
748 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 782 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
749 | { | 783 | { |
784 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | ||
785 | |||
750 | if (!msr_mtrr_valid(msr)) | 786 | if (!msr_mtrr_valid(msr)) |
751 | return 1; | 787 | return 1; |
752 | 788 | ||
753 | *pdata = vcpu->arch.mtrr[msr - 0x200]; | 789 | if (msr == MSR_MTRRdefType) |
790 | *pdata = vcpu->arch.mtrr_state.def_type + | ||
791 | (vcpu->arch.mtrr_state.enabled << 10); | ||
792 | else if (msr == MSR_MTRRfix64K_00000) | ||
793 | *pdata = p[0]; | ||
794 | else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) | ||
795 | *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; | ||
796 | else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) | ||
797 | *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; | ||
798 | else if (msr == MSR_IA32_CR_PAT) | ||
799 | *pdata = vcpu->arch.pat; | ||
800 | else { /* Variable MTRRs */ | ||
801 | int idx, is_mtrr_mask; | ||
802 | u64 *pt; | ||
803 | |||
804 | idx = (msr - 0x200) / 2; | ||
805 | is_mtrr_mask = msr - 0x200 - 2 * idx; | ||
806 | if (!is_mtrr_mask) | ||
807 | pt = | ||
808 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; | ||
809 | else | ||
810 | pt = | ||
811 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; | ||
812 | *pdata = *pt; | ||
813 | } | ||
814 | |||
754 | return 0; | 815 | return 0; |
755 | } | 816 | } |
756 | 817 | ||
@@ -903,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
903 | case KVM_CAP_IRQCHIP: | 964 | case KVM_CAP_IRQCHIP: |
904 | case KVM_CAP_HLT: | 965 | case KVM_CAP_HLT: |
905 | case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: | 966 | case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: |
906 | case KVM_CAP_USER_MEMORY: | ||
907 | case KVM_CAP_SET_TSS_ADDR: | 967 | case KVM_CAP_SET_TSS_ADDR: |
908 | case KVM_CAP_EXT_CPUID: | 968 | case KVM_CAP_EXT_CPUID: |
909 | case KVM_CAP_CLOCKSOURCE: | 969 | case KVM_CAP_CLOCKSOURCE: |
@@ -1188,6 +1248,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1188 | int t, times = entry->eax & 0xff; | 1248 | int t, times = entry->eax & 0xff; |
1189 | 1249 | ||
1190 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | 1250 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; |
1251 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
1191 | for (t = 1; t < times && *nent < maxnent; ++t) { | 1252 | for (t = 1; t < times && *nent < maxnent; ++t) { |
1192 | do_cpuid_1_ent(&entry[t], function, 0); | 1253 | do_cpuid_1_ent(&entry[t], function, 0); |
1193 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | 1254 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; |
@@ -1218,7 +1279,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1218 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 1279 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
1219 | /* read more entries until level_type is zero */ | 1280 | /* read more entries until level_type is zero */ |
1220 | for (i = 1; *nent < maxnent; ++i) { | 1281 | for (i = 1; *nent < maxnent; ++i) { |
1221 | level_type = entry[i - 1].ecx & 0xff; | 1282 | level_type = entry[i - 1].ecx & 0xff00; |
1222 | if (!level_type) | 1283 | if (!level_type) |
1223 | break; | 1284 | break; |
1224 | do_cpuid_1_ent(&entry[i], function, i); | 1285 | do_cpuid_1_ent(&entry[i], function, i); |
@@ -1318,6 +1379,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
1318 | return 0; | 1379 | return 0; |
1319 | } | 1380 | } |
1320 | 1381 | ||
1382 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) | ||
1383 | { | ||
1384 | vcpu_load(vcpu); | ||
1385 | kvm_inject_nmi(vcpu); | ||
1386 | vcpu_put(vcpu); | ||
1387 | |||
1388 | return 0; | ||
1389 | } | ||
1390 | |||
1321 | static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, | 1391 | static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, |
1322 | struct kvm_tpr_access_ctl *tac) | 1392 | struct kvm_tpr_access_ctl *tac) |
1323 | { | 1393 | { |
@@ -1377,6 +1447,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1377 | r = 0; | 1447 | r = 0; |
1378 | break; | 1448 | break; |
1379 | } | 1449 | } |
1450 | case KVM_NMI: { | ||
1451 | r = kvm_vcpu_ioctl_nmi(vcpu); | ||
1452 | if (r) | ||
1453 | goto out; | ||
1454 | r = 0; | ||
1455 | break; | ||
1456 | } | ||
1380 | case KVM_SET_CPUID: { | 1457 | case KVM_SET_CPUID: { |
1381 | struct kvm_cpuid __user *cpuid_arg = argp; | 1458 | struct kvm_cpuid __user *cpuid_arg = argp; |
1382 | struct kvm_cpuid cpuid; | 1459 | struct kvm_cpuid cpuid; |
@@ -1968,7 +2045,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1968 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 2045 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
1969 | if (ret < 0) | 2046 | if (ret < 0) |
1970 | return 0; | 2047 | return 0; |
1971 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 2048 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); |
1972 | return 1; | 2049 | return 1; |
1973 | } | 2050 | } |
1974 | 2051 | ||
@@ -2404,8 +2481,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2404 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 2481 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
2405 | memcpy(vcpu->arch.pio_data, &val, 4); | 2482 | memcpy(vcpu->arch.pio_data, &val, 4); |
2406 | 2483 | ||
2407 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
2408 | |||
2409 | pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); | 2484 | pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); |
2410 | if (pio_dev) { | 2485 | if (pio_dev) { |
2411 | kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); | 2486 | kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); |
@@ -2541,7 +2616,7 @@ int kvm_arch_init(void *opaque) | |||
2541 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2616 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
2542 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | 2617 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); |
2543 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 2618 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
2544 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 2619 | PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); |
2545 | return 0; | 2620 | return 0; |
2546 | 2621 | ||
2547 | out: | 2622 | out: |
@@ -2729,7 +2804,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | |||
2729 | 2804 | ||
2730 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | 2805 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; |
2731 | /* when no next entry is found, the current entry[i] is reselected */ | 2806 | /* when no next entry is found, the current entry[i] is reselected */ |
2732 | for (j = i + 1; j == i; j = (j + 1) % nent) { | 2807 | for (j = i + 1; ; j = (j + 1) % nent) { |
2733 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | 2808 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; |
2734 | if (ej->function == e->function) { | 2809 | if (ej->function == e->function) { |
2735 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | 2810 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; |
@@ -2973,7 +3048,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2973 | pr_debug("vcpu %d received sipi with vector # %x\n", | 3048 | pr_debug("vcpu %d received sipi with vector # %x\n", |
2974 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | 3049 | vcpu->vcpu_id, vcpu->arch.sipi_vector); |
2975 | kvm_lapic_reset(vcpu); | 3050 | kvm_lapic_reset(vcpu); |
2976 | r = kvm_x86_ops->vcpu_reset(vcpu); | 3051 | r = kvm_arch_vcpu_reset(vcpu); |
2977 | if (r) | 3052 | if (r) |
2978 | return r; | 3053 | return r; |
2979 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 3054 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -3275,9 +3350,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | |||
3275 | kvm_desct->padding = 0; | 3350 | kvm_desct->padding = 0; |
3276 | } | 3351 | } |
3277 | 3352 | ||
3278 | static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | 3353 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, |
3279 | u16 selector, | 3354 | u16 selector, |
3280 | struct descriptor_table *dtable) | 3355 | struct descriptor_table *dtable) |
3281 | { | 3356 | { |
3282 | if (selector & 1 << 2) { | 3357 | if (selector & 1 << 2) { |
3283 | struct kvm_segment kvm_seg; | 3358 | struct kvm_segment kvm_seg; |
@@ -3302,7 +3377,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3302 | struct descriptor_table dtable; | 3377 | struct descriptor_table dtable; |
3303 | u16 index = selector >> 3; | 3378 | u16 index = selector >> 3; |
3304 | 3379 | ||
3305 | get_segment_descritptor_dtable(vcpu, selector, &dtable); | 3380 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
3306 | 3381 | ||
3307 | if (dtable.limit < index * 8 + 7) { | 3382 | if (dtable.limit < index * 8 + 7) { |
3308 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 3383 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
@@ -3321,7 +3396,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3321 | struct descriptor_table dtable; | 3396 | struct descriptor_table dtable; |
3322 | u16 index = selector >> 3; | 3397 | u16 index = selector >> 3; |
3323 | 3398 | ||
3324 | get_segment_descritptor_dtable(vcpu, selector, &dtable); | 3399 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
3325 | 3400 | ||
3326 | if (dtable.limit < index * 8 + 7) | 3401 | if (dtable.limit < index * 8 + 7) |
3327 | return 1; | 3402 | return 1; |
@@ -3900,6 +3975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
3900 | /* We do fxsave: this must be aligned. */ | 3975 | /* We do fxsave: this must be aligned. */ |
3901 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); | 3976 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); |
3902 | 3977 | ||
3978 | vcpu->arch.mtrr_state.have_fixed = 1; | ||
3903 | vcpu_load(vcpu); | 3979 | vcpu_load(vcpu); |
3904 | r = kvm_arch_vcpu_reset(vcpu); | 3980 | r = kvm_arch_vcpu_reset(vcpu); |
3905 | if (r == 0) | 3981 | if (r == 0) |
@@ -3925,6 +4001,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
3925 | 4001 | ||
3926 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | 4002 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) |
3927 | { | 4003 | { |
4004 | vcpu->arch.nmi_pending = false; | ||
4005 | vcpu->arch.nmi_injected = false; | ||
4006 | |||
3928 | return kvm_x86_ops->vcpu_reset(vcpu); | 4007 | return kvm_x86_ops->vcpu_reset(vcpu); |
3929 | } | 4008 | } |
3930 | 4009 | ||
@@ -4012,6 +4091,7 @@ struct kvm *kvm_arch_create_vm(void) | |||
4012 | return ERR_PTR(-ENOMEM); | 4091 | return ERR_PTR(-ENOMEM); |
4013 | 4092 | ||
4014 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 4093 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
4094 | INIT_LIST_HEAD(&kvm->arch.oos_global_pages); | ||
4015 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 4095 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
4016 | 4096 | ||
4017 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 4097 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
@@ -4048,8 +4128,8 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
4048 | 4128 | ||
4049 | void kvm_arch_destroy_vm(struct kvm *kvm) | 4129 | void kvm_arch_destroy_vm(struct kvm *kvm) |
4050 | { | 4130 | { |
4051 | kvm_iommu_unmap_guest(kvm); | ||
4052 | kvm_free_all_assigned_devices(kvm); | 4131 | kvm_free_all_assigned_devices(kvm); |
4132 | kvm_iommu_unmap_guest(kvm); | ||
4053 | kvm_free_pit(kvm); | 4133 | kvm_free_pit(kvm); |
4054 | kfree(kvm->arch.vpic); | 4134 | kfree(kvm->arch.vpic); |
4055 | kfree(kvm->arch.vioapic); | 4135 | kfree(kvm->arch.vioapic); |
@@ -4127,7 +4207,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
4127 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 4207 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
4128 | { | 4208 | { |
4129 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 4209 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE |
4130 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; | 4210 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
4211 | || vcpu->arch.nmi_pending; | ||
4131 | } | 4212 | } |
4132 | 4213 | ||
4133 | static void vcpu_kick_intr(void *info) | 4214 | static void vcpu_kick_intr(void *info) |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ea051173b0da..d174db7a3370 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ | 58 | #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ |
59 | #define SrcImm (5<<4) /* Immediate operand. */ | 59 | #define SrcImm (5<<4) /* Immediate operand. */ |
60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ | 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ |
61 | #define SrcOne (7<<4) /* Implied '1' */ | ||
61 | #define SrcMask (7<<4) | 62 | #define SrcMask (7<<4) |
62 | /* Generic ModRM decode. */ | 63 | /* Generic ModRM decode. */ |
63 | #define ModRM (1<<7) | 64 | #define ModRM (1<<7) |
@@ -70,17 +71,23 @@ | |||
70 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 71 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
71 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 72 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
72 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 73 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
74 | /* Source 2 operand type */ | ||
75 | #define Src2None (0<<29) | ||
76 | #define Src2CL (1<<29) | ||
77 | #define Src2ImmByte (2<<29) | ||
78 | #define Src2One (3<<29) | ||
79 | #define Src2Mask (7<<29) | ||
73 | 80 | ||
74 | enum { | 81 | enum { |
75 | Group1_80, Group1_81, Group1_82, Group1_83, | 82 | Group1_80, Group1_81, Group1_82, Group1_83, |
76 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, | 83 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, |
77 | }; | 84 | }; |
78 | 85 | ||
79 | static u16 opcode_table[256] = { | 86 | static u32 opcode_table[256] = { |
80 | /* 0x00 - 0x07 */ | 87 | /* 0x00 - 0x07 */ |
81 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 88 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
82 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 89 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
83 | 0, 0, 0, 0, | 90 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, |
84 | /* 0x08 - 0x0F */ | 91 | /* 0x08 - 0x0F */ |
85 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 92 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
86 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 93 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -195,7 +202,7 @@ static u16 opcode_table[256] = { | |||
195 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, | 202 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
196 | }; | 203 | }; |
197 | 204 | ||
198 | static u16 twobyte_table[256] = { | 205 | static u32 twobyte_table[256] = { |
199 | /* 0x00 - 0x0F */ | 206 | /* 0x00 - 0x0F */ |
200 | 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, | 207 | 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, |
201 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | 208 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, |
@@ -230,9 +237,14 @@ static u16 twobyte_table[256] = { | |||
230 | /* 0x90 - 0x9F */ | 237 | /* 0x90 - 0x9F */ |
231 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 238 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
232 | /* 0xA0 - 0xA7 */ | 239 | /* 0xA0 - 0xA7 */ |
233 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, | 240 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, |
241 | DstMem | SrcReg | Src2ImmByte | ModRM, | ||
242 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | ||
234 | /* 0xA8 - 0xAF */ | 243 | /* 0xA8 - 0xAF */ |
235 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, | 244 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, |
245 | DstMem | SrcReg | Src2ImmByte | ModRM, | ||
246 | DstMem | SrcReg | Src2CL | ModRM, | ||
247 | ModRM, 0, | ||
236 | /* 0xB0 - 0xB7 */ | 248 | /* 0xB0 - 0xB7 */ |
237 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, | 249 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, |
238 | DstMem | SrcReg | ModRM | BitOp, | 250 | DstMem | SrcReg | ModRM | BitOp, |
@@ -253,7 +265,7 @@ static u16 twobyte_table[256] = { | |||
253 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | 265 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
254 | }; | 266 | }; |
255 | 267 | ||
256 | static u16 group_table[] = { | 268 | static u32 group_table[] = { |
257 | [Group1_80*8] = | 269 | [Group1_80*8] = |
258 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 270 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, |
259 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 271 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, |
@@ -297,9 +309,9 @@ static u16 group_table[] = { | |||
297 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, | 309 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, |
298 | }; | 310 | }; |
299 | 311 | ||
300 | static u16 group2_table[] = { | 312 | static u32 group2_table[] = { |
301 | [Group7*8] = | 313 | [Group7*8] = |
302 | SrcNone | ModRM, 0, 0, 0, | 314 | SrcNone | ModRM, 0, 0, SrcNone | ModRM, |
303 | SrcNone | ModRM | DstMem | Mov, 0, | 315 | SrcNone | ModRM | DstMem | Mov, 0, |
304 | SrcMem16 | ModRM | Mov, 0, | 316 | SrcMem16 | ModRM | Mov, 0, |
305 | }; | 317 | }; |
@@ -359,49 +371,48 @@ static u16 group2_table[] = { | |||
359 | "andl %"_msk",%"_LO32 _tmp"; " \ | 371 | "andl %"_msk",%"_LO32 _tmp"; " \ |
360 | "orl %"_LO32 _tmp",%"_sav"; " | 372 | "orl %"_LO32 _tmp",%"_sav"; " |
361 | 373 | ||
374 | #ifdef CONFIG_X86_64 | ||
375 | #define ON64(x) x | ||
376 | #else | ||
377 | #define ON64(x) | ||
378 | #endif | ||
379 | |||
380 | #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ | ||
381 | do { \ | ||
382 | __asm__ __volatile__ ( \ | ||
383 | _PRE_EFLAGS("0", "4", "2") \ | ||
384 | _op _suffix " %"_x"3,%1; " \ | ||
385 | _POST_EFLAGS("0", "4", "2") \ | ||
386 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
387 | "=&r" (_tmp) \ | ||
388 | : _y ((_src).val), "i" (EFLAGS_MASK)); \ | ||
389 | } while (0) | ||
390 | |||
391 | |||
362 | /* Raw emulation: instruction has two explicit operands. */ | 392 | /* Raw emulation: instruction has two explicit operands. */ |
363 | #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ | 393 | #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ |
364 | do { \ | 394 | do { \ |
365 | unsigned long _tmp; \ | 395 | unsigned long _tmp; \ |
366 | \ | 396 | \ |
367 | switch ((_dst).bytes) { \ | 397 | switch ((_dst).bytes) { \ |
368 | case 2: \ | 398 | case 2: \ |
369 | __asm__ __volatile__ ( \ | 399 | ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ |
370 | _PRE_EFLAGS("0", "4", "2") \ | 400 | break; \ |
371 | _op"w %"_wx"3,%1; " \ | 401 | case 4: \ |
372 | _POST_EFLAGS("0", "4", "2") \ | 402 | ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ |
373 | : "=m" (_eflags), "=m" ((_dst).val), \ | 403 | break; \ |
374 | "=&r" (_tmp) \ | 404 | case 8: \ |
375 | : _wy ((_src).val), "i" (EFLAGS_MASK)); \ | 405 | ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ |
376 | break; \ | 406 | break; \ |
377 | case 4: \ | 407 | } \ |
378 | __asm__ __volatile__ ( \ | ||
379 | _PRE_EFLAGS("0", "4", "2") \ | ||
380 | _op"l %"_lx"3,%1; " \ | ||
381 | _POST_EFLAGS("0", "4", "2") \ | ||
382 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
383 | "=&r" (_tmp) \ | ||
384 | : _ly ((_src).val), "i" (EFLAGS_MASK)); \ | ||
385 | break; \ | ||
386 | case 8: \ | ||
387 | __emulate_2op_8byte(_op, _src, _dst, \ | ||
388 | _eflags, _qx, _qy); \ | ||
389 | break; \ | ||
390 | } \ | ||
391 | } while (0) | 408 | } while (0) |
392 | 409 | ||
393 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ | 410 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ |
394 | do { \ | 411 | do { \ |
395 | unsigned long __tmp; \ | 412 | unsigned long _tmp; \ |
396 | switch ((_dst).bytes) { \ | 413 | switch ((_dst).bytes) { \ |
397 | case 1: \ | 414 | case 1: \ |
398 | __asm__ __volatile__ ( \ | 415 | ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ |
399 | _PRE_EFLAGS("0", "4", "2") \ | ||
400 | _op"b %"_bx"3,%1; " \ | ||
401 | _POST_EFLAGS("0", "4", "2") \ | ||
402 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
403 | "=&r" (__tmp) \ | ||
404 | : _by ((_src).val), "i" (EFLAGS_MASK)); \ | ||
405 | break; \ | 416 | break; \ |
406 | default: \ | 417 | default: \ |
407 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 418 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ |
@@ -425,71 +436,68 @@ static u16 group2_table[] = { | |||
425 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 436 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ |
426 | "w", "r", _LO32, "r", "", "r") | 437 | "w", "r", _LO32, "r", "", "r") |
427 | 438 | ||
428 | /* Instruction has only one explicit operand (no source operand). */ | 439 | /* Instruction has three operands and one operand is stored in ECX register */ |
429 | #define emulate_1op(_op, _dst, _eflags) \ | 440 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ |
430 | do { \ | 441 | do { \ |
431 | unsigned long _tmp; \ | 442 | unsigned long _tmp; \ |
432 | \ | 443 | _type _clv = (_cl).val; \ |
433 | switch ((_dst).bytes) { \ | 444 | _type _srcv = (_src).val; \ |
434 | case 1: \ | 445 | _type _dstv = (_dst).val; \ |
435 | __asm__ __volatile__ ( \ | 446 | \ |
436 | _PRE_EFLAGS("0", "3", "2") \ | 447 | __asm__ __volatile__ ( \ |
437 | _op"b %1; " \ | 448 | _PRE_EFLAGS("0", "5", "2") \ |
438 | _POST_EFLAGS("0", "3", "2") \ | 449 | _op _suffix " %4,%1 \n" \ |
439 | : "=m" (_eflags), "=m" ((_dst).val), \ | 450 | _POST_EFLAGS("0", "5", "2") \ |
440 | "=&r" (_tmp) \ | 451 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ |
441 | : "i" (EFLAGS_MASK)); \ | 452 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ |
442 | break; \ | 453 | ); \ |
443 | case 2: \ | 454 | \ |
444 | __asm__ __volatile__ ( \ | 455 | (_cl).val = (unsigned long) _clv; \ |
445 | _PRE_EFLAGS("0", "3", "2") \ | 456 | (_src).val = (unsigned long) _srcv; \ |
446 | _op"w %1; " \ | 457 | (_dst).val = (unsigned long) _dstv; \ |
447 | _POST_EFLAGS("0", "3", "2") \ | ||
448 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
449 | "=&r" (_tmp) \ | ||
450 | : "i" (EFLAGS_MASK)); \ | ||
451 | break; \ | ||
452 | case 4: \ | ||
453 | __asm__ __volatile__ ( \ | ||
454 | _PRE_EFLAGS("0", "3", "2") \ | ||
455 | _op"l %1; " \ | ||
456 | _POST_EFLAGS("0", "3", "2") \ | ||
457 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
458 | "=&r" (_tmp) \ | ||
459 | : "i" (EFLAGS_MASK)); \ | ||
460 | break; \ | ||
461 | case 8: \ | ||
462 | __emulate_1op_8byte(_op, _dst, _eflags); \ | ||
463 | break; \ | ||
464 | } \ | ||
465 | } while (0) | 458 | } while (0) |
466 | 459 | ||
467 | /* Emulate an instruction with quadword operands (x86/64 only). */ | 460 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ |
468 | #if defined(CONFIG_X86_64) | 461 | do { \ |
469 | #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ | 462 | switch ((_dst).bytes) { \ |
470 | do { \ | 463 | case 2: \ |
471 | __asm__ __volatile__ ( \ | 464 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
472 | _PRE_EFLAGS("0", "4", "2") \ | 465 | "w", unsigned short); \ |
473 | _op"q %"_qx"3,%1; " \ | 466 | break; \ |
474 | _POST_EFLAGS("0", "4", "2") \ | 467 | case 4: \ |
475 | : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ | 468 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
476 | : _qy ((_src).val), "i" (EFLAGS_MASK)); \ | 469 | "l", unsigned int); \ |
470 | break; \ | ||
471 | case 8: \ | ||
472 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | ||
473 | "q", unsigned long)); \ | ||
474 | break; \ | ||
475 | } \ | ||
477 | } while (0) | 476 | } while (0) |
478 | 477 | ||
479 | #define __emulate_1op_8byte(_op, _dst, _eflags) \ | 478 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ |
480 | do { \ | 479 | do { \ |
481 | __asm__ __volatile__ ( \ | 480 | unsigned long _tmp; \ |
482 | _PRE_EFLAGS("0", "3", "2") \ | 481 | \ |
483 | _op"q %1; " \ | 482 | __asm__ __volatile__ ( \ |
484 | _POST_EFLAGS("0", "3", "2") \ | 483 | _PRE_EFLAGS("0", "3", "2") \ |
485 | : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ | 484 | _op _suffix " %1; " \ |
486 | : "i" (EFLAGS_MASK)); \ | 485 | _POST_EFLAGS("0", "3", "2") \ |
486 | : "=m" (_eflags), "+m" ((_dst).val), \ | ||
487 | "=&r" (_tmp) \ | ||
488 | : "i" (EFLAGS_MASK)); \ | ||
487 | } while (0) | 489 | } while (0) |
488 | 490 | ||
489 | #elif defined(__i386__) | 491 | /* Instruction has only one explicit operand (no source operand). */ |
490 | #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) | 492 | #define emulate_1op(_op, _dst, _eflags) \ |
491 | #define __emulate_1op_8byte(_op, _dst, _eflags) | 493 | do { \ |
492 | #endif /* __i386__ */ | 494 | switch ((_dst).bytes) { \ |
495 | case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ | ||
496 | case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ | ||
497 | case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ | ||
498 | case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ | ||
499 | } \ | ||
500 | } while (0) | ||
493 | 501 | ||
494 | /* Fetch next part of the instruction being emulated. */ | 502 | /* Fetch next part of the instruction being emulated. */ |
495 | #define insn_fetch(_type, _size, _eip) \ | 503 | #define insn_fetch(_type, _size, _eip) \ |
@@ -1041,6 +1049,33 @@ done_prefixes: | |||
1041 | c->src.bytes = 1; | 1049 | c->src.bytes = 1; |
1042 | c->src.val = insn_fetch(s8, 1, c->eip); | 1050 | c->src.val = insn_fetch(s8, 1, c->eip); |
1043 | break; | 1051 | break; |
1052 | case SrcOne: | ||
1053 | c->src.bytes = 1; | ||
1054 | c->src.val = 1; | ||
1055 | break; | ||
1056 | } | ||
1057 | |||
1058 | /* | ||
1059 | * Decode and fetch the second source operand: register, memory | ||
1060 | * or immediate. | ||
1061 | */ | ||
1062 | switch (c->d & Src2Mask) { | ||
1063 | case Src2None: | ||
1064 | break; | ||
1065 | case Src2CL: | ||
1066 | c->src2.bytes = 1; | ||
1067 | c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; | ||
1068 | break; | ||
1069 | case Src2ImmByte: | ||
1070 | c->src2.type = OP_IMM; | ||
1071 | c->src2.ptr = (unsigned long *)c->eip; | ||
1072 | c->src2.bytes = 1; | ||
1073 | c->src2.val = insn_fetch(u8, 1, c->eip); | ||
1074 | break; | ||
1075 | case Src2One: | ||
1076 | c->src2.bytes = 1; | ||
1077 | c->src2.val = 1; | ||
1078 | break; | ||
1044 | } | 1079 | } |
1045 | 1080 | ||
1046 | /* Decode and fetch the destination operand: register or memory. */ | 1081 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
1100 | c->regs[VCPU_REGS_RSP]); | 1135 | c->regs[VCPU_REGS_RSP]); |
1101 | } | 1136 | } |
1102 | 1137 | ||
1103 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1138 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
1104 | struct x86_emulate_ops *ops) | 1139 | struct x86_emulate_ops *ops) |
1105 | { | 1140 | { |
1106 | struct decode_cache *c = &ctxt->decode; | 1141 | struct decode_cache *c = &ctxt->decode; |
1107 | int rc; | 1142 | int rc; |
1108 | 1143 | ||
1109 | rc = ops->read_std(register_address(c, ss_base(ctxt), | 1144 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), |
1110 | c->regs[VCPU_REGS_RSP]), | 1145 | c->regs[VCPU_REGS_RSP]), |
1111 | &c->dst.val, c->dst.bytes, ctxt->vcpu); | 1146 | &c->src.val, c->src.bytes, ctxt->vcpu); |
1112 | if (rc != 0) | 1147 | if (rc != 0) |
1113 | return rc; | 1148 | return rc; |
1114 | 1149 | ||
1115 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); | 1150 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); |
1151 | return rc; | ||
1152 | } | ||
1153 | |||
1154 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | ||
1155 | struct x86_emulate_ops *ops) | ||
1156 | { | ||
1157 | struct decode_cache *c = &ctxt->decode; | ||
1158 | int rc; | ||
1116 | 1159 | ||
1160 | c->src.bytes = c->dst.bytes; | ||
1161 | rc = emulate_pop(ctxt, ops); | ||
1162 | if (rc != 0) | ||
1163 | return rc; | ||
1164 | c->dst.val = c->src.val; | ||
1117 | return 0; | 1165 | return 0; |
1118 | } | 1166 | } |
1119 | 1167 | ||
@@ -1415,24 +1463,15 @@ special_insn: | |||
1415 | emulate_1op("dec", c->dst, ctxt->eflags); | 1463 | emulate_1op("dec", c->dst, ctxt->eflags); |
1416 | break; | 1464 | break; |
1417 | case 0x50 ... 0x57: /* push reg */ | 1465 | case 0x50 ... 0x57: /* push reg */ |
1418 | c->dst.type = OP_MEM; | 1466 | emulate_push(ctxt); |
1419 | c->dst.bytes = c->op_bytes; | ||
1420 | c->dst.val = c->src.val; | ||
1421 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1422 | -c->op_bytes); | ||
1423 | c->dst.ptr = (void *) register_address( | ||
1424 | c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); | ||
1425 | break; | 1467 | break; |
1426 | case 0x58 ... 0x5f: /* pop reg */ | 1468 | case 0x58 ... 0x5f: /* pop reg */ |
1427 | pop_instruction: | 1469 | pop_instruction: |
1428 | if ((rc = ops->read_std(register_address(c, ss_base(ctxt), | 1470 | c->src.bytes = c->op_bytes; |
1429 | c->regs[VCPU_REGS_RSP]), c->dst.ptr, | 1471 | rc = emulate_pop(ctxt, ops); |
1430 | c->op_bytes, ctxt->vcpu)) != 0) | 1472 | if (rc != 0) |
1431 | goto done; | 1473 | goto done; |
1432 | 1474 | c->dst.val = c->src.val; | |
1433 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1434 | c->op_bytes); | ||
1435 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
1436 | break; | 1475 | break; |
1437 | case 0x63: /* movsxd */ | 1476 | case 0x63: /* movsxd */ |
1438 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 1477 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
@@ -1591,7 +1630,9 @@ special_insn: | |||
1591 | emulate_push(ctxt); | 1630 | emulate_push(ctxt); |
1592 | break; | 1631 | break; |
1593 | case 0x9d: /* popf */ | 1632 | case 0x9d: /* popf */ |
1633 | c->dst.type = OP_REG; | ||
1594 | c->dst.ptr = (unsigned long *) &ctxt->eflags; | 1634 | c->dst.ptr = (unsigned long *) &ctxt->eflags; |
1635 | c->dst.bytes = c->op_bytes; | ||
1595 | goto pop_instruction; | 1636 | goto pop_instruction; |
1596 | case 0xa0 ... 0xa1: /* mov */ | 1637 | case 0xa0 ... 0xa1: /* mov */ |
1597 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 1638 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
@@ -1689,7 +1730,9 @@ special_insn: | |||
1689 | emulate_grp2(ctxt); | 1730 | emulate_grp2(ctxt); |
1690 | break; | 1731 | break; |
1691 | case 0xc3: /* ret */ | 1732 | case 0xc3: /* ret */ |
1733 | c->dst.type = OP_REG; | ||
1692 | c->dst.ptr = &c->eip; | 1734 | c->dst.ptr = &c->eip; |
1735 | c->dst.bytes = c->op_bytes; | ||
1693 | goto pop_instruction; | 1736 | goto pop_instruction; |
1694 | case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ | 1737 | case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ |
1695 | mov: | 1738 | mov: |
@@ -1778,7 +1821,7 @@ special_insn: | |||
1778 | c->eip = saved_eip; | 1821 | c->eip = saved_eip; |
1779 | goto cannot_emulate; | 1822 | goto cannot_emulate; |
1780 | } | 1823 | } |
1781 | return 0; | 1824 | break; |
1782 | case 0xf4: /* hlt */ | 1825 | case 0xf4: /* hlt */ |
1783 | ctxt->vcpu->arch.halt_request = 1; | 1826 | ctxt->vcpu->arch.halt_request = 1; |
1784 | break; | 1827 | break; |
@@ -1999,12 +2042,20 @@ twobyte_insn: | |||
1999 | c->src.val &= (c->dst.bytes << 3) - 1; | 2042 | c->src.val &= (c->dst.bytes << 3) - 1; |
2000 | emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); | 2043 | emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); |
2001 | break; | 2044 | break; |
2045 | case 0xa4: /* shld imm8, r, r/m */ | ||
2046 | case 0xa5: /* shld cl, r, r/m */ | ||
2047 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | ||
2048 | break; | ||
2002 | case 0xab: | 2049 | case 0xab: |
2003 | bts: /* bts */ | 2050 | bts: /* bts */ |
2004 | /* only subword offset */ | 2051 | /* only subword offset */ |
2005 | c->src.val &= (c->dst.bytes << 3) - 1; | 2052 | c->src.val &= (c->dst.bytes << 3) - 1; |
2006 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); | 2053 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); |
2007 | break; | 2054 | break; |
2055 | case 0xac: /* shrd imm8, r, r/m */ | ||
2056 | case 0xad: /* shrd cl, r, r/m */ | ||
2057 | emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); | ||
2058 | break; | ||
2008 | case 0xae: /* clflush */ | 2059 | case 0xae: /* clflush */ |
2009 | break; | 2060 | break; |
2010 | case 0xb0 ... 0xb1: /* cmpxchg */ | 2061 | case 0xb0 ... 0xb1: /* cmpxchg */ |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50a779264bb1..a7ed208f81e3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -738,7 +738,7 @@ static void lguest_time_init(void) | |||
738 | 738 | ||
739 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 739 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
740 | * here and register our timer device. */ | 740 | * here and register our timer device. */ |
741 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 741 | lguest_clockevent.cpumask = cpumask_of(0); |
742 | clockevents_register_device(&lguest_clockevent); | 742 | clockevents_register_device(&lguest_clockevent); |
743 | 743 | ||
744 | /* Finally, we unblock the timer interrupt. */ | 744 | /* Finally, we unblock the timer interrupt. */ |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 5c7cef34c9e7..10b9bd35a8ff 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -30,21 +30,6 @@ ENTRY(lguest_entry) | |||
30 | movl $lguest_data - __PAGE_OFFSET, %edx | 30 | movl $lguest_data - __PAGE_OFFSET, %edx |
31 | int $LGUEST_TRAP_ENTRY | 31 | int $LGUEST_TRAP_ENTRY |
32 | 32 | ||
33 | /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl | ||
34 | * instruction uses %esi implicitly as the source for the copy we're | ||
35 | * about to do. */ | ||
36 | movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi | ||
37 | |||
38 | /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. | ||
39 | * This means the first 128M of kernel memory will be mapped at | ||
40 | * PAGE_OFFSET where the kernel expects to run. This will get it far | ||
41 | * enough through boot to switch to its own pagetables. */ | ||
42 | movl $32, %ecx | ||
43 | movl %esi, %edi | ||
44 | addl $((__PAGE_OFFSET >> 22) * 4), %edi | ||
45 | rep | ||
46 | movsl | ||
47 | |||
48 | /* Set up the initial stack so we can run C code. */ | 33 | /* Set up the initial stack so we can run C code. */ |
49 | movl $(init_thread_union+THREAD_SIZE),%esp | 34 | movl $(init_thread_union+THREAD_SIZE),%esp |
50 | 35 | ||
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9e68075544f6..4a20b2f9a381 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon | |||
39 | #define __do_strncpy_from_user(dst, src, count, res) \ | 39 | #define __do_strncpy_from_user(dst, src, count, res) \ |
40 | do { \ | 40 | do { \ |
41 | int __d0, __d1, __d2; \ | 41 | int __d0, __d1, __d2; \ |
42 | might_sleep(); \ | 42 | might_fault(); \ |
43 | __asm__ __volatile__( \ | 43 | __asm__ __volatile__( \ |
44 | " testl %1,%1\n" \ | 44 | " testl %1,%1\n" \ |
45 | " jz 2f\n" \ | 45 | " jz 2f\n" \ |
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user); | |||
126 | #define __do_clear_user(addr,size) \ | 126 | #define __do_clear_user(addr,size) \ |
127 | do { \ | 127 | do { \ |
128 | int __d0; \ | 128 | int __d0; \ |
129 | might_sleep(); \ | 129 | might_fault(); \ |
130 | __asm__ __volatile__( \ | 130 | __asm__ __volatile__( \ |
131 | "0: rep; stosl\n" \ | 131 | "0: rep; stosl\n" \ |
132 | " movl %2,%0\n" \ | 132 | " movl %2,%0\n" \ |
@@ -155,7 +155,7 @@ do { \ | |||
155 | unsigned long | 155 | unsigned long |
156 | clear_user(void __user *to, unsigned long n) | 156 | clear_user(void __user *to, unsigned long n) |
157 | { | 157 | { |
158 | might_sleep(); | 158 | might_fault(); |
159 | if (access_ok(VERIFY_WRITE, to, n)) | 159 | if (access_ok(VERIFY_WRITE, to, n)) |
160 | __do_clear_user(to, n); | 160 | __do_clear_user(to, n); |
161 | return n; | 161 | return n; |
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n) | |||
197 | unsigned long mask = -__addr_ok(s); | 197 | unsigned long mask = -__addr_ok(s); |
198 | unsigned long res, tmp; | 198 | unsigned long res, tmp; |
199 | 199 | ||
200 | might_sleep(); | 200 | might_fault(); |
201 | 201 | ||
202 | __asm__ __volatile__( | 202 | __asm__ __volatile__( |
203 | " testl %0, %0\n" | 203 | " testl %0, %0\n" |
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index f4df6e7c718b..64d6c84e6353 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #define __do_strncpy_from_user(dst,src,count,res) \ | 15 | #define __do_strncpy_from_user(dst,src,count,res) \ |
16 | do { \ | 16 | do { \ |
17 | long __d0, __d1, __d2; \ | 17 | long __d0, __d1, __d2; \ |
18 | might_sleep(); \ | 18 | might_fault(); \ |
19 | __asm__ __volatile__( \ | 19 | __asm__ __volatile__( \ |
20 | " testq %1,%1\n" \ | 20 | " testq %1,%1\n" \ |
21 | " jz 2f\n" \ | 21 | " jz 2f\n" \ |
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user); | |||
64 | unsigned long __clear_user(void __user *addr, unsigned long size) | 64 | unsigned long __clear_user(void __user *addr, unsigned long size) |
65 | { | 65 | { |
66 | long __d0; | 66 | long __d0; |
67 | might_sleep(); | 67 | might_fault(); |
68 | /* no memory constraint because it doesn't change any memory gcc knows | 68 | /* no memory constraint because it doesn't change any memory gcc knows |
69 | about */ | 69 | about */ |
70 | asm volatile( | 70 | asm volatile( |
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3624a364b7f3..bc4c7840b2a8 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c | |||
@@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { | |||
42 | { } | 42 | { } |
43 | }; | 43 | }; |
44 | 44 | ||
45 | static cpumask_t vector_allocation_domain(int cpu) | 45 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
46 | { | 46 | { |
47 | return cpumask_of_cpu(cpu); | 47 | cpus_clear(*retmask); |
48 | cpu_set(cpu, *retmask); | ||
48 | } | 49 | } |
49 | 50 | ||
50 | static int probe_bigsmp(void) | 51 | static int probe_bigsmp(void) |
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 7b4e6d0d1690..4ba5ccaa1584 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c | |||
@@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
87 | } | 87 | } |
88 | #endif | 88 | #endif |
89 | 89 | ||
90 | static cpumask_t vector_allocation_domain(int cpu) | 90 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
91 | { | 91 | { |
92 | /* Careful. Some cpus do not strictly honor the set of cpus | 92 | /* Careful. Some cpus do not strictly honor the set of cpus |
93 | * specified in the interrupt destination when using lowest | 93 | * specified in the interrupt destination when using lowest |
@@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu) | |||
97 | * deliver interrupts to the wrong hyperthread when only one | 97 | * deliver interrupts to the wrong hyperthread when only one |
98 | * hyperthread was specified in the interrupt desitination. | 98 | * hyperthread was specified in the interrupt desitination. |
99 | */ | 99 | */ |
100 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 100 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; |
101 | return domain; | ||
102 | } | 101 | } |
103 | 102 | ||
104 | struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); | 103 | struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); |
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 71a309b122e6..511d7941364f 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c | |||
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
38 | return 0; | 38 | return 0; |
39 | } | 39 | } |
40 | 40 | ||
41 | static cpumask_t vector_allocation_domain(int cpu) | 41 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
42 | { | 42 | { |
43 | /* Careful. Some cpus do not strictly honor the set of cpus | 43 | /* Careful. Some cpus do not strictly honor the set of cpus |
44 | * specified in the interrupt destination when using lowest | 44 | * specified in the interrupt destination when using lowest |
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu) | |||
48 | * deliver interrupts to the wrong hyperthread when only one | 48 | * deliver interrupts to the wrong hyperthread when only one |
49 | * hyperthread was specified in the interrupt desitination. | 49 | * hyperthread was specified in the interrupt desitination. |
50 | */ | 50 | */ |
51 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 51 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; |
52 | return domain; | ||
53 | } | 52 | } |
54 | 53 | ||
55 | struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); | 54 | struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); |
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2c6d234e0009..2821ffc188b5 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c | |||
@@ -24,7 +24,7 @@ static int probe_summit(void) | |||
24 | return 0; | 24 | return 0; |
25 | } | 25 | } |
26 | 26 | ||
27 | static cpumask_t vector_allocation_domain(int cpu) | 27 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
28 | { | 28 | { |
29 | /* Careful. Some cpus do not strictly honor the set of cpus | 29 | /* Careful. Some cpus do not strictly honor the set of cpus |
30 | * specified in the interrupt destination when using lowest | 30 | * specified in the interrupt destination when using lowest |
@@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu) | |||
34 | * deliver interrupts to the wrong hyperthread when only one | 34 | * deliver interrupts to the wrong hyperthread when only one |
35 | * hyperthread was specified in the interrupt desitination. | 35 | * hyperthread was specified in the interrupt desitination. |
36 | */ | 36 | */ |
37 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 37 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; |
38 | return domain; | ||
39 | } | 38 | } |
40 | 39 | ||
41 | struct genapic apic_summit = APIC_INIT("summit", probe_summit); | 40 | struct genapic apic_summit = APIC_INIT("summit", probe_summit); |
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 52145007bd7e..a5bc05492b1e 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c | |||
@@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1; | |||
63 | /* Used for the invalidate map that's also checked in the spinlock */ | 63 | /* Used for the invalidate map that's also checked in the spinlock */ |
64 | static volatile unsigned long smp_invalidate_needed; | 64 | static volatile unsigned long smp_invalidate_needed; |
65 | 65 | ||
66 | /* Bitmask of currently online CPUs - used by setup.c for | ||
67 | /proc/cpuinfo, visible externally but still physical */ | ||
68 | cpumask_t cpu_online_map = CPU_MASK_NONE; | ||
69 | EXPORT_SYMBOL(cpu_online_map); | ||
70 | |||
71 | /* Bitmask of CPUs present in the system - exported by i386_syms.c, used | 66 | /* Bitmask of CPUs present in the system - exported by i386_syms.c, used |
72 | * by scheduler but indexed physically */ | 67 | * by scheduler but indexed physically */ |
73 | cpumask_t phys_cpu_present_map = CPU_MASK_NONE; | 68 | cpumask_t phys_cpu_present_map = CPU_MASK_NONE; |
@@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; | |||
218 | /* This is for the new dynamic CPU boot code */ | 213 | /* This is for the new dynamic CPU boot code */ |
219 | cpumask_t cpu_callin_map = CPU_MASK_NONE; | 214 | cpumask_t cpu_callin_map = CPU_MASK_NONE; |
220 | cpumask_t cpu_callout_map = CPU_MASK_NONE; | 215 | cpumask_t cpu_callout_map = CPU_MASK_NONE; |
221 | cpumask_t cpu_possible_map = CPU_MASK_NONE; | ||
222 | EXPORT_SYMBOL(cpu_possible_map); | ||
223 | 216 | ||
224 | /* The per processor IRQ masks (these are usually kept in sync) */ | 217 | /* The per processor IRQ masks (these are usually kept in sync) */ |
225 | static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; | 218 | static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; |
@@ -679,7 +672,7 @@ void __init smp_boot_cpus(void) | |||
679 | 672 | ||
680 | /* loop over all the extended VIC CPUs and boot them. The | 673 | /* loop over all the extended VIC CPUs and boot them. The |
681 | * Quad CPUs must be bootstrapped by their extended VIC cpu */ | 674 | * Quad CPUs must be bootstrapped by their extended VIC cpu */ |
682 | for (i = 0; i < NR_CPUS; i++) { | 675 | for (i = 0; i < nr_cpu_ids; i++) { |
683 | if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) | 676 | if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) |
684 | continue; | 677 | continue; |
685 | do_boot_cpu(i); | 678 | do_boot_cpu(i); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ad98b18f2b48..f99a6c6c432e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/pci.h> | ||
24 | #include <linux/pfn.h> | 25 | #include <linux/pfn.h> |
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
@@ -971,6 +972,8 @@ void __init mem_init(void) | |||
971 | int codesize, reservedpages, datasize, initsize; | 972 | int codesize, reservedpages, datasize, initsize; |
972 | int tmp; | 973 | int tmp; |
973 | 974 | ||
975 | pci_iommu_alloc(); | ||
976 | |||
974 | #ifdef CONFIG_FLATMEM | 977 | #ifdef CONFIG_FLATMEM |
975 | BUG_ON(!mem_map); | 978 | BUG_ON(!mem_map); |
976 | #endif | 979 | #endif |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index cebcbf152d46..71a14f89f89e 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -278,7 +278,7 @@ void __init numa_init_array(void) | |||
278 | int rr, i; | 278 | int rr, i; |
279 | 279 | ||
280 | rr = first_node(node_online_map); | 280 | rr = first_node(node_online_map); |
281 | for (i = 0; i < NR_CPUS; i++) { | 281 | for (i = 0; i < nr_cpu_ids; i++) { |
282 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | 282 | if (early_cpu_to_node(i) != NUMA_NO_NODE) |
283 | continue; | 283 | continue; |
284 | numa_set_node(i, rr); | 284 | numa_set_node(i, rr); |
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
549 | memnodemap[0] = 0; | 549 | memnodemap[0] = 0; |
550 | node_set_online(0); | 550 | node_set_online(0); |
551 | node_set(0, node_possible_map); | 551 | node_set(0, node_possible_map); |
552 | for (i = 0; i < NR_CPUS; i++) | 552 | for (i = 0; i < nr_cpu_ids; i++) |
553 | numa_set_node(i, 0); | 553 | numa_set_node(i, 0); |
554 | e820_register_active_regions(0, start_pfn, last_pfn); | 554 | e820_register_active_regions(0, start_pfn, last_pfn); |
555 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); | 555 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 51c0a2fc14fe..09737c8af074 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
382 | if (!node_online(i)) | 382 | if (!node_online(i)) |
383 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 383 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
384 | 384 | ||
385 | for (i = 0; i < NR_CPUS; i++) { | 385 | for (i = 0; i < nr_cpu_ids; i++) { |
386 | int node = early_cpu_to_node(i); | 386 | int node = early_cpu_to_node(i); |
387 | 387 | ||
388 | if (node == NUMA_NO_NODE) | 388 | if (node == NUMA_NO_NODE) |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 509513760a6e..98658f25f542 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -65,11 +65,13 @@ static unsigned long reset_value[NUM_COUNTERS]; | |||
65 | #define IBS_FETCH_BEGIN 3 | 65 | #define IBS_FETCH_BEGIN 3 |
66 | #define IBS_OP_BEGIN 4 | 66 | #define IBS_OP_BEGIN 4 |
67 | 67 | ||
68 | /* The function interface needs to be fixed, something like add | 68 | /* |
69 | data. Should then be added to linux/oprofile.h. */ | 69 | * The function interface needs to be fixed, something like add |
70 | * data. Should then be added to linux/oprofile.h. | ||
71 | */ | ||
70 | extern void | 72 | extern void |
71 | oprofile_add_ibs_sample(struct pt_regs *const regs, | 73 | oprofile_add_ibs_sample(struct pt_regs * const regs, |
72 | unsigned int *const ibs_sample, int ibs_code); | 74 | unsigned int * const ibs_sample, int ibs_code); |
73 | 75 | ||
74 | struct ibs_fetch_sample { | 76 | struct ibs_fetch_sample { |
75 | /* MSRC001_1031 IBS Fetch Linear Address Register */ | 77 | /* MSRC001_1031 IBS Fetch Linear Address Register */ |
@@ -104,11 +106,6 @@ struct ibs_op_sample { | |||
104 | unsigned int ibs_dc_phys_high; | 106 | unsigned int ibs_dc_phys_high; |
105 | }; | 107 | }; |
106 | 108 | ||
107 | /* | ||
108 | * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ | ||
109 | */ | ||
110 | static void clear_ibs_nmi(void); | ||
111 | |||
112 | static int ibs_allowed; /* AMD Family10h and later */ | 109 | static int ibs_allowed; /* AMD Family10h and later */ |
113 | 110 | ||
114 | struct op_ibs_config { | 111 | struct op_ibs_config { |
@@ -223,7 +220,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
223 | (unsigned int *)&ibs_fetch, | 220 | (unsigned int *)&ibs_fetch, |
224 | IBS_FETCH_BEGIN); | 221 | IBS_FETCH_BEGIN); |
225 | 222 | ||
226 | /*reenable the IRQ */ | 223 | /* reenable the IRQ */ |
227 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 224 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); |
228 | high &= ~IBS_FETCH_HIGH_VALID_BIT; | 225 | high &= ~IBS_FETCH_HIGH_VALID_BIT; |
229 | high |= IBS_FETCH_HIGH_ENABLE; | 226 | high |= IBS_FETCH_HIGH_ENABLE; |
@@ -331,8 +328,10 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
331 | unsigned int low, high; | 328 | unsigned int low, high; |
332 | int i; | 329 | int i; |
333 | 330 | ||
334 | /* Subtle: stop on all counters to avoid race with | 331 | /* |
335 | * setting our pm callback */ | 332 | * Subtle: stop on all counters to avoid race with setting our |
333 | * pm callback | ||
334 | */ | ||
336 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 335 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { |
337 | if (!reset_value[i]) | 336 | if (!reset_value[i]) |
338 | continue; | 337 | continue; |
@@ -343,13 +342,15 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
343 | 342 | ||
344 | #ifdef CONFIG_OPROFILE_IBS | 343 | #ifdef CONFIG_OPROFILE_IBS |
345 | if (ibs_allowed && ibs_config.fetch_enabled) { | 344 | if (ibs_allowed && ibs_config.fetch_enabled) { |
346 | low = 0; /* clear max count and enable */ | 345 | /* clear max count and enable */ |
346 | low = 0; | ||
347 | high = 0; | 347 | high = 0; |
348 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 348 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); |
349 | } | 349 | } |
350 | 350 | ||
351 | if (ibs_allowed && ibs_config.op_enabled) { | 351 | if (ibs_allowed && ibs_config.op_enabled) { |
352 | low = 0; /* clear max count and enable */ | 352 | /* clear max count and enable */ |
353 | low = 0; | ||
353 | high = 0; | 354 | high = 0; |
354 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | 355 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); |
355 | } | 356 | } |
@@ -370,18 +371,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
370 | } | 371 | } |
371 | } | 372 | } |
372 | 373 | ||
373 | #ifndef CONFIG_OPROFILE_IBS | 374 | #ifdef CONFIG_OPROFILE_IBS |
374 | |||
375 | /* no IBS support */ | ||
376 | |||
377 | static int op_amd_init(struct oprofile_operations *ops) | ||
378 | { | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | static void op_amd_exit(void) {} | ||
383 | |||
384 | #else | ||
385 | 375 | ||
386 | static u8 ibs_eilvt_off; | 376 | static u8 ibs_eilvt_off; |
387 | 377 | ||
@@ -395,7 +385,7 @@ static inline void apic_clear_ibs_nmi_per_cpu(void *arg) | |||
395 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | 385 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); |
396 | } | 386 | } |
397 | 387 | ||
398 | static int pfm_amd64_setup_eilvt(void) | 388 | static int init_ibs_nmi(void) |
399 | { | 389 | { |
400 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | 390 | #define IBSCTL_LVTOFFSETVAL (1 << 8) |
401 | #define IBSCTL 0x1cc | 391 | #define IBSCTL 0x1cc |
@@ -443,18 +433,22 @@ static int pfm_amd64_setup_eilvt(void) | |||
443 | return 0; | 433 | return 0; |
444 | } | 434 | } |
445 | 435 | ||
446 | /* | 436 | /* uninitialize the APIC for the IBS interrupts if needed */ |
447 | * initialize the APIC for the IBS interrupts | 437 | static void clear_ibs_nmi(void) |
448 | * if available (AMD Family10h rev B0 and later) | 438 | { |
449 | */ | 439 | if (ibs_allowed) |
450 | static void setup_ibs(void) | 440 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); |
441 | } | ||
442 | |||
443 | /* initialize the APIC for the IBS interrupts if available */ | ||
444 | static void ibs_init(void) | ||
451 | { | 445 | { |
452 | ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); | 446 | ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); |
453 | 447 | ||
454 | if (!ibs_allowed) | 448 | if (!ibs_allowed) |
455 | return; | 449 | return; |
456 | 450 | ||
457 | if (pfm_amd64_setup_eilvt()) { | 451 | if (init_ibs_nmi()) { |
458 | ibs_allowed = 0; | 452 | ibs_allowed = 0; |
459 | return; | 453 | return; |
460 | } | 454 | } |
@@ -462,14 +456,12 @@ static void setup_ibs(void) | |||
462 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); | 456 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); |
463 | } | 457 | } |
464 | 458 | ||
465 | 459 | static void ibs_exit(void) | |
466 | /* | ||
467 | * unitialize the APIC for the IBS interrupts if needed on AMD Family10h | ||
468 | * rev B0 and later */ | ||
469 | static void clear_ibs_nmi(void) | ||
470 | { | 460 | { |
471 | if (ibs_allowed) | 461 | if (!ibs_allowed) |
472 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | 462 | return; |
463 | |||
464 | clear_ibs_nmi(); | ||
473 | } | 465 | } |
474 | 466 | ||
475 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 467 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
@@ -519,7 +511,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
519 | 511 | ||
520 | static int op_amd_init(struct oprofile_operations *ops) | 512 | static int op_amd_init(struct oprofile_operations *ops) |
521 | { | 513 | { |
522 | setup_ibs(); | 514 | ibs_init(); |
523 | create_arch_files = ops->create_files; | 515 | create_arch_files = ops->create_files; |
524 | ops->create_files = setup_ibs_files; | 516 | ops->create_files = setup_ibs_files; |
525 | return 0; | 517 | return 0; |
@@ -527,10 +519,21 @@ static int op_amd_init(struct oprofile_operations *ops) | |||
527 | 519 | ||
528 | static void op_amd_exit(void) | 520 | static void op_amd_exit(void) |
529 | { | 521 | { |
530 | clear_ibs_nmi(); | 522 | ibs_exit(); |
531 | } | 523 | } |
532 | 524 | ||
533 | #endif | 525 | #else |
526 | |||
527 | /* no IBS support */ | ||
528 | |||
529 | static int op_amd_init(struct oprofile_operations *ops) | ||
530 | { | ||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static void op_amd_exit(void) {} | ||
535 | |||
536 | #endif /* CONFIG_OPROFILE_IBS */ | ||
534 | 537 | ||
535 | struct op_x86_model_spec const op_amd_spec = { | 538 | struct op_x86_model_spec const op_amd_spec = { |
536 | .init = op_amd_init, | 539 | .init = op_amd_init, |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 773d68d3e912..503c240e26c7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info) | |||
1082 | 1082 | ||
1083 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1083 | static void xen_drop_mm_ref(struct mm_struct *mm) |
1084 | { | 1084 | { |
1085 | cpumask_t mask; | 1085 | cpumask_var_t mask; |
1086 | unsigned cpu; | 1086 | unsigned cpu; |
1087 | 1087 | ||
1088 | if (current->active_mm == mm) { | 1088 | if (current->active_mm == mm) { |
@@ -1094,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | /* Get the "official" set of cpus referring to our pagetable. */ | 1096 | /* Get the "official" set of cpus referring to our pagetable. */ |
1097 | mask = mm->cpu_vm_mask; | 1097 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { |
1098 | for_each_online_cpu(cpu) { | ||
1099 | if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) | ||
1100 | && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) | ||
1101 | continue; | ||
1102 | smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); | ||
1103 | } | ||
1104 | return; | ||
1105 | } | ||
1106 | cpumask_copy(mask, &mm->cpu_vm_mask); | ||
1098 | 1107 | ||
1099 | /* It's possible that a vcpu may have a stale reference to our | 1108 | /* It's possible that a vcpu may have a stale reference to our |
1100 | cr3, because its in lazy mode, and it hasn't yet flushed | 1109 | cr3, because its in lazy mode, and it hasn't yet flushed |
@@ -1103,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1103 | if needed. */ | 1112 | if needed. */ |
1104 | for_each_online_cpu(cpu) { | 1113 | for_each_online_cpu(cpu) { |
1105 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) | 1114 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) |
1106 | cpu_set(cpu, mask); | 1115 | cpumask_set_cpu(cpu, mask); |
1107 | } | 1116 | } |
1108 | 1117 | ||
1109 | if (!cpus_empty(mask)) | 1118 | if (!cpumask_empty(mask)) |
1110 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 1119 | smp_call_function_many(mask, drop_other_mm_ref, mm, 1); |
1120 | free_cpumask_var(mask); | ||
1111 | } | 1121 | } |
1112 | #else | 1122 | #else |
1113 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1123 | static void xen_drop_mm_ref(struct mm_struct *mm) |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index acd9b6705e02..c44e2069c7c7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include "xen-ops.h" | 33 | #include "xen-ops.h" |
34 | #include "mmu.h" | 34 | #include "mmu.h" |
35 | 35 | ||
36 | cpumask_t xen_cpu_initialized_map; | 36 | cpumask_var_t xen_cpu_initialized_map; |
37 | 37 | ||
38 | static DEFINE_PER_CPU(int, resched_irq); | 38 | static DEFINE_PER_CPU(int, resched_irq); |
39 | static DEFINE_PER_CPU(int, callfunc_irq); | 39 | static DEFINE_PER_CPU(int, callfunc_irq); |
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) | |||
158 | { | 158 | { |
159 | int i, rc; | 159 | int i, rc; |
160 | 160 | ||
161 | for (i = 0; i < NR_CPUS; i++) { | 161 | for (i = 0; i < nr_cpu_ids; i++) { |
162 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | 162 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); |
163 | if (rc >= 0) { | 163 | if (rc >= 0) { |
164 | num_processors++; | 164 | num_processors++; |
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
192 | if (xen_smp_intr_init(0)) | 192 | if (xen_smp_intr_init(0)) |
193 | BUG(); | 193 | BUG(); |
194 | 194 | ||
195 | xen_cpu_initialized_map = cpumask_of_cpu(0); | 195 | if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) |
196 | panic("could not allocate xen_cpu_initialized_map\n"); | ||
197 | |||
198 | cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); | ||
196 | 199 | ||
197 | /* Restrict the possible_map according to max_cpus. */ | 200 | /* Restrict the possible_map according to max_cpus. */ |
198 | while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { | 201 | while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { |
199 | for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) | 202 | for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) |
200 | continue; | 203 | continue; |
201 | cpu_clear(cpu, cpu_possible_map); | 204 | cpu_clear(cpu, cpu_possible_map); |
202 | } | 205 | } |
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
221 | struct vcpu_guest_context *ctxt; | 224 | struct vcpu_guest_context *ctxt; |
222 | struct desc_struct *gdt; | 225 | struct desc_struct *gdt; |
223 | 226 | ||
224 | if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) | 227 | if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) |
225 | return 0; | 228 | return 0; |
226 | 229 | ||
227 | ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); | 230 | ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); |
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu) | |||
408 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 411 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
409 | } | 412 | } |
410 | 413 | ||
411 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | 414 | static void xen_send_IPI_mask(const struct cpumask *mask, |
415 | enum ipi_vector vector) | ||
412 | { | 416 | { |
413 | unsigned cpu; | 417 | unsigned cpu; |
414 | 418 | ||
415 | cpus_and(mask, mask, cpu_online_map); | 419 | for_each_cpu_and(cpu, mask, cpu_online_mask) |
416 | |||
417 | for_each_cpu_mask_nr(cpu, mask) | ||
418 | xen_send_IPI_one(cpu, vector); | 420 | xen_send_IPI_one(cpu, vector); |
419 | } | 421 | } |
420 | 422 | ||
421 | static void xen_smp_send_call_function_ipi(cpumask_t mask) | 423 | static void xen_smp_send_call_function_ipi(const struct cpumask *mask) |
422 | { | 424 | { |
423 | int cpu; | 425 | int cpu; |
424 | 426 | ||
425 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | 427 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
426 | 428 | ||
427 | /* Make sure other vcpus get a chance to run if they need to. */ | 429 | /* Make sure other vcpus get a chance to run if they need to. */ |
428 | for_each_cpu_mask_nr(cpu, mask) { | 430 | for_each_cpu(cpu, mask) { |
429 | if (xen_vcpu_stolen(cpu)) { | 431 | if (xen_vcpu_stolen(cpu)) { |
430 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | 432 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); |
431 | break; | 433 | break; |
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask) | |||
435 | 437 | ||
436 | static void xen_smp_send_call_function_single_ipi(int cpu) | 438 | static void xen_smp_send_call_function_single_ipi(int cpu) |
437 | { | 439 | { |
438 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); | 440 | xen_send_IPI_mask(cpumask_of(cpu), |
441 | XEN_CALL_FUNCTION_SINGLE_VECTOR); | ||
439 | } | 442 | } |
440 | 443 | ||
441 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 444 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 2a234db5949b..212ffe012b76 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled) | |||
35 | pfn_to_mfn(xen_start_info->console.domU.mfn); | 35 | pfn_to_mfn(xen_start_info->console.domU.mfn); |
36 | } else { | 36 | } else { |
37 | #ifdef CONFIG_SMP | 37 | #ifdef CONFIG_SMP |
38 | xen_cpu_initialized_map = cpu_online_map; | 38 | BUG_ON(xen_cpu_initialized_map == NULL); |
39 | cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); | ||
39 | #endif | 40 | #endif |
40 | xen_vcpu_restore(); | 41 | xen_vcpu_restore(); |
41 | } | 42 | } |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c9f7cda48ed7..65d75a6be0ba 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -437,7 +437,7 @@ void xen_setup_timer(int cpu) | |||
437 | evt = &per_cpu(xen_clock_events, cpu); | 437 | evt = &per_cpu(xen_clock_events, cpu); |
438 | memcpy(evt, xen_clockevent, sizeof(*evt)); | 438 | memcpy(evt, xen_clockevent, sizeof(*evt)); |
439 | 439 | ||
440 | evt->cpumask = cpumask_of_cpu(cpu); | 440 | evt->cpumask = cpumask_of(cpu); |
441 | evt->irq = irq; | 441 | evt->irq = irq; |
442 | 442 | ||
443 | setup_runstate_info(cpu); | 443 | setup_runstate_info(cpu); |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e1afae8461f..c1f8faf0a2c5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); | |||
58 | __cpuinit void xen_init_lock_cpu(int cpu); | 58 | __cpuinit void xen_init_lock_cpu(int cpu); |
59 | void xen_uninit_lock_cpu(int cpu); | 59 | void xen_uninit_lock_cpu(int cpu); |
60 | 60 | ||
61 | extern cpumask_t xen_cpu_initialized_map; | 61 | extern cpumask_var_t xen_cpu_initialized_map; |
62 | #else | 62 | #else |
63 | static inline void xen_smp_init(void) {} | 63 | static inline void xen_smp_init(void) {} |
64 | #endif | 64 | #endif |