author     Ingo Molnar <mingo@elte.hu>  2009-01-06 03:53:05 -0500
committer  Ingo Molnar <mingo@elte.hu>  2009-01-06 03:53:05 -0500
commit     3d7a96f5a485b7d06c2379f343d7312af89ec9e2 (patch)
tree       5f097f68eb0f9fd3fa4a10f38672e300e9127b10 /arch/x86/kernel
parent     723cbe0775514853c22dc45005af59c360916af1 (diff)
parent     238c6d54830c624f34ac9cf123ac04aebfca5013 (diff)
Merge branch 'linus' into tracing/kmemtrace2
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 31
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 666
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 19
-rw-r--r--  arch/x86/kernel/apic.c | 42
-rw-r--r--  arch/x86/kernel/bios_uv.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 28
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 9
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 24
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 51
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 9
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 45
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 18
-rw-r--r--  arch/x86/kernel/cpuid.c | 8
-rw-r--r--  arch/x86/kernel/crash.c | 18
-rw-r--r--  arch/x86/kernel/early_printk.c | 2
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 107
-rw-r--r--  arch/x86/kernel/genx2apic_cluster.c | 81
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 78
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 61
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/hpet.c | 8
-rw-r--r--  arch/x86/kernel/i8253.c | 2
-rw-r--r--  arch/x86/kernel/init_task.c | 1
-rw-r--r--  arch/x86/kernel/io_apic.c | 390
-rw-r--r--  arch/x86/kernel/ipi.c | 28
-rw-r--r--  arch/x86/kernel/irq.c | 3
-rw-r--r--  arch/x86/kernel/irq_32.c | 13
-rw-r--r--  arch/x86/kernel/irq_64.c | 15
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 16
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 13
-rw-r--r--  arch/x86/kernel/kvmclock.c | 10
-rw-r--r--  arch/x86/kernel/ldt.c | 4
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 2
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 3
-rw-r--r--  arch/x86/kernel/mpparse.c | 10
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 3
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 2
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 73
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 36
-rw-r--r--  arch/x86/kernel/smp.c | 8
-rw-r--r--  arch/x86/kernel/smpboot.c | 48
-rw-r--r--  arch/x86/kernel/tlb_32.c | 2
-rw-r--r--  arch/x86/kernel/tlb_64.c | 2
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 45
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 2
-rw-r--r--  arch/x86/kernel/xsave.c | 2
54 files changed, 1523 insertions, 670 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 65d0b72777ea..29dc0c89d4af 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
538 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 538 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
539 union acpi_object *obj; 539 union acpi_object *obj;
540 struct acpi_madt_local_apic *lapic; 540 struct acpi_madt_local_apic *lapic;
541 cpumask_t tmp_map, new_map; 541 cpumask_var_t tmp_map, new_map;
542 u8 physid; 542 u8 physid;
543 int cpu; 543 int cpu;
544 int retval = -ENOMEM;
544 545
545 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) 546 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
546 return -EINVAL; 547 return -EINVAL;
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
569 buffer.length = ACPI_ALLOCATE_BUFFER; 570 buffer.length = ACPI_ALLOCATE_BUFFER;
570 buffer.pointer = NULL; 571 buffer.pointer = NULL;
571 572
572 tmp_map = cpu_present_map; 573 if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
574 goto out;
575
576 if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
577 goto free_tmp_map;
578
579 cpumask_copy(tmp_map, cpu_present_mask);
573 acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); 580 acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
574 581
575 /* 582 /*
576 * If mp_register_lapic successfully generates a new logical cpu 583 * If mp_register_lapic successfully generates a new logical cpu
577 * number, then the following will get us exactly what was mapped 584 * number, then the following will get us exactly what was mapped
578 */ 585 */
579 cpus_andnot(new_map, cpu_present_map, tmp_map); 586 cpumask_andnot(new_map, cpu_present_mask, tmp_map);
580 if (cpus_empty(new_map)) { 587 if (cpumask_empty(new_map)) {
581 printk ("Unable to map lapic to logical cpu number\n"); 588 printk ("Unable to map lapic to logical cpu number\n");
582 return -EINVAL; 589 retval = -EINVAL;
590 goto free_new_map;
583 } 591 }
584 592
585 cpu = first_cpu(new_map); 593 cpu = cpumask_first(new_map);
586 594
587 *pcpu = cpu; 595 *pcpu = cpu;
588 return 0; 596 retval = 0;
597
598free_new_map:
599 free_cpumask_var(new_map);
600free_tmp_map:
601 free_cpumask_var(tmp_map);
602out:
603 return retval;
589} 604}
590 605
591/* wrapper to silence section mismatch warning */ 606/* wrapper to silence section mismatch warning */
@@ -598,7 +613,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
598int acpi_unmap_lsapic(int cpu) 613int acpi_unmap_lsapic(int cpu)
599{ 614{
600 per_cpu(x86_cpu_to_apicid, cpu) = -1; 615 per_cpu(x86_cpu_to_apicid, cpu) = -1;
601 cpu_clear(cpu, cpu_present_map); 616 set_cpu_present(cpu, false);
602 num_processors--; 617 num_processors--;
603 618
604 return (0); 619 return (0);
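
The acpi/boot.c hunks above follow the kernel's standard recipe for replacing an on-stack cpumask_t with a dynamically allocated cpumask_var_t. A minimal sketch of that allocate/copy/andnot/free shape is below; the function and variable names are illustrative and not part of this commit.

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Illustrative only: same shape as the _acpi_map_lsapic() change above. */
static int example_find_newly_present_cpu(void)
{
	cpumask_var_t before, added;
	int ret = -ENOMEM;

	if (!alloc_cpumask_var(&before, GFP_KERNEL))
		goto out;
	if (!alloc_cpumask_var(&added, GFP_KERNEL))
		goto free_before;

	cpumask_copy(before, cpu_present_mask);
	/* ... something makes one more CPU present here ... */
	cpumask_andnot(added, cpu_present_mask, before);

	ret = cpumask_empty(added) ? -EINVAL : cpumask_first(added);

	free_cpumask_var(added);
free_before:
	free_cpumask_var(before);
out:
	return ret;
}

With CONFIG_CPUMASK_OFFSTACK=n these allocations compile down to on-stack masks again, which is why the conversion costs nothing on small configurations.
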
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b350..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/gfp.h> 21#include <linux/gfp.h>
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/debugfs.h>
23#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
24#include <linux/iommu-helper.h> 25#include <linux/iommu-helper.h>
26#ifdef CONFIG_IOMMU_API
27#include <linux/iommu.h>
28#endif
25#include <asm/proto.h> 29#include <asm/proto.h>
26#include <asm/iommu.h> 30#include <asm/iommu.h>
27#include <asm/gart.h> 31#include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
38static LIST_HEAD(iommu_pd_list); 42static LIST_HEAD(iommu_pd_list);
39static DEFINE_SPINLOCK(iommu_pd_list_lock); 43static DEFINE_SPINLOCK(iommu_pd_list_lock);
40 44
45#ifdef CONFIG_IOMMU_API
46static struct iommu_ops amd_iommu_ops;
47#endif
48
41/* 49/*
42 * general struct to manage commands send to an IOMMU 50 * general struct to manage commands send to an IOMMU
43 */ 51 */
@@ -47,6 +55,68 @@ struct iommu_cmd {
47 55
48static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 56static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
49 struct unity_map_entry *e); 57 struct unity_map_entry *e);
58static struct dma_ops_domain *find_protection_domain(u16 devid);
59
60
61#ifdef CONFIG_AMD_IOMMU_STATS
62
63/*
64 * Initialization code for statistics collection
65 */
66
67DECLARE_STATS_COUNTER(compl_wait);
68DECLARE_STATS_COUNTER(cnt_map_single);
69DECLARE_STATS_COUNTER(cnt_unmap_single);
70DECLARE_STATS_COUNTER(cnt_map_sg);
71DECLARE_STATS_COUNTER(cnt_unmap_sg);
72DECLARE_STATS_COUNTER(cnt_alloc_coherent);
73DECLARE_STATS_COUNTER(cnt_free_coherent);
74DECLARE_STATS_COUNTER(cross_page);
75DECLARE_STATS_COUNTER(domain_flush_single);
76DECLARE_STATS_COUNTER(domain_flush_all);
77DECLARE_STATS_COUNTER(alloced_io_mem);
78DECLARE_STATS_COUNTER(total_map_requests);
79
80static struct dentry *stats_dir;
81static struct dentry *de_isolate;
82static struct dentry *de_fflush;
83
84static void amd_iommu_stats_add(struct __iommu_counter *cnt)
85{
86 if (stats_dir == NULL)
87 return;
88
89 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
90 &cnt->value);
91}
92
93static void amd_iommu_stats_init(void)
94{
95 stats_dir = debugfs_create_dir("amd-iommu", NULL);
96 if (stats_dir == NULL)
97 return;
98
99 de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
100 (u32 *)&amd_iommu_isolate);
101
102 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
103 (u32 *)&amd_iommu_unmap_flush);
104
105 amd_iommu_stats_add(&compl_wait);
106 amd_iommu_stats_add(&cnt_map_single);
107 amd_iommu_stats_add(&cnt_unmap_single);
108 amd_iommu_stats_add(&cnt_map_sg);
109 amd_iommu_stats_add(&cnt_unmap_sg);
110 amd_iommu_stats_add(&cnt_alloc_coherent);
111 amd_iommu_stats_add(&cnt_free_coherent);
112 amd_iommu_stats_add(&cross_page);
113 amd_iommu_stats_add(&domain_flush_single);
114 amd_iommu_stats_add(&domain_flush_all);
115 amd_iommu_stats_add(&alloced_io_mem);
116 amd_iommu_stats_add(&total_map_requests);
117}
118
119#endif
50 120
51/* returns !0 if the IOMMU is caching non-present entries in its TLB */ 121/* returns !0 if the IOMMU is caching non-present entries in its TLB */
52static int iommu_has_npcache(struct amd_iommu *iommu) 122static int iommu_has_npcache(struct amd_iommu *iommu)
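
The DECLARE_STATS_COUNTER/INC_STATS_COUNTER macros used above are defined elsewhere in the AMD IOMMU headers; the debugfs plumbing they rely on reduces to roughly the sketch below. All names here are placeholders, and note that debugfs_create_u64() returned a dentry pointer in kernels of this era.

#include <linux/debugfs.h>
#include <linux/types.h>

/* Stand-in for the __iommu_counter bookkeeping wired up by amd_iommu_stats_init(). */
struct example_counter {
	char		*name;
	u64		value;
	struct dentry	*dent;
};

static struct dentry *example_stats_dir;
static struct example_counter example_map_requests = { .name = "total_map_requests" };

static void example_stats_init(void)
{
	example_stats_dir = debugfs_create_dir("example-iommu", NULL);
	if (example_stats_dir == NULL)
		return;

	/* exposes the u64 read-only under /sys/kernel/debug/example-iommu/ */
	example_map_requests.dent = debugfs_create_u64(example_map_requests.name,
						       0444, example_stats_dir,
						       &example_map_requests.value);
}

/* A hot path would then simply do: example_map_requests.value += 1; */
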
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
189 spin_lock_irqsave(&iommu->lock, flags); 259 spin_lock_irqsave(&iommu->lock, flags);
190 ret = __iommu_queue_command(iommu, cmd); 260 ret = __iommu_queue_command(iommu, cmd);
191 if (!ret) 261 if (!ret)
192 iommu->need_sync = 1; 262 iommu->need_sync = true;
193 spin_unlock_irqrestore(&iommu->lock, flags); 263 spin_unlock_irqrestore(&iommu->lock, flags);
194 264
195 return ret; 265 return ret;
196} 266}
197 267
198/* 268/*
269 * This function waits until an IOMMU has completed a completion
270 * wait command
271 */
272static void __iommu_wait_for_completion(struct amd_iommu *iommu)
273{
274 int ready = 0;
275 unsigned status = 0;
276 unsigned long i = 0;
277
278 INC_STATS_COUNTER(compl_wait);
279
280 while (!ready && (i < EXIT_LOOP_COUNT)) {
281 ++i;
282 /* wait for the bit to become one */
283 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
284 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
285 }
286
287 /* set bit back to zero */
288 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
289 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
290
291 if (unlikely(i == EXIT_LOOP_COUNT))
292 panic("AMD IOMMU: Completion wait loop failed\n");
293}
294
295/*
296 * This function queues a completion wait command into the command
297 * buffer of an IOMMU
298 */
299static int __iommu_completion_wait(struct amd_iommu *iommu)
300{
301 struct iommu_cmd cmd;
302
303 memset(&cmd, 0, sizeof(cmd));
304 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
305 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
306
307 return __iommu_queue_command(iommu, &cmd);
308}
309
310/*
199 * This function is called whenever we need to ensure that the IOMMU has 311 * This function is called whenever we need to ensure that the IOMMU has
200 * completed execution of all commands we sent. It sends a 312 * completed execution of all commands we sent. It sends a
201 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 313 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
204 */ 316 */
205static int iommu_completion_wait(struct amd_iommu *iommu) 317static int iommu_completion_wait(struct amd_iommu *iommu)
206{ 318{
207 int ret = 0, ready = 0; 319 int ret = 0;
208 unsigned status = 0; 320 unsigned long flags;
209 struct iommu_cmd cmd;
210 unsigned long flags, i = 0;
211
212 memset(&cmd, 0, sizeof(cmd));
213 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
214 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
215 321
216 spin_lock_irqsave(&iommu->lock, flags); 322 spin_lock_irqsave(&iommu->lock, flags);
217 323
218 if (!iommu->need_sync) 324 if (!iommu->need_sync)
219 goto out; 325 goto out;
220 326
221 iommu->need_sync = 0; 327 ret = __iommu_completion_wait(iommu);
222 328
223 ret = __iommu_queue_command(iommu, &cmd); 329 iommu->need_sync = false;
224 330
225 if (ret) 331 if (ret)
226 goto out; 332 goto out;
227 333
228 while (!ready && (i < EXIT_LOOP_COUNT)) { 334 __iommu_wait_for_completion(iommu);
229 ++i;
230 /* wait for the bit to become one */
231 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
232 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
233 }
234
235 /* set bit back to zero */
236 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
237 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
238
239 if (unlikely(i == EXIT_LOOP_COUNT))
240 panic("AMD IOMMU: Completion wait loop failed\n");
241 335
242out: 336out:
243 spin_unlock_irqrestore(&iommu->lock, flags); 337 spin_unlock_irqrestore(&iommu->lock, flags);
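
For reference, the bounded status poll that __iommu_wait_for_completion() factors out follows the pattern sketched here; the register offset, mask, and loop limit are placeholders rather than the driver's real MMIO_* constants.

#include <linux/errno.h>
#include <linux/io.h>

#define EX_STATUS_OFFSET	0x2020		/* placeholder register offset */
#define EX_DONE_MASK		0x04		/* placeholder completion bit */
#define EX_LOOP_LIMIT		(1UL << 20)	/* placeholder upper bound */

/* Spin on a memory-mapped status bit with an upper bound, then acknowledge it. */
static int example_wait_done(void __iomem *mmio_base)
{
	unsigned long i;
	u32 status = 0;

	for (i = 0; i < EX_LOOP_LIMIT; ++i) {
		status = readl(mmio_base + EX_STATUS_OFFSET);
		if (status & EX_DONE_MASK)
			break;
	}

	if (!(status & EX_DONE_MASK))
		return -ETIMEDOUT;

	/* the AMD code clears the bit by writing the status back with it zeroed */
	writel(status & ~EX_DONE_MASK, mmio_base + EX_STATUS_OFFSET);
	return 0;
}

Splitting the queueing and the polling into unlocked __iommu_* helpers is what lets iommu_flush_domain() further down reuse them while already holding iommu->lock.
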
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
264 return ret; 358 return ret;
265} 359}
266 360
361static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
362 u16 domid, int pde, int s)
363{
364 memset(cmd, 0, sizeof(*cmd));
365 address &= PAGE_MASK;
366 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
367 cmd->data[1] |= domid;
368 cmd->data[2] = lower_32_bits(address);
369 cmd->data[3] = upper_32_bits(address);
370 if (s) /* size bit - we flush more than one 4kb page */
371 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
372 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
373 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
374}
375
267/* 376/*
268 * Generic command send function for invalidaing TLB entries 377 * Generic command send function for invalidaing TLB entries
269 */ 378 */
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
273 struct iommu_cmd cmd; 382 struct iommu_cmd cmd;
274 int ret; 383 int ret;
275 384
276 memset(&cmd, 0, sizeof(cmd)); 385 __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
277 address &= PAGE_MASK;
278 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
279 cmd.data[1] |= domid;
280 cmd.data[2] = lower_32_bits(address);
281 cmd.data[3] = upper_32_bits(address);
282 if (s) /* size bit - we flush more than one 4kb page */
283 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
284 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
285 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
286 386
287 ret = iommu_queue_command(iommu, &cmd); 387 ret = iommu_queue_command(iommu, &cmd);
288 388
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
321{ 421{
322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 422 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
323 423
424 INC_STATS_COUNTER(domain_flush_single);
425
324 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 426 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
325} 427}
326 428
429/*
430 * This function is used to flush the IO/TLB for a given protection domain
431 * on every IOMMU in the system
432 */
433static void iommu_flush_domain(u16 domid)
434{
435 unsigned long flags;
436 struct amd_iommu *iommu;
437 struct iommu_cmd cmd;
438
439 INC_STATS_COUNTER(domain_flush_all);
440
441 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
442 domid, 1, 1);
443
444 list_for_each_entry(iommu, &amd_iommu_list, list) {
445 spin_lock_irqsave(&iommu->lock, flags);
446 __iommu_queue_command(iommu, &cmd);
447 __iommu_completion_wait(iommu);
448 __iommu_wait_for_completion(iommu);
449 spin_unlock_irqrestore(&iommu->lock, flags);
450 }
451}
452
327/**************************************************************************** 453/****************************************************************************
328 * 454 *
329 * The functions below are used the create the page table mappings for 455 * The functions below are used the create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
338 * supporting all features of AMD IOMMU page tables like level skipping 464 * supporting all features of AMD IOMMU page tables like level skipping
339 * and full 64 bit address spaces. 465 * and full 64 bit address spaces.
340 */ 466 */
341static int iommu_map(struct protection_domain *dom, 467static int iommu_map_page(struct protection_domain *dom,
342 unsigned long bus_addr, 468 unsigned long bus_addr,
343 unsigned long phys_addr, 469 unsigned long phys_addr,
344 int prot) 470 int prot)
345{ 471{
346 u64 __pte, *pte, *page; 472 u64 __pte, *pte, *page;
347 473
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
388 return 0; 514 return 0;
389} 515}
390 516
517static void iommu_unmap_page(struct protection_domain *dom,
518 unsigned long bus_addr)
519{
520 u64 *pte;
521
522 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
523
524 if (!IOMMU_PTE_PRESENT(*pte))
525 return;
526
527 pte = IOMMU_PTE_PAGE(*pte);
528 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
529
530 if (!IOMMU_PTE_PRESENT(*pte))
531 return;
532
533 pte = IOMMU_PTE_PAGE(*pte);
534 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
535
536 *pte = 0;
537}
538
391/* 539/*
392 * This function checks if a specific unity mapping entry is needed for 540 * This function checks if a specific unity mapping entry is needed for
393 * this specific IOMMU. 541 * this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
440 588
441 for (addr = e->address_start; addr < e->address_end; 589 for (addr = e->address_start; addr < e->address_end;
442 addr += PAGE_SIZE) { 590 addr += PAGE_SIZE) {
443 ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); 591 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
444 if (ret) 592 if (ret)
445 return ret; 593 return ret;
446 /* 594 /*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
571 return id; 719 return id;
572} 720}
573 721
722static void domain_id_free(int id)
723{
724 unsigned long flags;
725
726 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
727 if (id > 0 && id < MAX_DOMAIN_ID)
728 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
729 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
730}
731
574/* 732/*
575 * Used to reserve address ranges in the aperture (e.g. for exclusion 733 * Used to reserve address ranges in the aperture (e.g. for exclusion
576 * ranges. 734 * ranges.
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
587 iommu_area_reserve(dom->bitmap, start_page, pages); 745 iommu_area_reserve(dom->bitmap, start_page, pages);
588} 746}
589 747
590static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 748static void free_pagetable(struct protection_domain *domain)
591{ 749{
592 int i, j; 750 int i, j;
593 u64 *p1, *p2, *p3; 751 u64 *p1, *p2, *p3;
594 752
595 p1 = dma_dom->domain.pt_root; 753 p1 = domain->pt_root;
596 754
597 if (!p1) 755 if (!p1)
598 return; 756 return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
613 } 771 }
614 772
615 free_page((unsigned long)p1); 773 free_page((unsigned long)p1);
774
775 domain->pt_root = NULL;
616} 776}
617 777
618/* 778/*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
624 if (!dom) 784 if (!dom)
625 return; 785 return;
626 786
627 dma_ops_free_pagetable(dom); 787 free_pagetable(&dom->domain);
628 788
629 kfree(dom->pte_pages); 789 kfree(dom->pte_pages);
630 790
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
663 goto free_dma_dom; 823 goto free_dma_dom;
664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 824 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 825 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
826 dma_dom->domain.flags = PD_DMA_OPS_MASK;
666 dma_dom->domain.priv = dma_dom; 827 dma_dom->domain.priv = dma_dom;
667 if (!dma_dom->domain.pt_root) 828 if (!dma_dom->domain.pt_root)
668 goto free_dma_dom; 829 goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
725} 886}
726 887
727/* 888/*
889 * little helper function to check whether a given protection domain is a
890 * dma_ops domain
891 */
892static bool dma_ops_domain(struct protection_domain *domain)
893{
894 return domain->flags & PD_DMA_OPS_MASK;
895}
896
897/*
728 * Find out the protection domain structure for a given PCI device. This 898 * Find out the protection domain structure for a given PCI device. This
729 * will give us the pointer to the page table root for example. 899 * will give us the pointer to the page table root for example.
730 */ 900 */
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
744 * If a device is not yet associated with a domain, this function does 914 * If a device is not yet associated with a domain, this function does
745 * assigns it visible for the hardware 915 * assigns it visible for the hardware
746 */ 916 */
747static void set_device_domain(struct amd_iommu *iommu, 917static void attach_device(struct amd_iommu *iommu,
748 struct protection_domain *domain, 918 struct protection_domain *domain,
749 u16 devid) 919 u16 devid)
750{ 920{
751 unsigned long flags; 921 unsigned long flags;
752
753 u64 pte_root = virt_to_phys(domain->pt_root); 922 u64 pte_root = virt_to_phys(domain->pt_root);
754 923
924 domain->dev_cnt += 1;
925
755 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 926 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
756 << DEV_ENTRY_MODE_SHIFT; 927 << DEV_ENTRY_MODE_SHIFT;
757 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 928 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
767 iommu_queue_inv_dev_entry(iommu, devid); 938 iommu_queue_inv_dev_entry(iommu, devid);
768} 939}
769 940
941/*
942 * Removes a device from a protection domain (unlocked)
943 */
944static void __detach_device(struct protection_domain *domain, u16 devid)
945{
946
947 /* lock domain */
948 spin_lock(&domain->lock);
949
950 /* remove domain from the lookup table */
951 amd_iommu_pd_table[devid] = NULL;
952
953 /* remove entry from the device table seen by the hardware */
954 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
955 amd_iommu_dev_table[devid].data[1] = 0;
956 amd_iommu_dev_table[devid].data[2] = 0;
957
958 /* decrease reference counter */
959 domain->dev_cnt -= 1;
960
961 /* ready */
962 spin_unlock(&domain->lock);
963}
964
965/*
966 * Removes a device from a protection domain (with devtable_lock held)
967 */
968static void detach_device(struct protection_domain *domain, u16 devid)
969{
970 unsigned long flags;
971
972 /* lock device table */
973 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
974 __detach_device(domain, devid);
975 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
976}
977
978static int device_change_notifier(struct notifier_block *nb,
979 unsigned long action, void *data)
980{
981 struct device *dev = data;
982 struct pci_dev *pdev = to_pci_dev(dev);
983 u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
984 struct protection_domain *domain;
985 struct dma_ops_domain *dma_domain;
986 struct amd_iommu *iommu;
987 int order = amd_iommu_aperture_order;
988 unsigned long flags;
989
990 if (devid > amd_iommu_last_bdf)
991 goto out;
992
993 devid = amd_iommu_alias_table[devid];
994
995 iommu = amd_iommu_rlookup_table[devid];
996 if (iommu == NULL)
997 goto out;
998
999 domain = domain_for_device(devid);
1000
1001 if (domain && !dma_ops_domain(domain))
1002 WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
1003 "to a non-dma-ops domain\n", dev_name(dev));
1004
1005 switch (action) {
1006 case BUS_NOTIFY_BOUND_DRIVER:
1007 if (domain)
1008 goto out;
1009 dma_domain = find_protection_domain(devid);
1010 if (!dma_domain)
1011 dma_domain = iommu->default_dom;
1012 attach_device(iommu, &dma_domain->domain, devid);
1013 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
1014 "device %s\n", dma_domain->domain.id, dev_name(dev));
1015 break;
1016 case BUS_NOTIFY_UNBIND_DRIVER:
1017 if (!domain)
1018 goto out;
1019 detach_device(domain, devid);
1020 break;
1021 case BUS_NOTIFY_ADD_DEVICE:
1022 /* allocate a protection domain if a device is added */
1023 dma_domain = find_protection_domain(devid);
1024 if (dma_domain)
1025 goto out;
1026 dma_domain = dma_ops_domain_alloc(iommu, order);
1027 if (!dma_domain)
1028 goto out;
1029 dma_domain->target_dev = devid;
1030
1031 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1032 list_add_tail(&dma_domain->list, &iommu_pd_list);
1033 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1034
1035 break;
1036 default:
1037 goto out;
1038 }
1039
1040 iommu_queue_inv_dev_entry(iommu, devid);
1041 iommu_completion_wait(iommu);
1042
1043out:
1044 return 0;
1045}
1046
1047struct notifier_block device_nb = {
1048 .notifier_call = device_change_notifier,
1049};
1050
770/***************************************************************************** 1051/*****************************************************************************
771 * 1052 *
772 * The next functions belong to the dma_ops mapping/unmapping code. 1053 * The next functions belong to the dma_ops mapping/unmapping code.
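
device_change_notifier() above hooks into the driver-core bus notifier chain for the PCI bus. A stripped-down notifier that only logs the same two bind/unbind events would look roughly like this; the example_* names are illustrative.

#include <linux/device.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/pci.h>

/* Log PCI driver bind/unbind events, the same notifications handled above. */
static int example_notifier_call(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;

	switch (action) {
	case BUS_NOTIFY_BOUND_DRIVER:
		dev_info(dev, "driver bound\n");
		break;
	case BUS_NOTIFY_UNBIND_DRIVER:
		dev_info(dev, "driver about to unbind\n");
		break;
	}

	return 0;
}

static struct notifier_block example_nb = {
	.notifier_call = example_notifier_call,
};

static int __init example_register_notifier(void)
{
	return bus_register_notifier(&pci_bus_type, &example_nb);
}
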
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
802 list_for_each_entry(entry, &iommu_pd_list, list) { 1083 list_for_each_entry(entry, &iommu_pd_list, list) {
803 if (entry->target_dev == devid) { 1084 if (entry->target_dev == devid) {
804 ret = entry; 1085 ret = entry;
805 list_del(&ret->list);
806 break; 1086 break;
807 } 1087 }
808 } 1088 }
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
853 if (!dma_dom) 1133 if (!dma_dom)
854 dma_dom = (*iommu)->default_dom; 1134 dma_dom = (*iommu)->default_dom;
855 *domain = &dma_dom->domain; 1135 *domain = &dma_dom->domain;
856 set_device_domain(*iommu, *domain, *bdf); 1136 attach_device(*iommu, *domain, *bdf);
857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1137 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
858 "device ", (*domain)->id); 1138 "device %s\n", (*domain)->id, dev_name(dev));
859 print_devid(_bdf, 1);
860 } 1139 }
861 1140
862 if (domain_for_device(_bdf) == NULL) 1141 if (domain_for_device(_bdf) == NULL)
863 set_device_domain(*iommu, *domain, _bdf); 1142 attach_device(*iommu, *domain, _bdf);
864 1143
865 return 1; 1144 return 1;
866} 1145}
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
946 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 1225 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
947 paddr &= PAGE_MASK; 1226 paddr &= PAGE_MASK;
948 1227
1228 INC_STATS_COUNTER(total_map_requests);
1229
1230 if (pages > 1)
1231 INC_STATS_COUNTER(cross_page);
1232
949 if (align) 1233 if (align)
950 align_mask = (1UL << get_order(size)) - 1; 1234 align_mask = (1UL << get_order(size)) - 1;
951 1235
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
962 } 1246 }
963 address += offset; 1247 address += offset;
964 1248
1249 ADD_STATS_COUNTER(alloced_io_mem, size);
1250
965 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1251 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
966 iommu_flush_tlb(iommu, dma_dom->domain.id); 1252 iommu_flush_tlb(iommu, dma_dom->domain.id);
967 dma_dom->need_flush = false; 1253 dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
998 start += PAGE_SIZE; 1284 start += PAGE_SIZE;
999 } 1285 }
1000 1286
1287 SUB_STATS_COUNTER(alloced_io_mem, size);
1288
1001 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1289 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1002 1290
1003 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 1291 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1019 dma_addr_t addr; 1307 dma_addr_t addr;
1020 u64 dma_mask; 1308 u64 dma_mask;
1021 1309
1310 INC_STATS_COUNTER(cnt_map_single);
1311
1022 if (!check_device(dev)) 1312 if (!check_device(dev))
1023 return bad_dma_address; 1313 return bad_dma_address;
1024 1314
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1030 /* device not handled by any AMD IOMMU */ 1320 /* device not handled by any AMD IOMMU */
1031 return (dma_addr_t)paddr; 1321 return (dma_addr_t)paddr;
1032 1322
1323 if (!dma_ops_domain(domain))
1324 return bad_dma_address;
1325
1033 spin_lock_irqsave(&domain->lock, flags); 1326 spin_lock_irqsave(&domain->lock, flags);
1034 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, 1327 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
1035 dma_mask); 1328 dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
1055 struct protection_domain *domain; 1348 struct protection_domain *domain;
1056 u16 devid; 1349 u16 devid;
1057 1350
1351 INC_STATS_COUNTER(cnt_unmap_single);
1352
1058 if (!check_device(dev) || 1353 if (!check_device(dev) ||
1059 !get_device_resources(dev, &iommu, &domain, &devid)) 1354 !get_device_resources(dev, &iommu, &domain, &devid))
1060 /* device not handled by any AMD IOMMU */ 1355 /* device not handled by any AMD IOMMU */
1061 return; 1356 return;
1062 1357
1358 if (!dma_ops_domain(domain))
1359 return;
1360
1063 spin_lock_irqsave(&domain->lock, flags); 1361 spin_lock_irqsave(&domain->lock, flags);
1064 1362
1065 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1363 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1104 int mapped_elems = 0; 1402 int mapped_elems = 0;
1105 u64 dma_mask; 1403 u64 dma_mask;
1106 1404
1405 INC_STATS_COUNTER(cnt_map_sg);
1406
1107 if (!check_device(dev)) 1407 if (!check_device(dev))
1108 return 0; 1408 return 0;
1109 1409
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1114 if (!iommu || !domain) 1414 if (!iommu || !domain)
1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1415 return map_sg_no_iommu(dev, sglist, nelems, dir);
1116 1416
1417 if (!dma_ops_domain(domain))
1418 return 0;
1419
1117 spin_lock_irqsave(&domain->lock, flags); 1420 spin_lock_irqsave(&domain->lock, flags);
1118 1421
1119 for_each_sg(sglist, s, nelems, i) { 1422 for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1163 u16 devid; 1466 u16 devid;
1164 int i; 1467 int i;
1165 1468
1469 INC_STATS_COUNTER(cnt_unmap_sg);
1470
1166 if (!check_device(dev) || 1471 if (!check_device(dev) ||
1167 !get_device_resources(dev, &iommu, &domain, &devid)) 1472 !get_device_resources(dev, &iommu, &domain, &devid))
1168 return; 1473 return;
1169 1474
1475 if (!dma_ops_domain(domain))
1476 return;
1477
1170 spin_lock_irqsave(&domain->lock, flags); 1478 spin_lock_irqsave(&domain->lock, flags);
1171 1479
1172 for_each_sg(sglist, s, nelems, i) { 1480 for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
1194 phys_addr_t paddr; 1502 phys_addr_t paddr;
1195 u64 dma_mask = dev->coherent_dma_mask; 1503 u64 dma_mask = dev->coherent_dma_mask;
1196 1504
1505 INC_STATS_COUNTER(cnt_alloc_coherent);
1506
1197 if (!check_device(dev)) 1507 if (!check_device(dev))
1198 return NULL; 1508 return NULL;
1199 1509
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
1212 return virt_addr; 1522 return virt_addr;
1213 } 1523 }
1214 1524
1525 if (!dma_ops_domain(domain))
1526 goto out_free;
1527
1215 if (!dma_mask) 1528 if (!dma_mask)
1216 dma_mask = *dev->dma_mask; 1529 dma_mask = *dev->dma_mask;
1217 1530
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1533 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1534 size, DMA_BIDIRECTIONAL, true, dma_mask);
1222 1535
1223 if (*dma_addr == bad_dma_address) { 1536 if (*dma_addr == bad_dma_address)
1224 free_pages((unsigned long)virt_addr, get_order(size)); 1537 goto out_free;
1225 virt_addr = NULL;
1226 goto out;
1227 }
1228 1538
1229 iommu_completion_wait(iommu); 1539 iommu_completion_wait(iommu);
1230 1540
1231out:
1232 spin_unlock_irqrestore(&domain->lock, flags); 1541 spin_unlock_irqrestore(&domain->lock, flags);
1233 1542
1234 return virt_addr; 1543 return virt_addr;
1544
1545out_free:
1546
1547 free_pages((unsigned long)virt_addr, get_order(size));
1548
1549 return NULL;
1235} 1550}
1236 1551
1237/* 1552/*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
1245 struct protection_domain *domain; 1560 struct protection_domain *domain;
1246 u16 devid; 1561 u16 devid;
1247 1562
1563 INC_STATS_COUNTER(cnt_free_coherent);
1564
1248 if (!check_device(dev)) 1565 if (!check_device(dev))
1249 return; 1566 return;
1250 1567
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
1253 if (!iommu || !domain) 1570 if (!iommu || !domain)
1254 goto free_mem; 1571 goto free_mem;
1255 1572
1573 if (!dma_ops_domain(domain))
1574 goto free_mem;
1575
1256 spin_lock_irqsave(&domain->lock, flags); 1576 spin_lock_irqsave(&domain->lock, flags);
1257 1577
1258 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 1578 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1296,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
1296 * we don't need to preallocate the protection domains anymore. 1616 * we don't need to preallocate the protection domains anymore.
1297 * For now we have to. 1617 * For now we have to.
1298 */ 1618 */
1299void prealloc_protection_domains(void) 1619static void prealloc_protection_domains(void)
1300{ 1620{
1301 struct pci_dev *dev = NULL; 1621 struct pci_dev *dev = NULL;
1302 struct dma_ops_domain *dma_dom; 1622 struct dma_ops_domain *dma_dom;
@@ -1305,7 +1625,7 @@ void prealloc_protection_domains(void)
1305 u16 devid; 1625 u16 devid;
1306 1626
1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1627 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1308 devid = (dev->bus->number << 8) | dev->devfn; 1628 devid = calc_devid(dev->bus->number, dev->devfn);
1309 if (devid > amd_iommu_last_bdf) 1629 if (devid > amd_iommu_last_bdf)
1310 continue; 1630 continue;
1311 devid = amd_iommu_alias_table[devid]; 1631 devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1672 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
1353 if (iommu->default_dom == NULL) 1673 if (iommu->default_dom == NULL)
1354 return -ENOMEM; 1674 return -ENOMEM;
1675 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
1355 ret = iommu_init_unity_mappings(iommu); 1676 ret = iommu_init_unity_mappings(iommu);
1356 if (ret) 1677 if (ret)
1357 goto free_domains; 1678 goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
1375 /* Make the driver finally visible to the drivers */ 1696 /* Make the driver finally visible to the drivers */
1376 dma_ops = &amd_iommu_dma_ops; 1697 dma_ops = &amd_iommu_dma_ops;
1377 1698
1699 register_iommu(&amd_iommu_ops);
1700
1701 bus_register_notifier(&pci_bus_type, &device_nb);
1702
1703 amd_iommu_stats_init();
1704
1378 return 0; 1705 return 0;
1379 1706
1380free_domains: 1707free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
1386 1713
1387 return ret; 1714 return ret;
1388} 1715}
1716
1717/*****************************************************************************
1718 *
1719 * The following functions belong to the exported interface of AMD IOMMU
1720 *
1721 * This interface allows access to lower level functions of the IOMMU
1722 * like protection domain handling and assignement of devices to domains
1723 * which is not possible with the dma_ops interface.
1724 *
1725 *****************************************************************************/
1726
1727static void cleanup_domain(struct protection_domain *domain)
1728{
1729 unsigned long flags;
1730 u16 devid;
1731
1732 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1733
1734 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
1735 if (amd_iommu_pd_table[devid] == domain)
1736 __detach_device(domain, devid);
1737
1738 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1739}
1740
1741static int amd_iommu_domain_init(struct iommu_domain *dom)
1742{
1743 struct protection_domain *domain;
1744
1745 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1746 if (!domain)
1747 return -ENOMEM;
1748
1749 spin_lock_init(&domain->lock);
1750 domain->mode = PAGE_MODE_3_LEVEL;
1751 domain->id = domain_id_alloc();
1752 if (!domain->id)
1753 goto out_free;
1754 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1755 if (!domain->pt_root)
1756 goto out_free;
1757
1758 dom->priv = domain;
1759
1760 return 0;
1761
1762out_free:
1763 kfree(domain);
1764
1765 return -ENOMEM;
1766}
1767
1768static void amd_iommu_domain_destroy(struct iommu_domain *dom)
1769{
1770 struct protection_domain *domain = dom->priv;
1771
1772 if (!domain)
1773 return;
1774
1775 if (domain->dev_cnt > 0)
1776 cleanup_domain(domain);
1777
1778 BUG_ON(domain->dev_cnt != 0);
1779
1780 free_pagetable(domain);
1781
1782 domain_id_free(domain->id);
1783
1784 kfree(domain);
1785
1786 dom->priv = NULL;
1787}
1788
1789static void amd_iommu_detach_device(struct iommu_domain *dom,
1790 struct device *dev)
1791{
1792 struct protection_domain *domain = dom->priv;
1793 struct amd_iommu *iommu;
1794 struct pci_dev *pdev;
1795 u16 devid;
1796
1797 if (dev->bus != &pci_bus_type)
1798 return;
1799
1800 pdev = to_pci_dev(dev);
1801
1802 devid = calc_devid(pdev->bus->number, pdev->devfn);
1803
1804 if (devid > 0)
1805 detach_device(domain, devid);
1806
1807 iommu = amd_iommu_rlookup_table[devid];
1808 if (!iommu)
1809 return;
1810
1811 iommu_queue_inv_dev_entry(iommu, devid);
1812 iommu_completion_wait(iommu);
1813}
1814
1815static int amd_iommu_attach_device(struct iommu_domain *dom,
1816 struct device *dev)
1817{
1818 struct protection_domain *domain = dom->priv;
1819 struct protection_domain *old_domain;
1820 struct amd_iommu *iommu;
1821 struct pci_dev *pdev;
1822 u16 devid;
1823
1824 if (dev->bus != &pci_bus_type)
1825 return -EINVAL;
1826
1827 pdev = to_pci_dev(dev);
1828
1829 devid = calc_devid(pdev->bus->number, pdev->devfn);
1830
1831 if (devid >= amd_iommu_last_bdf ||
1832 devid != amd_iommu_alias_table[devid])
1833 return -EINVAL;
1834
1835 iommu = amd_iommu_rlookup_table[devid];
1836 if (!iommu)
1837 return -EINVAL;
1838
1839 old_domain = domain_for_device(devid);
1840 if (old_domain)
1841 return -EBUSY;
1842
1843 attach_device(iommu, domain, devid);
1844
1845 iommu_completion_wait(iommu);
1846
1847 return 0;
1848}
1849
1850static int amd_iommu_map_range(struct iommu_domain *dom,
1851 unsigned long iova, phys_addr_t paddr,
1852 size_t size, int iommu_prot)
1853{
1854 struct protection_domain *domain = dom->priv;
1855 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
1856 int prot = 0;
1857 int ret;
1858
1859 if (iommu_prot & IOMMU_READ)
1860 prot |= IOMMU_PROT_IR;
1861 if (iommu_prot & IOMMU_WRITE)
1862 prot |= IOMMU_PROT_IW;
1863
1864 iova &= PAGE_MASK;
1865 paddr &= PAGE_MASK;
1866
1867 for (i = 0; i < npages; ++i) {
1868 ret = iommu_map_page(domain, iova, paddr, prot);
1869 if (ret)
1870 return ret;
1871
1872 iova += PAGE_SIZE;
1873 paddr += PAGE_SIZE;
1874 }
1875
1876 return 0;
1877}
1878
1879static void amd_iommu_unmap_range(struct iommu_domain *dom,
1880 unsigned long iova, size_t size)
1881{
1882
1883 struct protection_domain *domain = dom->priv;
1884 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
1885
1886 iova &= PAGE_MASK;
1887
1888 for (i = 0; i < npages; ++i) {
1889 iommu_unmap_page(domain, iova);
1890 iova += PAGE_SIZE;
1891 }
1892
1893 iommu_flush_domain(domain->id);
1894}
1895
1896static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
1897 unsigned long iova)
1898{
1899 struct protection_domain *domain = dom->priv;
1900 unsigned long offset = iova & ~PAGE_MASK;
1901 phys_addr_t paddr;
1902 u64 *pte;
1903
1904 pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
1905
1906 if (!IOMMU_PTE_PRESENT(*pte))
1907 return 0;
1908
1909 pte = IOMMU_PTE_PAGE(*pte);
1910 pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
1911
1912 if (!IOMMU_PTE_PRESENT(*pte))
1913 return 0;
1914
1915 pte = IOMMU_PTE_PAGE(*pte);
1916 pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
1917
1918 if (!IOMMU_PTE_PRESENT(*pte))
1919 return 0;
1920
1921 paddr = *pte & IOMMU_PAGE_MASK;
1922 paddr |= offset;
1923
1924 return paddr;
1925}
1926
1927static struct iommu_ops amd_iommu_ops = {
1928 .domain_init = amd_iommu_domain_init,
1929 .domain_destroy = amd_iommu_domain_destroy,
1930 .attach_dev = amd_iommu_attach_device,
1931 .detach_dev = amd_iommu_detach_device,
1932 .map = amd_iommu_map_range,
1933 .unmap = amd_iommu_unmap_range,
1934 .iova_to_phys = amd_iommu_iova_to_phys,
1935};
1936
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55ca..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 we find in ACPI */ 123 we find in ACPI */
124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
125int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ 125bool amd_iommu_isolate = true; /* if true, device isolation is
126 enabled */
126bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 127bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
127 128
128LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 129LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -243,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
243} 244}
244 245
245/* Function to enable the hardware */ 246/* Function to enable the hardware */
246void __init iommu_enable(struct amd_iommu *iommu) 247static void __init iommu_enable(struct amd_iommu *iommu)
247{ 248{
248 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " 249 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
249 "at %02x:%02x.%x cap 0x%hx\n", 250 dev_name(&iommu->dev->dev), iommu->cap_ptr);
250 iommu->dev->bus->number,
251 PCI_SLOT(iommu->dev->devfn),
252 PCI_FUNC(iommu->dev->devfn),
253 iommu->cap_ptr);
254 251
255 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
256} 253}
257 254
258/* Function to enable IOMMU event logging and event interrupts */ 255/* Function to enable IOMMU event logging and event interrupts */
259void __init iommu_enable_event_logging(struct amd_iommu *iommu) 256static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
260{ 257{
261 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 258 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
262 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 259 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
1218{ 1215{
1219 for (; *str; ++str) { 1216 for (; *str; ++str) {
1220 if (strncmp(str, "isolate", 7) == 0) 1217 if (strncmp(str, "isolate", 7) == 0)
1221 amd_iommu_isolate = 1; 1218 amd_iommu_isolate = true;
1222 if (strncmp(str, "share", 5) == 0) 1219 if (strncmp(str, "share", 5) == 0)
1223 amd_iommu_isolate = 0; 1220 amd_iommu_isolate = false;
1224 if (strncmp(str, "fullflush", 9) == 0) 1221 if (strncmp(str, "fullflush", 9) == 0)
1225 amd_iommu_unmap_flush = true; 1222 amd_iommu_unmap_flush = true;
1226 } 1223 }
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index b5229affb953..b13d3c4dbd42 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
98#ifdef HAVE_X2APIC 98#ifdef HAVE_X2APIC
99int x2apic; 99int x2apic;
100/* x2apic enabled before OS handover */ 100/* x2apic enabled before OS handover */
101int x2apic_preenabled; 101static int x2apic_preenabled;
102int disable_x2apic; 102static int disable_x2apic;
103static __init int setup_nox2apic(char *str) 103static __init int setup_nox2apic(char *str)
104{ 104{
105 disable_x2apic = 1; 105 disable_x2apic = 1;
@@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
119 119
120int first_system_vector = 0xfe; 120int first_system_vector = 0xfe;
121 121
122char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
123
124/* 122/*
125 * Debug level, exported for io_apic.c 123 * Debug level, exported for io_apic.c
126 */ 124 */
@@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
142 struct clock_event_device *evt); 140 struct clock_event_device *evt);
143static void lapic_timer_setup(enum clock_event_mode mode, 141static void lapic_timer_setup(enum clock_event_mode mode,
144 struct clock_event_device *evt); 142 struct clock_event_device *evt);
145static void lapic_timer_broadcast(cpumask_t mask); 143static void lapic_timer_broadcast(const struct cpumask *mask);
146static void apic_pm_activate(void); 144static void apic_pm_activate(void);
147 145
148/* 146/*
@@ -228,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id)
228 apic_write(APIC_ICR, low); 226 apic_write(APIC_ICR, low);
229} 227}
230 228
231u64 xapic_icr_read(void) 229static u64 xapic_icr_read(void)
232{ 230{
233 u32 icr1, icr2; 231 u32 icr1, icr2;
234 232
@@ -268,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id)
268 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); 266 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
269} 267}
270 268
271u64 x2apic_icr_read(void) 269static u64 x2apic_icr_read(void)
272{ 270{
273 unsigned long val; 271 unsigned long val;
274 272
@@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
455/* 453/*
456 * Local APIC timer broadcast function 454 * Local APIC timer broadcast function
457 */ 455 */
458static void lapic_timer_broadcast(cpumask_t mask) 456static void lapic_timer_broadcast(const struct cpumask *mask)
459{ 457{
460#ifdef CONFIG_SMP 458#ifdef CONFIG_SMP
461 send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 459 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
471 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 469 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
472 470
473 memcpy(levt, &lapic_clockevent, sizeof(*levt)); 471 memcpy(levt, &lapic_clockevent, sizeof(*levt));
474 levt->cpumask = cpumask_of_cpu(smp_processor_id()); 472 levt->cpumask = cpumask_of(smp_processor_id());
475 473
476 clockevents_register_device(levt); 474 clockevents_register_device(levt);
477} 475}
@@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1807void __cpuinit generic_processor_info(int apicid, int version) 1805void __cpuinit generic_processor_info(int apicid, int version)
1808{ 1806{
1809 int cpu; 1807 int cpu;
1810 cpumask_t tmp_map;
1811 1808
1812 /* 1809 /*
1813 * Validate version 1810 * Validate version
1814 */ 1811 */
1815 if (version == 0x0) { 1812 if (version == 0x0) {
1816 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " 1813 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1817 "fixing up to 0x10. (tell your hw vendor)\n", 1814 "fixing up to 0x10. (tell your hw vendor)\n",
1818 version); 1815 version);
1819 version = 0x10; 1816 version = 0x10;
1820 } 1817 }
1821 apic_version[apicid] = version; 1818 apic_version[apicid] = version;
1822 1819
1823 if (num_processors >= NR_CPUS) { 1820 if (num_processors >= nr_cpu_ids) {
1824 pr_warning("WARNING: NR_CPUS limit of %i reached." 1821 int max = nr_cpu_ids;
1825 " Processor ignored.\n", NR_CPUS); 1822 int thiscpu = max + disabled_cpus;
1823
1824 pr_warning(
1825 "ACPI: NR_CPUS/possible_cpus limit of %i reached."
1826 " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
1827
1828 disabled_cpus++;
1826 return; 1829 return;
1827 } 1830 }
1828 1831
1829 num_processors++; 1832 num_processors++;
1830 cpus_complement(tmp_map, cpu_present_map); 1833 cpu = cpumask_next_zero(-1, cpu_present_mask);
1831 cpu = first_cpu(tmp_map);
1832 1834
1833 physid_set(apicid, phys_cpu_present_map); 1835 physid_set(apicid, phys_cpu_present_map);
1834 if (apicid == boot_cpu_physical_apicid) { 1836 if (apicid == boot_cpu_physical_apicid) {
@@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1878 } 1880 }
1879#endif 1881#endif
1880 1882
1881 cpu_set(cpu, cpu_possible_map); 1883 set_cpu_possible(cpu, true);
1882 cpu_set(cpu, cpu_present_map); 1884 set_cpu_present(cpu, true);
1883} 1885}
1884 1886
1885#ifdef CONFIG_X86_64 1887#ifdef CONFIG_X86_64
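
The generic_processor_info() change above drops the on-stack temporary mask entirely: instead of complementing cpu_present_map and taking first_cpu(), it asks cpumask_next_zero() directly for the first clear bit. In isolation the new idiom looks like this (the helper name is made up):

#include <linux/cpumask.h>
#include <linux/errno.h>

/* Lowest CPU number not yet marked present, with no temporary cpumask. */
static int example_first_absent_cpu(void)
{
	int cpu = cpumask_next_zero(-1, cpu_present_mask);

	if (cpu >= nr_cpu_ids)
		return -ENODEV;	/* all possible CPU slots already present */

	return cpu;
}
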
@@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void)
2081 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); 2083 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2082 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 2084 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2083 2085
2084 for (i = 0; i < NR_CPUS; i++) { 2086 for (i = 0; i < nr_cpu_ids; i++) {
2085 /* are we being called early in kernel startup? */ 2087 /* are we being called early in kernel startup? */
2086 if (bios_cpu_apicid) { 2088 if (bios_cpu_apicid) {
2087 id = bios_cpu_apicid[i]; 2089 id = bios_cpu_apicid[i];
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac26..f63882728d91 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
25#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27 27
28struct uv_systab uv_systab; 28static struct uv_systab uv_systab;
29 29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 42e0853030cb..3f95a40f718a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
355 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 355 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
356 } else if (smp_num_siblings > 1) { 356 } else if (smp_num_siblings > 1) {
357 357
358 if (smp_num_siblings > NR_CPUS) { 358 if (smp_num_siblings > nr_cpu_ids) {
359 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", 359 printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
360 smp_num_siblings); 360 smp_num_siblings);
361 smp_num_siblings = 1; 361 smp_num_siblings = 1;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 88ea02dcb622..28102ad1a363 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
517 } 517 }
518} 518}
519 519
520static void free_acpi_perf_data(void)
521{
522 unsigned int i;
523
524 /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
525 for_each_possible_cpu(i)
526 free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
527 ->shared_cpu_map);
528 free_percpu(acpi_perf_data);
529}
530
520/* 531/*
521 * acpi_cpufreq_early_init - initialize ACPI P-States library 532 * acpi_cpufreq_early_init - initialize ACPI P-States library
522 * 533 *
@@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
527 */ 538 */
528static int __init acpi_cpufreq_early_init(void) 539static int __init acpi_cpufreq_early_init(void)
529{ 540{
541 unsigned int i;
530 dprintk("acpi_cpufreq_early_init\n"); 542 dprintk("acpi_cpufreq_early_init\n");
531 543
532 acpi_perf_data = alloc_percpu(struct acpi_processor_performance); 544 acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
@@ -534,6 +546,16 @@ static int __init acpi_cpufreq_early_init(void)
534 dprintk("Memory allocation error for acpi_perf_data.\n"); 546 dprintk("Memory allocation error for acpi_perf_data.\n");
535 return -ENOMEM; 547 return -ENOMEM;
536 } 548 }
549 for_each_possible_cpu(i) {
550 if (!alloc_cpumask_var_node(
551 &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
552 GFP_KERNEL, cpu_to_node(i))) {
553
554 /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
555 free_acpi_perf_data();
556 return -ENOMEM;
557 }
558 }
537 559
538 /* Do initialization in ACPI core */ 560 /* Do initialization in ACPI core */
539 acpi_processor_preregister_performance(acpi_perf_data); 561 acpi_processor_preregister_performance(acpi_perf_data);
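
acpi_cpufreq_early_init() now pairs the per-CPU performance data with a NUMA-local cpumask allocation, and free_acpi_perf_data() unwinds both. The overall shape, with illustrative names, is roughly:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/topology.h>

struct example_perf {
	cpumask_var_t shared_cpu_map;
};

static struct example_perf *example_data;

static void example_free(void)
{
	unsigned int i;

	/* freeing a NULL cpumask_var_t is OK, and alloc_percpu() zeroes */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(example_data, i)->shared_cpu_map);
	free_percpu(example_data);
}

static int example_early_init(void)
{
	unsigned int i;

	example_data = alloc_percpu(struct example_perf);
	if (!example_data)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		if (!alloc_cpumask_var_node(
				&per_cpu_ptr(example_data, i)->shared_cpu_map,
				GFP_KERNEL, cpu_to_node(i))) {
			example_free();
			return -ENOMEM;
		}
	}

	return 0;
}
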
@@ -604,9 +626,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
604 */ 626 */
605 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || 627 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
606 policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { 628 policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
607 policy->cpus = perf->shared_cpu_map; 629 cpumask_copy(&policy->cpus, perf->shared_cpu_map);
608 } 630 }
609 policy->related_cpus = perf->shared_cpu_map; 631 cpumask_copy(&policy->related_cpus, perf->shared_cpu_map);
610 632
611#ifdef CONFIG_SMP 633#ifdef CONFIG_SMP
612 dmi_check_system(sw_any_bug_dmi_table); 634 dmi_check_system(sw_any_bug_dmi_table);
@@ -795,7 +817,7 @@ static int __init acpi_cpufreq_init(void)
795 817
796 ret = cpufreq_register_driver(&acpi_cpufreq_driver); 818 ret = cpufreq_register_driver(&acpi_cpufreq_driver);
797 if (ret) 819 if (ret)
798 free_percpu(acpi_perf_data); 820 free_acpi_perf_data();
799 821
800 return ret; 822 return ret;
801} 823}
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b8e05ee4f736..beea4466b063 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
160 switch (c->x86_model) { 160 switch (c->x86_model) {
161 case 0x0E: /* Core */ 161 case 0x0E: /* Core */
162 case 0x0F: /* Core Duo */ 162 case 0x0F: /* Core Duo */
163 case 0x16: /* Celeron Core */
163 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 164 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
164 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); 165 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
165 case 0x0D: /* Pentium M (Dothan) */ 166 case 0x0D: /* Pentium M (Dothan) */
@@ -171,7 +172,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
171 } 172 }
172 173
173 if (c->x86 != 0xF) { 174 if (c->x86 != 0xF) {
174 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); 175 if (!cpu_has(c, X86_FEATURE_EST))
176 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
177 "Please send an e-mail to <cpufreq@vger.kernel.org>\n");
175 return 0; 178 return 0;
176 } 179 }
177 180
@@ -274,6 +277,7 @@ static struct cpufreq_driver p4clockmod_driver = {
274 .name = "p4-clockmod", 277 .name = "p4-clockmod",
275 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
276 .attr = p4clockmod_attr, 279 .attr = p4clockmod_attr,
280 .hide_interface = 1,
277}; 281};
278 282
279 283
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7c7d56b43136..1b446d79a8fd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void)
310 goto err0; 310 goto err0;
311 } 311 }
312 312
313 if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
314 GFP_KERNEL)) {
315 retval = -ENOMEM;
316 goto err05;
317 }
318
313 if (acpi_processor_register_performance(acpi_processor_perf, 0)) { 319 if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
314 retval = -EIO; 320 retval = -EIO;
315 goto err1; 321 goto err1;
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void)
412err2: 418err2:
413 acpi_processor_unregister_performance(acpi_processor_perf, 0); 419 acpi_processor_unregister_performance(acpi_processor_perf, 0);
414err1: 420err1:
421 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
422err05:
415 kfree(acpi_processor_perf); 423 kfree(acpi_processor_perf);
416err0: 424err0:
417 printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); 425 printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
652#ifdef CONFIG_X86_POWERNOW_K7_ACPI 660#ifdef CONFIG_X86_POWERNOW_K7_ACPI
653 if (acpi_processor_perf) { 661 if (acpi_processor_perf) {
654 acpi_processor_unregister_performance(acpi_processor_perf, 0); 662 acpi_processor_unregister_performance(acpi_processor_perf, 0);
663 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
655 kfree(acpi_processor_perf); 664 kfree(acpi_processor_perf);
656 } 665 }
657#endif 666#endif
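
The powernow-k7 hunks allocate shared_cpu_map with alloc_cpumask_var() before registering with the ACPI perflib and free it again on every exit path. A hedged sketch of that allocate/unwind pattern; struct example_ctx and the function names are illustrative only:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct example_ctx {
	cpumask_var_t shared_cpu_map;
};

static int example_init(struct example_ctx *ctx)
{
	/* With CONFIG_CPUMASK_OFFSTACK this really allocates memory;
	 * otherwise it is a no-op that always succeeds, so the error
	 * path must exist even though it rarely triggers. */
	if (!alloc_cpumask_var(&ctx->shared_cpu_map, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}

static void example_exit(struct example_ctx *ctx)
{
	free_cpumask_var(ctx->shared_cpu_map);	/* safe in both configurations */
}
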
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7f05f44b97e9..c3c9adbaa26f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
766static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 766static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
767{ 767{
768 struct cpufreq_frequency_table *powernow_table; 768 struct cpufreq_frequency_table *powernow_table;
769 int ret_val; 769 int ret_val = -ENODEV;
770 770
771 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 771 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
772 dprintk("register performance failed: bad ACPI data\n"); 772 dprintk("register performance failed: bad ACPI data\n");
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
815 /* notify BIOS that we exist */ 815 /* notify BIOS that we exist */
816 acpi_processor_notify_smm(THIS_MODULE); 816 acpi_processor_notify_smm(THIS_MODULE);
817 817
818 if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
819 printk(KERN_ERR PFX
820 "unable to alloc powernow_k8_data cpumask\n");
821 ret_val = -ENOMEM;
822 goto err_out_mem;
823 }
824
818 return 0; 825 return 0;
819 826
820err_out_mem: 827err_out_mem:
@@ -826,7 +833,7 @@ err_out:
826 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ 833 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
827 data->acpi_data.state_count = 0; 834 data->acpi_data.state_count = 0;
828 835
829 return -ENODEV; 836 return ret_val;
830} 837}
831 838
832static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) 839static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
929{ 936{
930 if (data->acpi_data.state_count) 937 if (data->acpi_data.state_count)
931 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 938 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
939 free_cpumask_var(data->acpi_data.shared_cpu_map);
932} 940}
933 941
934#else 942#else
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1134 data->cpu = pol->cpu; 1142 data->cpu = pol->cpu;
1135 data->currpstate = HW_PSTATE_INVALID; 1143 data->currpstate = HW_PSTATE_INVALID;
1136 1144
1137 if (powernow_k8_cpu_init_acpi(data)) { 1145 rc = powernow_k8_cpu_init_acpi(data);
1146 if (rc) {
1138 /* 1147 /*
1139 * Use the PSB BIOS structure. This is only availabe on 1148 * Use the PSB BIOS structure. This is only availabe on
1140 * an UP version, and is deprecated by AMD. 1149 * an UP version, and is deprecated by AMD.
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1152 "ACPI maintainers and complain to your BIOS " 1161 "ACPI maintainers and complain to your BIOS "
1153 "vendor.\n"); 1162 "vendor.\n");
1154#endif 1163#endif
1155 kfree(data); 1164 goto err_out;
1156 return -ENODEV;
1157 } 1165 }
1158 if (pol->cpu != 0) { 1166 if (pol->cpu != 0) {
1159 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1167 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1160 "CPU other than CPU0. Complain to your BIOS " 1168 "CPU other than CPU0. Complain to your BIOS "
1161 "vendor.\n"); 1169 "vendor.\n");
1162 kfree(data); 1170 goto err_out;
1163 return -ENODEV;
1164 } 1171 }
1165 rc = find_psb_table(data); 1172 rc = find_psb_table(data);
1166 if (rc) { 1173 if (rc) {
1167 kfree(data); 1174 goto err_out;
1168 return -ENODEV;
1169 } 1175 }
1170 } 1176 }
1171 1177
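
The powernow-k8 change above replaces three copies of "kfree(data); return -ENODEV;" with a single err_out label and lets powernow_k8_cpu_init_acpi() report -ENOMEM versus -ENODEV through ret_val. A sketch of that single-exit shape, with stubbed-out helpers that are not part of the patch:

#include <linux/errno.h>
#include <linux/slab.h>

struct example_data { unsigned int cpu; };

static int example_init_acpi(struct example_data *d) { return -ENODEV; }  /* stub */
static int example_find_psb(struct example_data *d)  { return 0; }        /* stub */

static int example_cpu_init(struct example_data *data)
{
	int rc;

	rc = example_init_acpi(data);
	if (rc)
		goto err_out;	/* one cleanup path instead of three copies */

	rc = example_find_psb(data);
	if (rc)
		goto err_out;

	return 0;

err_out:
	kfree(data);
	return rc;		/* the caller can distinguish -ENOMEM from -ENODEV */
}
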
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 3b5f06423e77..f0ea6fa2f53c 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -459,9 +459,7 @@ static int centrino_verify (struct cpufreq_policy *policy)
459 * Sets a new CPUFreq policy. 459 * Sets a new CPUFreq policy.
460 */ 460 */
461struct allmasks { 461struct allmasks {
462 cpumask_t online_policy_cpus;
463 cpumask_t saved_mask; 462 cpumask_t saved_mask;
464 cpumask_t set_mask;
465 cpumask_t covered_cpus; 463 cpumask_t covered_cpus;
466}; 464};
467 465
@@ -475,9 +473,7 @@ static int centrino_target (struct cpufreq_policy *policy,
475 int retval = 0; 473 int retval = 0;
476 unsigned int j, k, first_cpu, tmp; 474 unsigned int j, k, first_cpu, tmp;
477 CPUMASK_ALLOC(allmasks); 475 CPUMASK_ALLOC(allmasks);
478 CPUMASK_PTR(online_policy_cpus, allmasks);
479 CPUMASK_PTR(saved_mask, allmasks); 476 CPUMASK_PTR(saved_mask, allmasks);
480 CPUMASK_PTR(set_mask, allmasks);
481 CPUMASK_PTR(covered_cpus, allmasks); 477 CPUMASK_PTR(covered_cpus, allmasks);
482 478
483 if (unlikely(allmasks == NULL)) 479 if (unlikely(allmasks == NULL))
@@ -497,30 +493,28 @@ static int centrino_target (struct cpufreq_policy *policy,
497 goto out; 493 goto out;
498 } 494 }
499 495
500#ifdef CONFIG_HOTPLUG_CPU
501 /* cpufreq holds the hotplug lock, so we are safe from here on */
502 cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
503#else
504 *online_policy_cpus = policy->cpus;
505#endif
506
507 *saved_mask = current->cpus_allowed; 496 *saved_mask = current->cpus_allowed;
508 first_cpu = 1; 497 first_cpu = 1;
509 cpus_clear(*covered_cpus); 498 cpus_clear(*covered_cpus);
510 for_each_cpu_mask_nr(j, *online_policy_cpus) { 499 for_each_cpu_mask_nr(j, policy->cpus) {
500 const cpumask_t *mask;
501
502 /* cpufreq holds the hotplug lock, so we are safe here */
503 if (!cpu_online(j))
504 continue;
505
511 /* 506 /*
512 * Support for SMP systems. 507 * Support for SMP systems.
513 * Make sure we are running on CPU that wants to change freq 508 * Make sure we are running on CPU that wants to change freq
514 */ 509 */
515 cpus_clear(*set_mask);
516 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) 510 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
517 cpus_or(*set_mask, *set_mask, *online_policy_cpus); 511 mask = &policy->cpus;
518 else 512 else
519 cpu_set(j, *set_mask); 513 mask = &cpumask_of_cpu(j);
520 514
521 set_cpus_allowed_ptr(current, set_mask); 515 set_cpus_allowed_ptr(current, mask);
522 preempt_disable(); 516 preempt_disable();
523 if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { 517 if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
524 dprintk("couldn't limit to CPUs in this domain\n"); 518 dprintk("couldn't limit to CPUs in this domain\n");
525 retval = -EAGAIN; 519 retval = -EAGAIN;
526 if (first_cpu) { 520 if (first_cpu) {
@@ -548,7 +542,9 @@ static int centrino_target (struct cpufreq_policy *policy,
548 dprintk("target=%dkHz old=%d new=%d msr=%04x\n", 542 dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
549 target_freq, freqs.old, freqs.new, msr); 543 target_freq, freqs.old, freqs.new, msr);
550 544
551 for_each_cpu_mask_nr(k, *online_policy_cpus) { 545 for_each_cpu_mask_nr(k, policy->cpus) {
546 if (!cpu_online(k))
547 continue;
552 freqs.cpu = k; 548 freqs.cpu = k;
553 cpufreq_notify_transition(&freqs, 549 cpufreq_notify_transition(&freqs,
554 CPUFREQ_PRECHANGE); 550 CPUFREQ_PRECHANGE);
@@ -571,7 +567,9 @@ static int centrino_target (struct cpufreq_policy *policy,
571 preempt_enable(); 567 preempt_enable();
572 } 568 }
573 569
574 for_each_cpu_mask_nr(k, *online_policy_cpus) { 570 for_each_cpu_mask_nr(k, policy->cpus) {
571 if (!cpu_online(k))
572 continue;
575 freqs.cpu = k; 573 freqs.cpu = k;
576 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 574 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
577 } 575 }
@@ -584,18 +582,17 @@ static int centrino_target (struct cpufreq_policy *policy,
584 * Best effort undo.. 582 * Best effort undo..
585 */ 583 */
586 584
587 if (!cpus_empty(*covered_cpus)) 585 for_each_cpu_mask_nr(j, *covered_cpus) {
588 for_each_cpu_mask_nr(j, *covered_cpus) { 586 set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
589 set_cpus_allowed_ptr(current, 587 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
590 &cpumask_of_cpu(j)); 588 }
591 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
592 }
593 589
594 tmp = freqs.new; 590 tmp = freqs.new;
595 freqs.new = freqs.old; 591 freqs.new = freqs.old;
596 freqs.old = tmp; 592 freqs.old = tmp;
597 for_each_cpu_mask_nr(j, *online_policy_cpus) { 593 for_each_cpu_mask_nr(j, policy->cpus) {
598 freqs.cpu = j; 594 if (!cpu_online(j))
595 continue;
599 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 596 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
600 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 597 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
601 } 598 }
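
The centrino_target() rework above drops the on-stack online_policy_cpus and set_mask copies: it walks policy->cpus directly, skips offline CPUs (cpufreq holds the hotplug lock), and binds the calling task with set_cpus_allowed_ptr(). A rough sketch of that loop shape; the example_* names are illustrative:

#include <linux/cpumask.h>
#include <linux/sched.h>

static void example_run_on_policy_cpus(const cpumask_t *policy_cpus,
				       void (*fn)(unsigned int cpu))
{
	cpumask_t saved_mask = current->cpus_allowed;
	unsigned int j;

	for_each_cpu_mask_nr(j, *policy_cpus) {
		if (!cpu_online(j))	/* hotplug lock held by the cpufreq core */
			continue;
		/* Migrate ourselves to CPU j, then do the per-CPU work there. */
		set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
		fn(j);
	}
	set_cpus_allowed_ptr(current, &saved_mask);
}
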
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 98d4fdb7dc04..cdac7d62369b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void)
139 case 3: 139 case 3:
140 fsb = 166667; 140 fsb = 166667;
141 break; 141 break;
142 case 2:
143 fsb = 200000;
144 break;
145 case 0:
146 fsb = 266667;
147 break;
148 case 4:
149 fsb = 333333;
150 break;
142 default: 151 default:
143 printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); 152 printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
144 } 153 }
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 68b5d8681cbb..48533d77be78 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
534 per_cpu(cpuid4_info, cpu) = NULL; 534 per_cpu(cpuid4_info, cpu) = NULL;
535} 535}
536 536
537static int __cpuinit detect_cache_attributes(unsigned int cpu) 537static void __cpuinit get_cpu_leaves(void *_retval)
538{ 538{
539 struct _cpuid4_info *this_leaf; 539 int j, *retval = _retval, cpu = smp_processor_id();
540 unsigned long j;
541 int retval;
542 cpumask_t oldmask;
543
544 if (num_cache_leaves == 0)
545 return -ENOENT;
546
547 per_cpu(cpuid4_info, cpu) = kzalloc(
548 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
549 if (per_cpu(cpuid4_info, cpu) == NULL)
550 return -ENOMEM;
551
552 oldmask = current->cpus_allowed;
553 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
554 if (retval)
555 goto out;
556 540
557 /* Do cpuid and store the results */ 541 /* Do cpuid and store the results */
558 for (j = 0; j < num_cache_leaves; j++) { 542 for (j = 0; j < num_cache_leaves; j++) {
543 struct _cpuid4_info *this_leaf;
559 this_leaf = CPUID4_INFO_IDX(cpu, j); 544 this_leaf = CPUID4_INFO_IDX(cpu, j);
560 retval = cpuid4_cache_lookup(j, this_leaf); 545 *retval = cpuid4_cache_lookup(j, this_leaf);
561 if (unlikely(retval < 0)) { 546 if (unlikely(*retval < 0)) {
562 int i; 547 int i;
563 548
564 for (i = 0; i < j; i++) 549 for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
567 } 552 }
568 cache_shared_cpu_map_setup(cpu, j); 553 cache_shared_cpu_map_setup(cpu, j);
569 } 554 }
570 set_cpus_allowed_ptr(current, &oldmask); 555}
556
557static int __cpuinit detect_cache_attributes(unsigned int cpu)
558{
559 int retval;
560
561 if (num_cache_leaves == 0)
562 return -ENOENT;
563
564 per_cpu(cpuid4_info, cpu) = kzalloc(
565 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
566 if (per_cpu(cpuid4_info, cpu) == NULL)
567 return -ENOMEM;
571 568
572out: 569 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
573 if (retval) { 570 if (retval) {
574 kfree(per_cpu(cpuid4_info, cpu)); 571 kfree(per_cpu(cpuid4_info, cpu));
575 per_cpu(cpuid4_info, cpu) = NULL; 572 per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
626 cpumask_t *mask = &this_leaf->shared_cpu_map; 623 cpumask_t *mask = &this_leaf->shared_cpu_map;
627 624
628 n = type? 625 n = type?
629 cpulist_scnprintf(buf, len-2, *mask): 626 cpulist_scnprintf(buf, len-2, mask) :
630 cpumask_scnprintf(buf, len-2, *mask); 627 cpumask_scnprintf(buf, len-2, mask);
631 buf[n++] = '\n'; 628 buf[n++] = '\n';
632 buf[n] = '\0'; 629 buf[n] = '\0';
633 } 630 }
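
The intel_cacheinfo rework stops migrating the caller with set_cpus_allowed_ptr() and instead runs get_cpu_leaves() on the target CPU via smp_call_function_single(), passing a pointer the callee fills in. A minimal sketch of that shape; example_read_local() merely stands in for the real per-CPU CPUID work:

#include <linux/errno.h>
#include <linux/smp.h>

/* Runs on the target CPU in interrupt context: keep it short, no sleeping. */
static void example_read_local(void *info)
{
	int *ret = info;

	*ret = smp_processor_id();	/* stand-in for per-CPU CPUID reads */
}

static int example_query_cpu(unsigned int cpu)
{
	int ret = -ENODEV;

	/* wait=1 means the result is stable once this call returns. */
	smp_call_function_single(cpu, example_read_local, &ret, 1);
	return ret;
}
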
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9e7a05..a5a5e0530370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
83 * CPU Initialization 83 * CPU Initialization
84 */ 84 */
85 85
86struct thresh_restart {
87 struct threshold_block *b;
88 int reset;
89 u16 old_limit;
90};
91
86/* must be called with correct cpu affinity */ 92/* must be called with correct cpu affinity */
87static void threshold_restart_bank(struct threshold_block *b, 93static long threshold_restart_bank(void *_tr)
88 int reset, u16 old_limit)
89{ 94{
95 struct thresh_restart *tr = _tr;
90 u32 mci_misc_hi, mci_misc_lo; 96 u32 mci_misc_hi, mci_misc_lo;
91 97
92 rdmsr(b->address, mci_misc_lo, mci_misc_hi); 98 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
93 99
94 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 100 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
95 reset = 1; /* limit cannot be lower than err count */ 101 tr->reset = 1; /* limit cannot be lower than err count */
96 102
97 if (reset) { /* reset err count and overflow bit */ 103 if (tr->reset) { /* reset err count and overflow bit */
98 mci_misc_hi = 104 mci_misc_hi =
99 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 105 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
100 (THRESHOLD_MAX - b->threshold_limit); 106 (THRESHOLD_MAX - tr->b->threshold_limit);
101 } else if (old_limit) { /* change limit w/o reset */ 107 } else if (tr->old_limit) { /* change limit w/o reset */
102 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 108 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
103 (old_limit - b->threshold_limit); 109 (tr->old_limit - tr->b->threshold_limit);
104 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 110 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
105 (new_count & THRESHOLD_MAX); 111 (new_count & THRESHOLD_MAX);
106 } 112 }
107 113
108 b->interrupt_enable ? 114 tr->b->interrupt_enable ?
109 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 115 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
110 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 116 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
111 117
112 mci_misc_hi |= MASK_COUNT_EN_HI; 118 mci_misc_hi |= MASK_COUNT_EN_HI;
113 wrmsr(b->address, mci_misc_lo, mci_misc_hi); 119 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
120 return 0;
114} 121}
115 122
116/* cpu init entry point, called from mce.c with preempt off */ 123/* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
120 unsigned int cpu = smp_processor_id(); 127 unsigned int cpu = smp_processor_id();
121 u8 lvt_off; 128 u8 lvt_off;
122 u32 low = 0, high = 0, address = 0; 129 u32 low = 0, high = 0, address = 0;
130 struct thresh_restart tr;
123 131
124 for (bank = 0; bank < NR_BANKS; ++bank) { 132 for (bank = 0; bank < NR_BANKS; ++bank) {
125 for (block = 0; block < NR_BLOCKS; ++block) { 133 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
162 wrmsr(address, low, high); 170 wrmsr(address, low, high);
163 171
164 threshold_defaults.address = address; 172 threshold_defaults.address = address;
165 threshold_restart_bank(&threshold_defaults, 0, 0); 173 tr.b = &threshold_defaults;
174 tr.reset = 0;
175 tr.old_limit = 0;
176 threshold_restart_bank(&tr);
166 } 177 }
167 } 178 }
168} 179}
@@ -251,20 +262,6 @@ struct threshold_attr {
251 ssize_t(*store) (struct threshold_block *, const char *, size_t count); 262 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
252}; 263};
253 264
254static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
255 cpumask_t *newmask)
256{
257 *oldmask = current->cpus_allowed;
258 cpus_clear(*newmask);
259 cpu_set(cpu, *newmask);
260 set_cpus_allowed_ptr(current, newmask);
261}
262
263static void affinity_restore(const cpumask_t *oldmask)
264{
265 set_cpus_allowed_ptr(current, oldmask);
266}
267
268#define SHOW_FIELDS(name) \ 265#define SHOW_FIELDS(name) \
269static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ 266static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
270{ \ 267{ \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
277 const char *buf, size_t count) 274 const char *buf, size_t count)
278{ 275{
279 char *end; 276 char *end;
280 cpumask_t oldmask, newmask; 277 struct thresh_restart tr;
281 unsigned long new = simple_strtoul(buf, &end, 0); 278 unsigned long new = simple_strtoul(buf, &end, 0);
282 if (end == buf) 279 if (end == buf)
283 return -EINVAL; 280 return -EINVAL;
284 b->interrupt_enable = !!new; 281 b->interrupt_enable = !!new;
285 282
286 affinity_set(b->cpu, &oldmask, &newmask); 283 tr.b = b;
287 threshold_restart_bank(b, 0, 0); 284 tr.reset = 0;
288 affinity_restore(&oldmask); 285 tr.old_limit = 0;
286 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
289 287
290 return end - buf; 288 return end - buf;
291} 289}
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
294 const char *buf, size_t count) 292 const char *buf, size_t count)
295{ 293{
296 char *end; 294 char *end;
297 cpumask_t oldmask, newmask; 295 struct thresh_restart tr;
298 u16 old;
299 unsigned long new = simple_strtoul(buf, &end, 0); 296 unsigned long new = simple_strtoul(buf, &end, 0);
300 if (end == buf) 297 if (end == buf)
301 return -EINVAL; 298 return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
303 new = THRESHOLD_MAX; 300 new = THRESHOLD_MAX;
304 if (new < 1) 301 if (new < 1)
305 new = 1; 302 new = 1;
306 old = b->threshold_limit; 303 tr.old_limit = b->threshold_limit;
307 b->threshold_limit = new; 304 b->threshold_limit = new;
305 tr.b = b;
306 tr.reset = 0;
308 307
309 affinity_set(b->cpu, &oldmask, &newmask); 308 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
310 threshold_restart_bank(b, 0, old);
311 affinity_restore(&oldmask);
312 309
313 return end - buf; 310 return end - buf;
314} 311}
315 312
316static ssize_t show_error_count(struct threshold_block *b, char *buf) 313static long local_error_count(void *_b)
317{ 314{
318 u32 high, low; 315 struct threshold_block *b = _b;
319 cpumask_t oldmask, newmask; 316 u32 low, high;
320 affinity_set(b->cpu, &oldmask, &newmask); 317
321 rdmsr(b->address, low, high); 318 rdmsr(b->address, low, high);
322 affinity_restore(&oldmask); 319 return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
323 return sprintf(buf, "%x\n", 320}
324 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 321
322static ssize_t show_error_count(struct threshold_block *b, char *buf)
323{
324 return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
325} 325}
326 326
327static ssize_t store_error_count(struct threshold_block *b, 327static ssize_t store_error_count(struct threshold_block *b,
328 const char *buf, size_t count) 328 const char *buf, size_t count)
329{ 329{
330 cpumask_t oldmask, newmask; 330 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
331 affinity_set(b->cpu, &oldmask, &newmask); 331
332 threshold_restart_bank(b, 1, 0); 332 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
333 affinity_restore(&oldmask);
334 return 1; 333 return 1;
335} 334}
336 335
@@ -463,12 +462,19 @@ out_free:
463 return err; 462 return err;
464} 463}
465 464
465static long local_allocate_threshold_blocks(void *_bank)
466{
467 unsigned int *bank = _bank;
468
469 return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
470 MSR_IA32_MC0_MISC + *bank * 4);
471}
472
466/* symlinks sibling shared banks to first core. first core owns dir/files. */ 473/* symlinks sibling shared banks to first core. first core owns dir/files. */
467static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) 474static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
468{ 475{
469 int i, err = 0; 476 int i, err = 0;
470 struct threshold_bank *b = NULL; 477 struct threshold_bank *b = NULL;
471 cpumask_t oldmask, newmask;
472 char name[32]; 478 char name[32];
473 479
474 sprintf(name, "threshold_bank%i", bank); 480 sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
519 525
520 per_cpu(threshold_banks, cpu)[bank] = b; 526 per_cpu(threshold_banks, cpu)[bank] = b;
521 527
522 affinity_set(cpu, &oldmask, &newmask); 528 err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
523 err = allocate_threshold_blocks(cpu, bank, 0,
524 MSR_IA32_MC0_MISC + bank * 4);
525 affinity_restore(&oldmask);
526
527 if (err) 529 if (err)
528 goto out_free; 530 goto out_free;
529 531
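
The mce_amd_64 conversion above packs the MSR arguments into struct thresh_restart and runs the accessors on the owning CPU with work_on_cpu(), whose callbacks use the long (*)(void *) signature, instead of temporarily rewriting the caller's affinity. A hedged sketch of that shape; struct example_msr and the helper names are invented for illustration:

#include <linux/types.h>
#include <linux/workqueue.h>
#include <asm/msr.h>

struct example_msr {
	u32 msr;
	u32 lo, hi;
};

/* Executes in process context on the CPU chosen by work_on_cpu(). */
static long example_read_msr(void *_arg)
{
	struct example_msr *a = _arg;

	rdmsr(a->msr, a->lo, a->hi);
	return 0;
}

static long example_read_msr_on(unsigned int cpu, struct example_msr *a)
{
	/* work_on_cpu() sleeps, so callers must be in process context too. */
	return work_on_cpu(cpu, example_read_msr, a);
}
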
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
14#include <asm/pat.h> 14#include <asm/pat.h>
15#include "mtrr.h" 15#include "mtrr.h"
16 16
17struct mtrr_state {
18 struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
19 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
20 unsigned char enabled;
21 unsigned char have_fixed;
22 mtrr_type def_type;
23};
24
25struct fixed_range_block { 17struct fixed_range_block {
26 int base_msr; /* start address of an MTRR block */ 18 int base_msr; /* start address of an MTRR block */
27 int ranges; /* number of MTRRs in this block */ 19 int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
35}; 27};
36 28
37static unsigned long smp_changes_mask; 29static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 30static int mtrr_state_set;
40u64 mtrr_tom2; 31u64 mtrr_tom2;
41 32
33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state);
35
42#undef MODULE_PARAM_PREFIX 36#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 37#define MODULE_PARAM_PREFIX "mtrr."
44 38
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e596..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
49 49
50u32 num_var_ranges = 0; 50u32 num_var_ranges = 0;
51 51
52unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 53static DEFINE_MUTEX(mtrr_mutex);
54 54
55u64 size_or_mask, size_and_mask; 55u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
@@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata =
824 824
825static int __init disable_mtrr_cleanup_setup(char *str) 825static int __init disable_mtrr_cleanup_setup(char *str)
826{ 826{
827 if (enable_mtrr_cleanup != -1) 827 enable_mtrr_cleanup = 0;
828 enable_mtrr_cleanup = 0;
829 return 0; 828 return 0;
830} 829}
831early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); 830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
832 831
833static int __init enable_mtrr_cleanup_setup(char *str) 832static int __init enable_mtrr_cleanup_setup(char *str)
834{ 833{
835 if (enable_mtrr_cleanup != -1) 834 enable_mtrr_cleanup = 1;
836 enable_mtrr_cleanup = 1;
837 return 0; 835 return 0;
838} 836}
839early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); 837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
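
The mtrr/main.c hunk drops the "!= -1" guards so whichever of enable_mtrr_cleanup/disable_mtrr_cleanup appears last on the command line wins. For reference, a generic sketch of the early_param() pattern those setup functions use; the option and variable names are made up:

#include <linux/init.h>

static int example_feature __initdata = -1;	/* -1: keep the built-in default */

static int __init example_feature_off(char *str)
{
	example_feature = 0;
	return 0;
}
early_param("disable_example_feature", example_feature_off);

static int __init example_feature_on(char *str)
{
	example_feature = 1;
	return 0;
}
early_param("enable_example_feature", example_feature_on);
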
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
8#define MTRRcap_MSR 0x0fe 8#define MTRRcap_MSR 0x0fe
9#define MTRRdefType_MSR 0x2ff 9#define MTRRdefType_MSR 0x2ff
10 10
11#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
13
14#define NUM_FIXED_RANGES 88
15#define MAX_VAR_RANGES 256
16#define MTRRfix64K_00000_MSR 0x250 11#define MTRRfix64K_00000_MSR 0x250
17#define MTRRfix16K_80000_MSR 0x258 12#define MTRRfix16K_80000_MSR 0x258
18#define MTRRfix16K_A0000_MSR 0x259 13#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
29#define MTRR_CHANGE_MASK_VARIABLE 0x02 24#define MTRR_CHANGE_MASK_VARIABLE 0x02
30#define MTRR_CHANGE_MASK_DEFTYPE 0x04 25#define MTRR_CHANGE_MASK_DEFTYPE 0x04
31 26
32/* In the Intel processor's MTRR interface, the MTRR type is always held in 27extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
33 an 8 bit field: */
34typedef u8 mtrr_type;
35
36extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
37 28
38struct mtrr_ops { 29struct mtrr_ops {
39 u32 vendor; 30 u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
70 u32 ccr3; 61 u32 ccr3;
71}; 62};
72 63
73struct mtrr_var_range {
74 u32 base_lo;
75 u32 base_hi;
76 u32 mask_lo;
77 u32 mask_hi;
78};
79
80void set_mtrr_done(struct set_mtrr_context *ctxt); 64void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 65void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 66void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649b..2ac1f0c2beb3 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/uaccess.h>
42 43
43#include <asm/processor.h> 44#include <asm/processor.h>
44#include <asm/msr.h> 45#include <asm/msr.h>
45#include <asm/uaccess.h>
46#include <asm/system.h> 46#include <asm/system.h>
47 47
48static struct class *cpuid_class; 48static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
82} 82}
83 83
84static ssize_t cpuid_read(struct file *file, char __user *buf, 84static ssize_t cpuid_read(struct file *file, char __user *buf,
85 size_t count, loff_t * ppos) 85 size_t count, loff_t *ppos)
86{ 86{
87 char __user *tmp = buf; 87 char __user *tmp = buf;
88 struct cpuid_regs cmd; 88 struct cpuid_regs cmd;
@@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file)
117 unsigned int cpu; 117 unsigned int cpu;
118 struct cpuinfo_x86 *c; 118 struct cpuinfo_x86 *c;
119 int ret = 0; 119 int ret = 0;
120 120
121 lock_kernel(); 121 lock_kernel();
122 122
123 cpu = iminor(file->f_path.dentry->d_inode); 123 cpu = iminor(file->f_path.dentry->d_inode);
124 if (cpu >= NR_CPUS || !cpu_online(cpu)) { 124 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
125 ret = -ENXIO; /* No such CPU */ 125 ret = -ENXIO; /* No such CPU */
126 goto out; 126 goto out;
127 } 127 }
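
The cpuid_open() hunk replaces the compile-time NR_CPUS bound with nr_cpu_ids, the number of possible CPU ids detected at boot. A tiny sketch of the same validation, factored into a hypothetical helper:

#include <linux/cpumask.h>
#include <linux/errno.h>

static int example_validate_cpu(unsigned int cpu)
{
	/* nr_cpu_ids can be far smaller than NR_CPUS, so it is the
	 * right bound for minor-number style lookups. */
	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
		return -ENXIO;
	return 0;
}
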
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h>
29 30
30#include <mach_ipi.h> 31#include <mach_ipi.h>
31 32
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
49#endif 50#endif
50 crash_save_cpu(regs, cpu); 51 crash_save_cpu(regs, cpu);
51 52
53 /* Disable VMX or SVM if needed.
54 *
55 * We need to disable virtualization on all CPUs.
56 * Having VMX or SVM enabled on any CPU may break rebooting
57 * after the kdump kernel has finished its task.
58 */
59 cpu_emergency_vmxoff();
60 cpu_emergency_svm_disable();
61
52 disable_local_APIC(); 62 disable_local_APIC();
53} 63}
54 64
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
80 local_irq_disable(); 90 local_irq_disable();
81 91
82 kdump_nmi_shootdown_cpus(); 92 kdump_nmi_shootdown_cpus();
93
94 /* Booting kdump kernel with VMX or SVM enabled won't work,
95 * because (among other limitations) we can't disable paging
96 * with the virt flags.
97 */
98 cpu_emergency_vmxoff();
99 cpu_emergency_svm_disable();
100
83 lapic_shutdown(); 101 lapic_shutdown();
84#if defined(CONFIG_X86_IO_APIC) 102#if defined(CONFIG_X86_IO_APIC)
85 disable_IO_APIC(); 103 disable_IO_APIC();
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9c..504ad198e4ad 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
886 va_list ap; 886 va_list ap;
887 887
888 va_start(ap, fmt); 888 va_start(ap, fmt);
889 n = vscnprintf(buf, 512, fmt, ap); 889 n = vscnprintf(buf, sizeof(buf), fmt, ap);
890 early_console->write(early_console, buf, n); 890 early_console->write(early_console, buf, n);
891 va_end(ap); 891 va_end(ap);
892} 892}
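
The early_printk() fix above ties the vscnprintf() bound to the buffer with sizeof() instead of a hard-coded 512. A small sketch of the same idiom with an invented buffer name:

#include <linux/kernel.h>
#include <stdarg.h>

static char example_buf[512];

static int example_format(const char *fmt, ...)
{
	va_list args;
	int n;

	va_start(args, fmt);
	/* sizeof(example_buf) stays correct if the array is ever resized. */
	n = vscnprintf(example_buf, sizeof(example_buf), fmt, args);
	va_end(args);
	return n;
}
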
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..34185488e4fb 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
30 return 1; 30 return 1;
31} 31}
32 32
33static cpumask_t flat_target_cpus(void) 33static const struct cpumask *flat_target_cpus(void)
34{ 34{
35 return cpu_online_map; 35 return cpu_online_mask;
36} 36}
37 37
38static cpumask_t flat_vector_allocation_domain(int cpu) 38static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
39{ 39{
40 /* Careful. Some cpus do not strictly honor the set of cpus 40 /* Careful. Some cpus do not strictly honor the set of cpus
41 * specified in the interrupt destination when using lowest 41 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
45 * deliver interrupts to the wrong hyperthread when only one 45 * deliver interrupts to the wrong hyperthread when only one
46 * hyperthread was specified in the interrupt desitination. 46 * hyperthread was specified in the interrupt desitination.
47 */ 47 */
48 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 48 cpumask_clear(retmask);
49 return domain; 49 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
50} 50}
51 51
52/* 52/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
69 apic_write(APIC_LDR, val); 69 apic_write(APIC_LDR, val);
70} 70}
71 71
72static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 72static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
73{ 73{
74 unsigned long mask = cpus_addr(cpumask)[0];
75 unsigned long flags; 74 unsigned long flags;
76 75
77 local_irq_save(flags); 76 local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
79 local_irq_restore(flags); 78 local_irq_restore(flags);
80} 79}
81 80
81static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
82{
83 unsigned long mask = cpumask_bits(cpumask)[0];
84
85 _flat_send_IPI_mask(mask, vector);
86}
87
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
89 int vector)
90{
91 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id();
93
94 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask);
96 _flat_send_IPI_mask(mask, vector);
97}
98
82static void flat_send_IPI_allbutself(int vector) 99static void flat_send_IPI_allbutself(int vector)
83{ 100{
101 int cpu = smp_processor_id();
84#ifdef CONFIG_HOTPLUG_CPU 102#ifdef CONFIG_HOTPLUG_CPU
85 int hotplug = 1; 103 int hotplug = 1;
86#else 104#else
87 int hotplug = 0; 105 int hotplug = 0;
88#endif 106#endif
89 if (hotplug || vector == NMI_VECTOR) { 107 if (hotplug || vector == NMI_VECTOR) {
90 cpumask_t allbutme = cpu_online_map; 108 if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
109 unsigned long mask = cpumask_bits(cpu_online_mask)[0];
91 110
92 cpu_clear(smp_processor_id(), allbutme); 111 if (cpu < BITS_PER_LONG)
112 clear_bit(cpu, &mask);
93 113
94 if (!cpus_empty(allbutme)) 114 _flat_send_IPI_mask(mask, vector);
95 flat_send_IPI_mask(allbutme, vector); 115 }
96 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
97 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
98 } 118 }
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
101static void flat_send_IPI_all(int vector) 121static void flat_send_IPI_all(int vector)
102{ 122{
103 if (vector == NMI_VECTOR) 123 if (vector == NMI_VECTOR)
104 flat_send_IPI_mask(cpu_online_map, vector); 124 flat_send_IPI_mask(cpu_online_mask, vector);
105 else 125 else
106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
107} 127}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
135 return physid_isset(read_xapic_id(), phys_cpu_present_map); 155 return physid_isset(read_xapic_id(), phys_cpu_present_map);
136} 156}
137 157
138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 158static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
159{
160 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
161}
162
163static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
164 const struct cpumask *andmask)
139{ 165{
140 return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; 166 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
167 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
168
169 return mask1 & mask2;
141} 170}
142 171
143static unsigned int phys_pkg_id(int index_msb) 172static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
157 .send_IPI_all = flat_send_IPI_all, 186 .send_IPI_all = flat_send_IPI_all,
158 .send_IPI_allbutself = flat_send_IPI_allbutself, 187 .send_IPI_allbutself = flat_send_IPI_allbutself,
159 .send_IPI_mask = flat_send_IPI_mask, 188 .send_IPI_mask = flat_send_IPI_mask,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
160 .send_IPI_self = apic_send_IPI_self, 190 .send_IPI_self = apic_send_IPI_self,
161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
162 .phys_pkg_id = phys_pkg_id, 193 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id, 194 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id, 195 .set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
188 return 0; 219 return 0;
189} 220}
190 221
191static cpumask_t physflat_target_cpus(void) 222static const struct cpumask *physflat_target_cpus(void)
192{ 223{
193 return cpu_online_map; 224 return cpu_online_mask;
194} 225}
195 226
196static cpumask_t physflat_vector_allocation_domain(int cpu) 227static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
197{ 228{
198 return cpumask_of_cpu(cpu); 229 cpumask_clear(retmask);
230 cpumask_set_cpu(cpu, retmask);
199} 231}
200 232
201static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) 233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
202{ 234{
203 send_IPI_mask_sequence(cpumask, vector); 235 send_IPI_mask_sequence(cpumask, vector);
204} 236}
205 237
206static void physflat_send_IPI_allbutself(int vector) 238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector)
207{ 240{
208 cpumask_t allbutme = cpu_online_map; 241 send_IPI_mask_allbutself(cpumask, vector);
242}
209 243
210 cpu_clear(smp_processor_id(), allbutme); 244static void physflat_send_IPI_allbutself(int vector)
211 physflat_send_IPI_mask(allbutme, vector); 245{
246 send_IPI_mask_allbutself(cpu_online_mask, vector);
212} 247}
213 248
214static void physflat_send_IPI_all(int vector) 249static void physflat_send_IPI_all(int vector)
215{ 250{
216 physflat_send_IPI_mask(cpu_online_map, vector); 251 physflat_send_IPI_mask(cpu_online_mask, vector);
217} 252}
218 253
219static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) 254static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
220{ 255{
221 int cpu; 256 int cpu;
222 257
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
224 * We're using fixed IRQ delivery, can only return one phys APIC ID. 259 * We're using fixed IRQ delivery, can only return one phys APIC ID.
225 * May as well be the first. 260 * May as well be the first.
226 */ 261 */
227 cpu = first_cpu(cpumask); 262 cpu = cpumask_first(cpumask);
228 if ((unsigned)cpu < nr_cpu_ids) 263 if ((unsigned)cpu < nr_cpu_ids)
229 return per_cpu(x86_cpu_to_apicid, cpu); 264 return per_cpu(x86_cpu_to_apicid, cpu);
230 else 265 else
231 return BAD_APICID; 266 return BAD_APICID;
232} 267}
233 268
269static unsigned int
270physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
271 const struct cpumask *andmask)
272{
273 int cpu;
274
275 /*
276 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first.
278 */
279 for_each_cpu_and(cpu, cpumask, andmask)
280 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break;
282 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu);
284 return BAD_APICID;
285}
286
234struct genapic apic_physflat = { 287struct genapic apic_physflat = {
235 .name = "physical flat", 288 .name = "physical flat",
236 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@ struct genapic apic_physflat = {
243 .send_IPI_all = physflat_send_IPI_all, 296 .send_IPI_all = physflat_send_IPI_all,
244 .send_IPI_allbutself = physflat_send_IPI_allbutself, 297 .send_IPI_allbutself = physflat_send_IPI_allbutself,
245 .send_IPI_mask = physflat_send_IPI_mask, 298 .send_IPI_mask = physflat_send_IPI_mask,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
246 .send_IPI_self = apic_send_IPI_self, 300 .send_IPI_self = apic_send_IPI_self,
247 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
248 .phys_pkg_id = phys_pkg_id, 303 .phys_pkg_id = phys_pkg_id,
249 .get_apic_id = get_apic_id, 304 .get_apic_id = get_apic_id,
250 .set_apic_id = set_apic_id, 305 .set_apic_id = set_apic_id,
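
The genapic conversion above changes the cpumask-returning callbacks to take and return const struct cpumask * (or fill a caller-provided mask) instead of copying cpumask_t by value, and adds cpu_mask_to_apicid_and() and send_IPI_mask_allbutself() hooks. A condensed sketch of the new callback shapes; the example_* functions are illustrative, not the patch's code:

#include <linux/cpumask.h>

/* Return a pointer to a shared mask instead of a struct copy. */
static const struct cpumask *example_target_cpus(void)
{
	return cpu_online_mask;
}

/* Fill a caller-provided mask instead of returning one by value. */
static void example_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	cpumask_clear(retmask);
	cpumask_set_cpu(cpu, retmask);
}

/* Pick the first online CPU present in both masks. */
static int example_first_online_and(const struct cpumask *cpumask,
				    const struct cpumask *andmask)
{
	int cpu;

	for_each_cpu_and(cpu, cpumask, andmask)
		if (cpumask_test_cpu(cpu, cpu_online_mask))
			break;
	return cpu;	/* == nr_cpu_ids when no CPU qualifies */
}
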
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..6ce497cc372d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
22 22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24 24
25static cpumask_t x2apic_target_cpus(void) 25static const struct cpumask *x2apic_target_cpus(void)
26{ 26{
27 return cpumask_of_cpu(0); 27 return cpumask_of(0);
28} 28}
29 29
30/* 30/*
31 * for now each logical cpu is in its own vector allocation domain. 31 * for now each logical cpu is in its own vector allocation domain.
32 */ 32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu) 33static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
34{ 34{
35 cpumask_t domain = CPU_MASK_NONE; 35 cpumask_clear(retmask);
36 cpu_set(cpu, domain); 36 cpumask_set_cpu(cpu, retmask);
37 return domain;
38} 37}
39 38
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register 55 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes. 56 * writes.
58 */ 57 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
60{ 59{
61 unsigned long flags; 60 unsigned long flags;
62 unsigned long query_cpu; 61 unsigned long query_cpu;
63 62
64 local_irq_save(flags); 63 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask)
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), 65 __x2apic_send_IPI_dest(
67 vector, APIC_DEST_LOGICAL); 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
68 } 67 vector, APIC_DEST_LOGICAL);
69 local_irq_restore(flags); 68 local_irq_restore(flags);
70} 69}
71 70
72static void x2apic_send_IPI_allbutself(int vector) 71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
72 int vector)
73{ 73{
74 cpumask_t mask = cpu_online_map; 74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id();
75 77
76 cpu_clear(smp_processor_id(), mask); 78 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask)
80 if (query_cpu != this_cpu)
81 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL);
84 local_irq_restore(flags);
85}
86
87static void x2apic_send_IPI_allbutself(int vector)
88{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id();
77 92
78 if (!cpus_empty(mask)) 93 local_irq_save(flags);
79 x2apic_send_IPI_mask(mask, vector); 94 for_each_online_cpu(query_cpu)
95 if (query_cpu != this_cpu)
96 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL);
99 local_irq_restore(flags);
80} 100}
81 101
82static void x2apic_send_IPI_all(int vector) 102static void x2apic_send_IPI_all(int vector)
83{ 103{
84 x2apic_send_IPI_mask(cpu_online_map, vector); 104 x2apic_send_IPI_mask(cpu_online_mask, vector);
85} 105}
86 106
87static int x2apic_apic_id_registered(void) 107static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void)
89 return 1; 109 return 1;
90} 110}
91 111
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
93{ 113{
94 int cpu; 114 int cpu;
95 115
96 /* 116 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID. 117 * We're using fixed IRQ delivery, can only return one logical APIC ID.
98 * May as well be the first. 118 * May as well be the first.
99 */ 119 */
100 cpu = first_cpu(cpumask); 120 cpu = cpumask_first(cpumask);
101 if ((unsigned)cpu < NR_CPUS) 121 if ((unsigned)cpu < nr_cpu_ids)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu); 122 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else 123 else
104 return BAD_APICID; 124 return BAD_APICID;
105} 125}
106 126
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
129{
130 int cpu;
131
132 /*
133 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first.
135 */
136 for_each_cpu_and(cpu, cpumask, andmask)
137 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break;
139 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu);
141 return BAD_APICID;
142}
143
107static unsigned int get_apic_id(unsigned long x) 144static unsigned int get_apic_id(unsigned long x)
108{ 145{
109 unsigned int id; 146 unsigned int id;
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = {
150 .send_IPI_all = x2apic_send_IPI_all, 187 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 188 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask, 189 .send_IPI_mask = x2apic_send_IPI_mask,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
153 .send_IPI_self = x2apic_send_IPI_self, 191 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
155 .phys_pkg_id = phys_pkg_id, 194 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id, 195 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id, 196 .set_apic_id = set_apic_id,
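
The x2apic variants above replace the "copy cpu_online_map, clear self, send to the copy" idiom with a direct loop that skips the current CPU, so no on-stack cpumask is needed. A sketch of that loop shape; example_send_one() stands in for the per-CPU ICR write, and the caller is assumed to have interrupts disabled, as in the hunks above:

#include <linux/cpumask.h>
#include <linux/smp.h>

static void example_send_one(unsigned int cpu, int vector)
{
	/* Placeholder for the per-CPU IPI register write. */
}

static void example_send_allbutself(int vector)
{
	unsigned int this_cpu = smp_processor_id();
	unsigned int cpu;

	for_each_online_cpu(cpu)
		if (cpu != this_cpu)
			example_send_one(cpu, vector);
}
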
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31 31
32static cpumask_t x2apic_target_cpus(void) 32static const struct cpumask *x2apic_target_cpus(void)
33{ 33{
34 return cpumask_of_cpu(0); 34 return cpumask_of(0);
35} 35}
36 36
37static cpumask_t x2apic_vector_allocation_domain(int cpu) 37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
38{ 38{
39 cpumask_t domain = CPU_MASK_NONE; 39 cpumask_clear(retmask);
40 cpu_set(cpu, domain); 40 cpumask_set_cpu(cpu, retmask);
41 return domain;
42} 41}
43 42
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 43static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
54 x2apic_icr_write(cfg, apicid); 53 x2apic_icr_write(cfg, apicid);
55} 54}
56 55
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58{ 57{
59 unsigned long flags; 58 unsigned long flags;
60 unsigned long query_cpu; 59 unsigned long query_cpu;
61 60
62 local_irq_save(flags); 61 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL); 64 vector, APIC_DEST_PHYSICAL);
66 } 65 }
67 local_irq_restore(flags); 66 local_irq_restore(flags);
68} 67}
69 68
70static void x2apic_send_IPI_allbutself(int vector) 69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
70 int vector)
71{ 71{
72 cpumask_t mask = cpu_online_map; 72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id();
75
76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu)
79 __x2apic_send_IPI_dest(
80 per_cpu(x86_cpu_to_apicid, query_cpu),
81 vector, APIC_DEST_PHYSICAL);
82 }
83 local_irq_restore(flags);
84}
73 85
74 cpu_clear(smp_processor_id(), mask); 86static void x2apic_send_IPI_allbutself(int vector)
87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id();
75 91
76 if (!cpus_empty(mask)) 92 local_irq_save(flags);
77 x2apic_send_IPI_mask(mask, vector); 93 for_each_online_cpu(query_cpu)
94 if (query_cpu != this_cpu)
95 __x2apic_send_IPI_dest(
96 per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL);
98 local_irq_restore(flags);
78} 99}
79 100
80static void x2apic_send_IPI_all(int vector) 101static void x2apic_send_IPI_all(int vector)
81{ 102{
82 x2apic_send_IPI_mask(cpu_online_map, vector); 103 x2apic_send_IPI_mask(cpu_online_mask, vector);
83} 104}
84 105
85static int x2apic_apic_id_registered(void) 106static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
87 return 1; 108 return 1;
88} 109}
89 110
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
91{ 112{
92 int cpu; 113 int cpu;
93 114
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
95 * We're using fixed IRQ delivery, can only return one phys APIC ID. 116 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first. 117 * May as well be the first.
97 */ 118 */
98 cpu = first_cpu(cpumask); 119 cpu = cpumask_first(cpumask);
99 if ((unsigned)cpu < NR_CPUS) 120 if ((unsigned)cpu < nr_cpu_ids)
100 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
101 else 122 else
102 return BAD_APICID; 123 return BAD_APICID;
103} 124}
104 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
127 const struct cpumask *andmask)
128{
129 int cpu;
130
131 /*
132 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first.
134 */
135 for_each_cpu_and(cpu, cpumask, andmask)
136 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break;
138 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu);
140 return BAD_APICID;
141}
142
105static unsigned int get_apic_id(unsigned long x) 143static unsigned int get_apic_id(unsigned long x)
106{ 144{
107 unsigned int id; 145 unsigned int id;
@@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb)
123 return current_cpu_data.initial_apicid >> index_msb; 161 return current_cpu_data.initial_apicid >> index_msb;
124} 162}
125 163
126void x2apic_send_IPI_self(int vector) 164static void x2apic_send_IPI_self(int vector)
127{ 165{
128 apic_write(APIC_SELF_IPI, vector); 166 apic_write(APIC_SELF_IPI, vector);
129} 167}
130 168
131void init_x2apic_ldr(void) 169static void init_x2apic_ldr(void)
132{ 170{
133 return; 171 return;
134} 172}
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = {
145 .send_IPI_all = x2apic_send_IPI_all, 183 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 184 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask, 185 .send_IPI_mask = x2apic_send_IPI_mask,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
148 .send_IPI_self = x2apic_send_IPI_self, 187 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
150 .phys_pkg_id = phys_pkg_id, 190 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id, 191 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id, 192 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece17289731..b193e082f6ce 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
79 79
80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
81 81
82static cpumask_t uv_target_cpus(void) 82static const struct cpumask *uv_target_cpus(void)
83{ 83{
84 return cpumask_of_cpu(0); 84 return cpumask_of(0);
85} 85}
86 86
87static cpumask_t uv_vector_allocation_domain(int cpu) 87static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
88{ 88{
89 cpumask_t domain = CPU_MASK_NONE; 89 cpumask_clear(retmask);
90 cpu_set(cpu, domain); 90 cpumask_set_cpu(cpu, retmask);
91 return domain;
92} 91}
93 92
94int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 93int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector)
127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
128} 127}
129 128
130static void uv_send_IPI_mask(cpumask_t mask, int vector) 129static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
131{ 130{
132 unsigned int cpu; 131 unsigned int cpu;
133 132
134 for_each_possible_cpu(cpu) 133 for_each_cpu(cpu, mask)
135 if (cpu_isset(cpu, mask)) 134 uv_send_IPI_one(cpu, vector);
135}
136
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id();
141
142 for_each_cpu(cpu, mask)
143 if (cpu != this_cpu)
136 uv_send_IPI_one(cpu, vector); 144 uv_send_IPI_one(cpu, vector);
137} 145}
138 146
139static void uv_send_IPI_allbutself(int vector) 147static void uv_send_IPI_allbutself(int vector)
140{ 148{
141 cpumask_t mask = cpu_online_map; 149 unsigned int cpu;
142 150 unsigned int this_cpu = smp_processor_id();
143 cpu_clear(smp_processor_id(), mask);
144 151
145 if (!cpus_empty(mask)) 152 for_each_online_cpu(cpu)
146 uv_send_IPI_mask(mask, vector); 153 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector);
147} 155}
148 156
149static void uv_send_IPI_all(int vector) 157static void uv_send_IPI_all(int vector)
150{ 158{
151 uv_send_IPI_mask(cpu_online_map, vector); 159 uv_send_IPI_mask(cpu_online_mask, vector);
152} 160}
153 161
154static int uv_apic_id_registered(void) 162static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void)
160{ 168{
161} 169}
162 170
163static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 171static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
164{ 172{
165 int cpu; 173 int cpu;
166 174
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
168 * We're using fixed IRQ delivery, can only return one phys APIC ID. 176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
169 * May as well be the first. 177 * May as well be the first.
170 */ 178 */
171 cpu = first_cpu(cpumask); 179 cpu = cpumask_first(cpumask);
172 if ((unsigned)cpu < nr_cpu_ids) 180 if ((unsigned)cpu < nr_cpu_ids)
173 return per_cpu(x86_cpu_to_apicid, cpu); 181 return per_cpu(x86_cpu_to_apicid, cpu);
174 else 182 else
175 return BAD_APICID; 183 return BAD_APICID;
176} 184}
177 185
186static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
187 const struct cpumask *andmask)
188{
189 int cpu;
190
191 /*
192 * We're using fixed IRQ delivery, can only return one phys APIC ID.
193 * May as well be the first.
194 */
195 for_each_cpu_and(cpu, cpumask, andmask)
196 if (cpumask_test_cpu(cpu, cpu_online_mask))
197 break;
198 if (cpu < nr_cpu_ids)
199 return per_cpu(x86_cpu_to_apicid, cpu);
200 return BAD_APICID;
201}
202
178static unsigned int get_apic_id(unsigned long x) 203static unsigned int get_apic_id(unsigned long x)
179{ 204{
180 unsigned int id; 205 unsigned int id;
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = {
222 .send_IPI_all = uv_send_IPI_all, 247 .send_IPI_all = uv_send_IPI_all,
223 .send_IPI_allbutself = uv_send_IPI_allbutself, 248 .send_IPI_allbutself = uv_send_IPI_allbutself,
224 .send_IPI_mask = uv_send_IPI_mask, 249 .send_IPI_mask = uv_send_IPI_mask,
250 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
225 .send_IPI_self = uv_send_IPI_self, 251 .send_IPI_self = uv_send_IPI_self,
226 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 252 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
253 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
227 .phys_pkg_id = phys_pkg_id, 254 .phys_pkg_id = phys_pkg_id,
228 .get_apic_id = get_apic_id, 255 .get_apic_id = get_apic_id,
229 .set_apic_id = set_apic_id, 256 .set_apic_id = set_apic_id,
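The genx2apic_uv_x.c hunks above are part of the tree-wide cpumask rework carried by this merge: IPI helpers stop taking a cpumask_t by value and accept a const struct cpumask * instead, iteration switches from for_each_possible_cpu() plus cpu_isset() to for_each_cpu(), and the driver gains send_IPI_mask_allbutself() and cpu_mask_to_apicid_and() callbacks. A minimal sketch of the new calling convention follows; the helper names are illustrative, not kernel symbols, and kernel context is assumed.

#include <linux/cpumask.h>
#include <linux/smp.h>

/* Sketch: walk only the CPUs set in @mask, skipping the sender.
 * The caller is assumed to have preemption disabled. */
static void example_ipi_mask_allbutself(const struct cpumask *mask,
                                        void (*send_one)(int cpu))
{
        unsigned int cpu;
        unsigned int this_cpu = smp_processor_id();

        for_each_cpu(cpu, mask)         /* visits set bits only */
                if (cpu != this_cpu)
                        send_one(cpu);
}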
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc17..b9a4d8c4b935 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */ 29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda __read_mostly; 30static struct x8664_pda _boot_cpu_pda;
31 31
32#ifdef CONFIG_SMP 32#ifdef CONFIG_SMP
33/* 33/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 845ea097383e..cd759ad90690 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void)
248 * Start hpet with the boot cpu mask and make it 248 * Start hpet with the boot cpu mask and make it
249 * global after the IO_APIC has been initialized. 249 * global after the IO_APIC has been initialized.
250 */ 250 */
251 hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 251 hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
252 clockevents_register_device(&hpet_clockevent); 252 clockevents_register_device(&hpet_clockevent);
253 global_clock_event = &hpet_clockevent; 253 global_clock_event = &hpet_clockevent;
254 printk(KERN_DEBUG "hpet clockevent registered\n"); 254 printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
303 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); 303 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
304 hpet_setup_msi_irq(hdev->irq); 304 hpet_setup_msi_irq(hdev->irq);
305 disable_irq(hdev->irq); 305 disable_irq(hdev->irq);
306 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); 306 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
307 enable_irq(hdev->irq); 307 enable_irq(hdev->irq);
308 } 308 }
309 break; 309 break;
@@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
451 return -1; 451 return -1;
452 452
453 disable_irq(dev->irq); 453 disable_irq(dev->irq);
454 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); 454 irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
455 enable_irq(dev->irq); 455 enable_irq(dev->irq);
456 456
457 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", 457 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
502 /* 5 usec minimum reprogramming delta. */ 502 /* 5 usec minimum reprogramming delta. */
503 evt->min_delta_ns = 5000; 503 evt->min_delta_ns = 5000;
504 504
505 evt->cpumask = cpumask_of_cpu(hdev->cpu); 505 evt->cpumask = cpumask_of(hdev->cpu);
506 clockevents_register_device(evt); 506 clockevents_register_device(evt);
507} 507}
508 508
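The hpet.c (and, below, i8253.c) conversions replace cpumask_of_cpu(), which returned a whole cpumask_t by value, with cpumask_of(), which returns a const struct cpumask * into a constant single-bit map, so no NR_CPUS-sized object lands on the stack. A small sketch of the idiom, assuming kernel context; the wrapper name is illustrative.

#include <linux/cpumask.h>
#include <linux/interrupt.h>

/* Sketch: bind an IRQ to one CPU without copying a full cpumask. */
static int example_pin_irq_to_cpu(unsigned int irq, int cpu)
{
        /* cpumask_of() yields a pointer to a constant mask with only @cpu set */
        return irq_set_affinity(irq, cpumask_of(cpu));
}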
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
114 * Start pit with the boot cpu mask and make it global after the 114 * Start pit with the boot cpu mask and make it global after the
115 * IO_APIC has been initialized. 115 * IO_APIC has been initialized.
116 */ 116 */
117 pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 117 pit_clockevent.cpumask = cpumask_of(smp_processor_id());
118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
119 pit_clockevent.shift); 119 pit_clockevent.shift);
120 pit_clockevent.max_delta_ns = 120 pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/desc.h> 11#include <asm/desc.h>
12 12
13static struct fs_struct init_fs = INIT_FS;
14static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 15struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f6ea94b74da1..3639442aa7a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
136 136
137struct irq_cfg { 137struct irq_cfg {
138 struct irq_pin_list *irq_2_pin; 138 struct irq_pin_list *irq_2_pin;
139 cpumask_t domain; 139 cpumask_var_t domain;
140 cpumask_t old_domain; 140 cpumask_var_t old_domain;
141 unsigned move_cleanup_count; 141 unsigned move_cleanup_count;
142 u8 vector; 142 u8 vector;
143 u8 move_in_progress : 1; 143 u8 move_in_progress : 1;
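struct irq_cfg now carries its domains as cpumask_var_t rather than embedded cpumask_t. With CONFIG_CPUMASK_OFFSTACK=y a cpumask_var_t is a pointer that must be allocated with alloc_cpumask_var() (or a _node/bootmem variant) and released with free_cpumask_var(); without that option it degenerates to an in-place array and the alloc/free calls are no-ops. A minimal usage sketch, assuming kernel context; the function name is illustrative.

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/kernel.h>

static int example_use_cpumask_var(void)
{
        cpumask_var_t tmp;

        /* can only fail when CPUMASK_OFFSTACK makes this a real allocation */
        if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
                return -ENOMEM;

        cpumask_copy(tmp, cpu_online_mask);
        pr_debug("%u cpus online\n", cpumask_weight(tmp));

        free_cpumask_var(tmp);
        return 0;
}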
@@ -152,25 +152,25 @@ static struct irq_cfg irq_cfgx[] = {
152#else 152#else
153static struct irq_cfg irq_cfgx[NR_IRQS] = { 153static struct irq_cfg irq_cfgx[NR_IRQS] = {
154#endif 154#endif
155 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 155 [0] = { .vector = IRQ0_VECTOR, },
156 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 156 [1] = { .vector = IRQ1_VECTOR, },
157 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 157 [2] = { .vector = IRQ2_VECTOR, },
158 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 158 [3] = { .vector = IRQ3_VECTOR, },
159 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 159 [4] = { .vector = IRQ4_VECTOR, },
160 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 160 [5] = { .vector = IRQ5_VECTOR, },
161 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 161 [6] = { .vector = IRQ6_VECTOR, },
162 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 162 [7] = { .vector = IRQ7_VECTOR, },
163 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 163 [8] = { .vector = IRQ8_VECTOR, },
164 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 164 [9] = { .vector = IRQ9_VECTOR, },
165 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 165 [10] = { .vector = IRQ10_VECTOR, },
166 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 166 [11] = { .vector = IRQ11_VECTOR, },
167 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 167 [12] = { .vector = IRQ12_VECTOR, },
168 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 168 [13] = { .vector = IRQ13_VECTOR, },
169 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 169 [14] = { .vector = IRQ14_VECTOR, },
170 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 170 [15] = { .vector = IRQ15_VECTOR, },
171}; 171};
172 172
173void __init arch_early_irq_init(void) 173int __init arch_early_irq_init(void)
174{ 174{
175 struct irq_cfg *cfg; 175 struct irq_cfg *cfg;
176 struct irq_desc *desc; 176 struct irq_desc *desc;
@@ -183,7 +183,13 @@ void __init arch_early_irq_init(void)
183 for (i = 0; i < count; i++) { 183 for (i = 0; i < count; i++) {
184 desc = irq_to_desc(i); 184 desc = irq_to_desc(i);
185 desc->chip_data = &cfg[i]; 185 desc->chip_data = &cfg[i];
186 alloc_bootmem_cpumask_var(&cfg[i].domain);
187 alloc_bootmem_cpumask_var(&cfg[i].old_domain);
188 if (i < NR_IRQS_LEGACY)
189 cpumask_setall(cfg[i].domain);
186 } 190 }
191
192 return 0;
187} 193}
188 194
189#ifdef CONFIG_SPARSE_IRQ 195#ifdef CONFIG_SPARSE_IRQ
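arch_early_irq_init() now returns int and, since the masks are no longer embedded in the static irq_cfgx[] table, allocates them at boot with alloc_bootmem_cpumask_var() and seeds the legacy-IRQ domains with cpumask_setall(). A sketch of that early-boot pattern, assuming kernel context; the structure and names are illustrative.

#include <linux/cpumask.h>
#include <linux/init.h>

struct example_cfg {
        cpumask_var_t domain;
};

static void __init example_early_init(struct example_cfg *cfg,
                                      int count, int nr_legacy)
{
        int i;

        for (i = 0; i < count; i++) {
                /* bootmem-backed: usable before the slab allocator is up */
                alloc_bootmem_cpumask_var(&cfg[i].domain);
                if (i < nr_legacy)
                        cpumask_setall(cfg[i].domain);
        }
}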
@@ -207,12 +213,26 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
207 node = cpu_to_node(cpu); 213 node = cpu_to_node(cpu);
208 214
209 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 215 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
216 if (cfg) {
217 if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
218 kfree(cfg);
219 cfg = NULL;
220 } else if (!alloc_cpumask_var_node(&cfg->old_domain,
221 GFP_ATOMIC, node)) {
222 free_cpumask_var(cfg->domain);
223 kfree(cfg);
224 cfg = NULL;
225 } else {
226 cpumask_clear(cfg->domain);
227 cpumask_clear(cfg->old_domain);
228 }
229 }
210 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); 230 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
211 231
212 return cfg; 232 return cfg;
213} 233}
214 234
215void arch_init_chip_data(struct irq_desc *desc, int cpu) 235int arch_init_chip_data(struct irq_desc *desc, int cpu)
216{ 236{
217 struct irq_cfg *cfg; 237 struct irq_cfg *cfg;
218 238
@@ -224,6 +244,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
224 BUG_ON(1); 244 BUG_ON(1);
225 } 245 }
226 } 246 }
247
248 return 0;
227} 249}
228 250
229#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC 251#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
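get_one_free_irq_cfg() now has to make two node-local cpumask allocations per irq_cfg via alloc_cpumask_var_node() and unwind whichever ones succeeded if a later step fails. The same logic, condensed into a sketch with goto-style unwinding; names are illustrative and kernel context is assumed.

#include <linux/cpumask.h>
#include <linux/slab.h>

struct example_cfg {
        cpumask_var_t domain;
        cpumask_var_t old_domain;
};

static struct example_cfg *example_alloc_cfg(int node)
{
        struct example_cfg *cfg;

        cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
        if (!cfg)
                return NULL;
        if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node))
                goto free_cfg;
        if (!alloc_cpumask_var_node(&cfg->old_domain, GFP_ATOMIC, node))
                goto free_domain;

        cpumask_clear(cfg->domain);
        cpumask_clear(cfg->old_domain);
        return cfg;

free_domain:
        free_cpumask_var(cfg->domain);
free_cfg:
        kfree(cfg);
        return NULL;
}

The patch open-codes the same unwinding with nested branches; the goto form above is only an equivalent, more linear way to express it.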
@@ -329,13 +351,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
329 } 351 }
330} 352}
331 353
332static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) 354static void
355set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
333{ 356{
334 struct irq_cfg *cfg = desc->chip_data; 357 struct irq_cfg *cfg = desc->chip_data;
335 358
336 if (!cfg->move_in_progress) { 359 if (!cfg->move_in_progress) {
337 /* it means that domain is not changed */ 360 /* it means that domain is not changed */
338 if (!cpus_intersects(desc->affinity, mask)) 361 if (!cpumask_intersects(&desc->affinity, mask))
339 cfg->move_desc_pending = 1; 362 cfg->move_desc_pending = 1;
340 } 363 }
341} 364}
@@ -350,7 +373,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
350#endif 373#endif
351 374
352#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC 375#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
353static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) 376static inline void
377set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
354{ 378{
355} 379}
356#endif 380#endif
@@ -481,6 +505,26 @@ static void ioapic_mask_entry(int apic, int pin)
481} 505}
482 506
483#ifdef CONFIG_SMP 507#ifdef CONFIG_SMP
508static void send_cleanup_vector(struct irq_cfg *cfg)
509{
510 cpumask_var_t cleanup_mask;
511
512 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
513 unsigned int i;
514 cfg->move_cleanup_count = 0;
515 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
516 cfg->move_cleanup_count++;
517 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
518 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
519 } else {
520 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
521 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
522 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
523 free_cpumask_var(cleanup_mask);
524 }
525 cfg->move_in_progress = 0;
526}
527
484static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) 528static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
485{ 529{
486 int apic, pin; 530 int apic, pin;
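send_cleanup_vector() becomes the shared tail of every vector migration: it IPIs the online CPUs still recorded in cfg->old_domain with IRQ_MOVE_CLEANUP_VECTOR and clears move_in_progress. Because it runs in atomic context, a failed GFP_ATOMIC mask allocation (possible only with CPUMASK_OFFSTACK) is handled by falling back to a per-CPU loop over for_each_cpu_and() with cpumask_of(). A reduced sketch of that fallback shape, with illustrative names:

#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/cpumask.h>

/* Sketch: apply fn() to (set & online), degrading gracefully to one
 * call per CPU when a scratch mask cannot be allocated. */
static void example_over_online_subset(const struct cpumask *set,
                                       void (*fn)(const struct cpumask *m))
{
        cpumask_var_t tmp;
        unsigned int cpu;

        if (likely(alloc_cpumask_var(&tmp, GFP_ATOMIC))) {
                cpumask_and(tmp, set, cpu_online_mask);
                fn(tmp);
                free_cpumask_var(tmp);
        } else {
                for_each_cpu_and(cpu, set, cpu_online_mask)
                        fn(cpumask_of(cpu));
        }
}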
@@ -516,41 +560,55 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
516 } 560 }
517} 561}
518 562
519static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); 563static int
564assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
520 565
521static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) 566/*
567 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
568 * of that, or returns BAD_APICID and leaves desc->affinity untouched.
569 */
570static unsigned int
571set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
522{ 572{
523 struct irq_cfg *cfg; 573 struct irq_cfg *cfg;
524 unsigned long flags;
525 unsigned int dest;
526 cpumask_t tmp;
527 unsigned int irq; 574 unsigned int irq;
528 575
529 cpus_and(tmp, mask, cpu_online_map); 576 if (!cpumask_intersects(mask, cpu_online_mask))
530 if (cpus_empty(tmp)) 577 return BAD_APICID;
531 return;
532 578
533 irq = desc->irq; 579 irq = desc->irq;
534 cfg = desc->chip_data; 580 cfg = desc->chip_data;
535 if (assign_irq_vector(irq, cfg, mask)) 581 if (assign_irq_vector(irq, cfg, mask))
536 return; 582 return BAD_APICID;
537 583
584 cpumask_and(&desc->affinity, cfg->domain, mask);
538 set_extra_move_desc(desc, mask); 585 set_extra_move_desc(desc, mask);
586 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
587}
539 588
540 cpus_and(tmp, cfg->domain, mask); 589static void
541 dest = cpu_mask_to_apicid(tmp); 590set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
542 /* 591{
543 * Only the high 8 bits are valid. 592 struct irq_cfg *cfg;
544 */ 593 unsigned long flags;
545 dest = SET_APIC_LOGICAL_ID(dest); 594 unsigned int dest;
595 unsigned int irq;
596
597 irq = desc->irq;
598 cfg = desc->chip_data;
546 599
547 spin_lock_irqsave(&ioapic_lock, flags); 600 spin_lock_irqsave(&ioapic_lock, flags);
548 __target_IO_APIC_irq(irq, dest, cfg); 601 dest = set_desc_affinity(desc, mask);
549 desc->affinity = mask; 602 if (dest != BAD_APICID) {
603 /* Only the high 8 bits are valid. */
604 dest = SET_APIC_LOGICAL_ID(dest);
605 __target_IO_APIC_irq(irq, dest, cfg);
606 }
550 spin_unlock_irqrestore(&ioapic_lock, flags); 607 spin_unlock_irqrestore(&ioapic_lock, flags);
551} 608}
552 609
553static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 610static void
611set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
554{ 612{
555 struct irq_desc *desc; 613 struct irq_desc *desc;
556 614
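set_desc_affinity() centralizes what every ->set_affinity callback used to open-code: check the requested mask against cpu_online_mask, call assign_irq_vector(), update desc->affinity, and return an APIC destination or BAD_APICID. That is why the cpus_and()/assign_irq_vector()/set_extra_move_desc() boilerplate disappears from the MSI, DMAR, HPET and HT hunks further down. A sketch of the resulting caller shape, written as if it sat inside io_apic.c where set_desc_affinity() and BAD_APICID are visible; the reprogram hook is illustrative.

/* Sketch: common shape of a converted ->set_affinity callback. */
static void example_set_affinity(struct irq_desc *desc,
                                 const struct cpumask *mask,
                                 void (*reprogram)(struct irq_desc *d,
                                                   unsigned int dest))
{
        unsigned int dest;

        dest = set_desc_affinity(desc, mask);   /* BAD_APICID on failure */
        if (dest == BAD_APICID)
                return;

        reprogram(desc, dest);  /* rewrite the RTE / MSI message */
}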
@@ -648,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
648} 706}
649 707
650#ifdef CONFIG_X86_64 708#ifdef CONFIG_X86_64
651void io_apic_sync(struct irq_pin_list *entry) 709static void io_apic_sync(struct irq_pin_list *entry)
652{ 710{
653 /* 711 /*
654 * Synchronize the IO-APIC and the CPU by doing 712 * Synchronize the IO-APIC and the CPU by doing
@@ -1218,7 +1276,8 @@ void unlock_vector_lock(void)
1218 spin_unlock(&vector_lock); 1276 spin_unlock(&vector_lock);
1219} 1277}
1220 1278
1221static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) 1279static int
1280__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1222{ 1281{
1223 /* 1282 /*
1224 * NOTE! The local APIC isn't very good at handling 1283 * NOTE! The local APIC isn't very good at handling
@@ -1233,49 +1292,49 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1233 */ 1292 */
1234 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1293 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1235 unsigned int old_vector; 1294 unsigned int old_vector;
1236 int cpu; 1295 int cpu, err;
1296 cpumask_var_t tmp_mask;
1237 1297
1238 if ((cfg->move_in_progress) || cfg->move_cleanup_count) 1298 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1239 return -EBUSY; 1299 return -EBUSY;
1240 1300
1241 /* Only try and allocate irqs on cpus that are present */ 1301 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1242 cpus_and(mask, mask, cpu_online_map); 1302 return -ENOMEM;
1243 1303
1244 old_vector = cfg->vector; 1304 old_vector = cfg->vector;
1245 if (old_vector) { 1305 if (old_vector) {
1246 cpumask_t tmp; 1306 cpumask_and(tmp_mask, mask, cpu_online_mask);
1247 cpus_and(tmp, cfg->domain, mask); 1307 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1248 if (!cpus_empty(tmp)) 1308 if (!cpumask_empty(tmp_mask)) {
1309 free_cpumask_var(tmp_mask);
1249 return 0; 1310 return 0;
1311 }
1250 } 1312 }
1251 1313
1252 for_each_cpu_mask_nr(cpu, mask) { 1314 /* Only try and allocate irqs on cpus that are present */
1253 cpumask_t domain, new_mask; 1315 err = -ENOSPC;
1316 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1254 int new_cpu; 1317 int new_cpu;
1255 int vector, offset; 1318 int vector, offset;
1256 1319
1257 domain = vector_allocation_domain(cpu); 1320 vector_allocation_domain(cpu, tmp_mask);
1258 cpus_and(new_mask, domain, cpu_online_map);
1259 1321
1260 vector = current_vector; 1322 vector = current_vector;
1261 offset = current_offset; 1323 offset = current_offset;
1262next: 1324next:
1263 vector += 8; 1325 vector += 8;
1264 if (vector >= first_system_vector) { 1326 if (vector >= first_system_vector) {
1265 /* If we run out of vectors on large boxen, must share them. */ 1327 /* If out of vectors on large boxen, must share them. */
1266 offset = (offset + 1) % 8; 1328 offset = (offset + 1) % 8;
1267 vector = FIRST_DEVICE_VECTOR + offset; 1329 vector = FIRST_DEVICE_VECTOR + offset;
1268 } 1330 }
1269 if (unlikely(current_vector == vector)) 1331 if (unlikely(current_vector == vector))
1270 continue; 1332 continue;
1271#ifdef CONFIG_X86_64 1333
1272 if (vector == IA32_SYSCALL_VECTOR) 1334 if (test_bit(vector, used_vectors))
1273 goto next;
1274#else
1275 if (vector == SYSCALL_VECTOR)
1276 goto next; 1335 goto next;
1277#endif 1336
1278 for_each_cpu_mask_nr(new_cpu, new_mask) 1337 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1279 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1338 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1280 goto next; 1339 goto next;
1281 /* Found one! */ 1340 /* Found one! */
@@ -1283,18 +1342,21 @@ next:
1283 current_offset = offset; 1342 current_offset = offset;
1284 if (old_vector) { 1343 if (old_vector) {
1285 cfg->move_in_progress = 1; 1344 cfg->move_in_progress = 1;
1286 cfg->old_domain = cfg->domain; 1345 cpumask_copy(cfg->old_domain, cfg->domain);
1287 } 1346 }
1288 for_each_cpu_mask_nr(new_cpu, new_mask) 1347 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1289 per_cpu(vector_irq, new_cpu)[vector] = irq; 1348 per_cpu(vector_irq, new_cpu)[vector] = irq;
1290 cfg->vector = vector; 1349 cfg->vector = vector;
1291 cfg->domain = domain; 1350 cpumask_copy(cfg->domain, tmp_mask);
1292 return 0; 1351 err = 0;
1352 break;
1293 } 1353 }
1294 return -ENOSPC; 1354 free_cpumask_var(tmp_mask);
1355 return err;
1295} 1356}
1296 1357
1297static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) 1358static int
1359assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1298{ 1360{
1299 int err; 1361 int err;
1300 unsigned long flags; 1362 unsigned long flags;
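__assign_irq_vector() can no longer keep scratch cpumask_t copies on the stack, so it allocates a single tmp_mask up front (returning -ENOMEM if that fails), has vector_allocation_domain() fill it for each candidate CPU, and frees it on every exit; the per-arch syscall-vector special cases also collapse into one test_bit() against used_vectors. A sketch of the allocate-once, free-on-all-exits loop structure, with illustrative callbacks standing in for the real ones:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/cpumask.h>

/* Sketch: one scratch mask reused across the whole search. */
static int example_search(const struct cpumask *mask,
                          void (*fill)(int cpu, struct cpumask *out),
                          bool (*usable)(const struct cpumask *m))
{
        cpumask_var_t tmp;
        int cpu, err = -ENOSPC;

        if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
                return -ENOMEM;

        for_each_cpu_and(cpu, mask, cpu_online_mask) {
                fill(cpu, tmp);                 /* e.g. an allocation domain */
                if (usable(tmp)) {
                        err = 0;
                        break;
                }
        }

        free_cpumask_var(tmp);
        return err;
}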
@@ -1307,23 +1369,20 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1307 1369
1308static void __clear_irq_vector(int irq, struct irq_cfg *cfg) 1370static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1309{ 1371{
1310 cpumask_t mask;
1311 int cpu, vector; 1372 int cpu, vector;
1312 1373
1313 BUG_ON(!cfg->vector); 1374 BUG_ON(!cfg->vector);
1314 1375
1315 vector = cfg->vector; 1376 vector = cfg->vector;
1316 cpus_and(mask, cfg->domain, cpu_online_map); 1377 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1317 for_each_cpu_mask_nr(cpu, mask)
1318 per_cpu(vector_irq, cpu)[vector] = -1; 1378 per_cpu(vector_irq, cpu)[vector] = -1;
1319 1379
1320 cfg->vector = 0; 1380 cfg->vector = 0;
1321 cpus_clear(cfg->domain); 1381 cpumask_clear(cfg->domain);
1322 1382
1323 if (likely(!cfg->move_in_progress)) 1383 if (likely(!cfg->move_in_progress))
1324 return; 1384 return;
1325 cpus_and(mask, cfg->old_domain, cpu_online_map); 1385 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1326 for_each_cpu_mask_nr(cpu, mask) {
1327 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1386 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1328 vector++) { 1387 vector++) {
1329 if (per_cpu(vector_irq, cpu)[vector] != irq) 1388 if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1345,10 +1404,8 @@ void __setup_vector_irq(int cpu)
1345 1404
1346 /* Mark the inuse vectors */ 1405 /* Mark the inuse vectors */
1347 for_each_irq_desc(irq, desc) { 1406 for_each_irq_desc(irq, desc) {
1348 if (!desc)
1349 continue;
1350 cfg = desc->chip_data; 1407 cfg = desc->chip_data;
1351 if (!cpu_isset(cpu, cfg->domain)) 1408 if (!cpumask_test_cpu(cpu, cfg->domain))
1352 continue; 1409 continue;
1353 vector = cfg->vector; 1410 vector = cfg->vector;
1354 per_cpu(vector_irq, cpu)[vector] = irq; 1411 per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1360,7 +1417,7 @@ void __setup_vector_irq(int cpu)
1360 continue; 1417 continue;
1361 1418
1362 cfg = irq_cfg(irq); 1419 cfg = irq_cfg(irq);
1363 if (!cpu_isset(cpu, cfg->domain)) 1420 if (!cpumask_test_cpu(cpu, cfg->domain))
1364 per_cpu(vector_irq, cpu)[vector] = -1; 1421 per_cpu(vector_irq, cpu)[vector] = -1;
1365 } 1422 }
1366} 1423}
@@ -1496,18 +1553,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1496{ 1553{
1497 struct irq_cfg *cfg; 1554 struct irq_cfg *cfg;
1498 struct IO_APIC_route_entry entry; 1555 struct IO_APIC_route_entry entry;
1499 cpumask_t mask; 1556 unsigned int dest;
1500 1557
1501 if (!IO_APIC_IRQ(irq)) 1558 if (!IO_APIC_IRQ(irq))
1502 return; 1559 return;
1503 1560
1504 cfg = desc->chip_data; 1561 cfg = desc->chip_data;
1505 1562
1506 mask = TARGET_CPUS; 1563 if (assign_irq_vector(irq, cfg, TARGET_CPUS))
1507 if (assign_irq_vector(irq, cfg, mask))
1508 return; 1564 return;
1509 1565
1510 cpus_and(mask, cfg->domain, mask); 1566 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
1511 1567
1512 apic_printk(APIC_VERBOSE,KERN_DEBUG 1568 apic_printk(APIC_VERBOSE,KERN_DEBUG
1513 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1569 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1517,8 +1573,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1517 1573
1518 1574
1519 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1575 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1520 cpu_mask_to_apicid(mask), trigger, polarity, 1576 dest, trigger, polarity, cfg->vector)) {
1521 cfg->vector)) {
1522 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1577 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1523 mp_ioapics[apic].mp_apicid, pin); 1578 mp_ioapics[apic].mp_apicid, pin);
1524 __clear_irq_vector(irq, cfg); 1579 __clear_irq_vector(irq, cfg);
@@ -1730,8 +1785,6 @@ __apicdebuginit(void) print_IO_APIC(void)
1730 for_each_irq_desc(irq, desc) { 1785 for_each_irq_desc(irq, desc) {
1731 struct irq_pin_list *entry; 1786 struct irq_pin_list *entry;
1732 1787
1733 if (!desc)
1734 continue;
1735 cfg = desc->chip_data; 1788 cfg = desc->chip_data;
1736 entry = cfg->irq_2_pin; 1789 entry = cfg->irq_2_pin;
1737 if (!entry) 1790 if (!entry)
@@ -2240,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2240 unsigned long flags; 2293 unsigned long flags;
2241 2294
2242 spin_lock_irqsave(&vector_lock, flags); 2295 spin_lock_irqsave(&vector_lock, flags);
2243 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); 2296 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2244 spin_unlock_irqrestore(&vector_lock, flags); 2297 spin_unlock_irqrestore(&vector_lock, flags);
2245 2298
2246 return 1; 2299 return 1;
@@ -2289,18 +2342,17 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2289 * as simple as edge triggered migration and we can do the irq migration 2342 * as simple as edge triggered migration and we can do the irq migration
2290 * with a simple atomic update to IO-APIC RTE. 2343 * with a simple atomic update to IO-APIC RTE.
2291 */ 2344 */
2292static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) 2345static void
2346migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2293{ 2347{
2294 struct irq_cfg *cfg; 2348 struct irq_cfg *cfg;
2295 cpumask_t tmp, cleanup_mask;
2296 struct irte irte; 2349 struct irte irte;
2297 int modify_ioapic_rte; 2350 int modify_ioapic_rte;
2298 unsigned int dest; 2351 unsigned int dest;
2299 unsigned long flags; 2352 unsigned long flags;
2300 unsigned int irq; 2353 unsigned int irq;
2301 2354
2302 cpus_and(tmp, mask, cpu_online_map); 2355 if (!cpumask_intersects(mask, cpu_online_mask))
2303 if (cpus_empty(tmp))
2304 return; 2356 return;
2305 2357
2306 irq = desc->irq; 2358 irq = desc->irq;
@@ -2313,8 +2365,7 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2313 2365
2314 set_extra_move_desc(desc, mask); 2366 set_extra_move_desc(desc, mask);
2315 2367
2316 cpus_and(tmp, cfg->domain, mask); 2368 dest = cpu_mask_to_apicid_and(cfg->domain, mask);
2317 dest = cpu_mask_to_apicid(tmp);
2318 2369
2319 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2370 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2320 if (modify_ioapic_rte) { 2371 if (modify_ioapic_rte) {
@@ -2331,14 +2382,10 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2331 */ 2382 */
2332 modify_irte(irq, &irte); 2383 modify_irte(irq, &irte);
2333 2384
2334 if (cfg->move_in_progress) { 2385 if (cfg->move_in_progress)
2335 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2386 send_cleanup_vector(cfg);
2336 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2337 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2338 cfg->move_in_progress = 0;
2339 }
2340 2387
2341 desc->affinity = mask; 2388 cpumask_copy(&desc->affinity, mask);
2342} 2389}
2343 2390
2344static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2391static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2360,11 +2407,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2360 } 2407 }
2361 2408
2362 /* everthing is clear. we have right of way */ 2409 /* everthing is clear. we have right of way */
2363 migrate_ioapic_irq_desc(desc, desc->pending_mask); 2410 migrate_ioapic_irq_desc(desc, &desc->pending_mask);
2364 2411
2365 ret = 0; 2412 ret = 0;
2366 desc->status &= ~IRQ_MOVE_PENDING; 2413 desc->status &= ~IRQ_MOVE_PENDING;
2367 cpus_clear(desc->pending_mask); 2414 cpumask_clear(&desc->pending_mask);
2368 2415
2369unmask: 2416unmask:
2370 unmask_IO_APIC_irq_desc(desc); 2417 unmask_IO_APIC_irq_desc(desc);
@@ -2378,9 +2425,6 @@ static void ir_irq_migration(struct work_struct *work)
2378 struct irq_desc *desc; 2425 struct irq_desc *desc;
2379 2426
2380 for_each_irq_desc(irq, desc) { 2427 for_each_irq_desc(irq, desc) {
2381 if (!desc)
2382 continue;
2383
2384 if (desc->status & IRQ_MOVE_PENDING) { 2428 if (desc->status & IRQ_MOVE_PENDING) {
2385 unsigned long flags; 2429 unsigned long flags;
2386 2430
@@ -2392,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work)
2392 continue; 2436 continue;
2393 } 2437 }
2394 2438
2395 desc->chip->set_affinity(irq, desc->pending_mask); 2439 desc->chip->set_affinity(irq, &desc->pending_mask);
2396 spin_unlock_irqrestore(&desc->lock, flags); 2440 spin_unlock_irqrestore(&desc->lock, flags);
2397 } 2441 }
2398 } 2442 }
@@ -2401,18 +2445,20 @@ static void ir_irq_migration(struct work_struct *work)
2401/* 2445/*
2402 * Migrates the IRQ destination in the process context. 2446 * Migrates the IRQ destination in the process context.
2403 */ 2447 */
2404static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) 2448static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2449 const struct cpumask *mask)
2405{ 2450{
2406 if (desc->status & IRQ_LEVEL) { 2451 if (desc->status & IRQ_LEVEL) {
2407 desc->status |= IRQ_MOVE_PENDING; 2452 desc->status |= IRQ_MOVE_PENDING;
2408 desc->pending_mask = mask; 2453 cpumask_copy(&desc->pending_mask, mask);
2409 migrate_irq_remapped_level_desc(desc); 2454 migrate_irq_remapped_level_desc(desc);
2410 return; 2455 return;
2411 } 2456 }
2412 2457
2413 migrate_ioapic_irq_desc(desc, mask); 2458 migrate_ioapic_irq_desc(desc, mask);
2414} 2459}
2415static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2460static void set_ir_ioapic_affinity_irq(unsigned int irq,
2461 const struct cpumask *mask)
2416{ 2462{
2417 struct irq_desc *desc = irq_to_desc(irq); 2463 struct irq_desc *desc = irq_to_desc(irq);
2418 2464
@@ -2447,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2447 if (!cfg->move_cleanup_count) 2493 if (!cfg->move_cleanup_count)
2448 goto unlock; 2494 goto unlock;
2449 2495
2450 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) 2496 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2451 goto unlock; 2497 goto unlock;
2452 2498
2453 __get_cpu_var(vector_irq)[vector] = -1; 2499 __get_cpu_var(vector_irq)[vector] = -1;
@@ -2484,20 +2530,14 @@ static void irq_complete_move(struct irq_desc **descp)
2484 2530
2485 vector = ~get_irq_regs()->orig_ax; 2531 vector = ~get_irq_regs()->orig_ax;
2486 me = smp_processor_id(); 2532 me = smp_processor_id();
2487 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
2488 cpumask_t cleanup_mask;
2489
2490#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC 2533#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2491 *descp = desc = move_irq_desc(desc, me); 2534 *descp = desc = move_irq_desc(desc, me);
2492 /* get the new one */ 2535 /* get the new one */
2493 cfg = desc->chip_data; 2536 cfg = desc->chip_data;
2494#endif 2537#endif
2495 2538
2496 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2539 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2497 cfg->move_cleanup_count = cpus_weight(cleanup_mask); 2540 send_cleanup_vector(cfg);
2498 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2499 cfg->move_in_progress = 0;
2500 }
2501} 2541}
2502#else 2542#else
2503static inline void irq_complete_move(struct irq_desc **descp) {} 2543static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -2670,9 +2710,6 @@ static inline void init_IO_APIC_traps(void)
2670 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2710 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2671 */ 2711 */
2672 for_each_irq_desc(irq, desc) { 2712 for_each_irq_desc(irq, desc) {
2673 if (!desc)
2674 continue;
2675
2676 cfg = desc->chip_data; 2713 cfg = desc->chip_data;
2677 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2714 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2678 /* 2715 /*
@@ -3222,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3222 struct irq_cfg *cfg; 3259 struct irq_cfg *cfg;
3223 int err; 3260 int err;
3224 unsigned dest; 3261 unsigned dest;
3225 cpumask_t tmp;
3226 3262
3227 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3228 tmp = TARGET_CPUS; 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3229 err = assign_irq_vector(irq, cfg, tmp);
3230 if (err) 3265 if (err)
3231 return err; 3266 return err;
3232 3267
3233 cpus_and(tmp, cfg->domain, tmp); 3268 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3234 dest = cpu_mask_to_apicid(tmp);
3235 3269
3236#ifdef CONFIG_INTR_REMAP 3270#ifdef CONFIG_INTR_REMAP
3237 if (irq_remapped(irq)) { 3271 if (irq_remapped(irq)) {
@@ -3285,26 +3319,18 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3285} 3319}
3286 3320
3287#ifdef CONFIG_SMP 3321#ifdef CONFIG_SMP
3288static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3322static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3289{ 3323{
3290 struct irq_desc *desc = irq_to_desc(irq); 3324 struct irq_desc *desc = irq_to_desc(irq);
3291 struct irq_cfg *cfg; 3325 struct irq_cfg *cfg;
3292 struct msi_msg msg; 3326 struct msi_msg msg;
3293 unsigned int dest; 3327 unsigned int dest;
3294 cpumask_t tmp;
3295 3328
3296 cpus_and(tmp, mask, cpu_online_map); 3329 dest = set_desc_affinity(desc, mask);
3297 if (cpus_empty(tmp)) 3330 if (dest == BAD_APICID)
3298 return; 3331 return;
3299 3332
3300 cfg = desc->chip_data; 3333 cfg = desc->chip_data;
3301 if (assign_irq_vector(irq, cfg, mask))
3302 return;
3303
3304 set_extra_move_desc(desc, mask);
3305
3306 cpus_and(tmp, cfg->domain, mask);
3307 dest = cpu_mask_to_apicid(tmp);
3308 3334
3309 read_msi_msg_desc(desc, &msg); 3335 read_msi_msg_desc(desc, &msg);
3310 3336
@@ -3314,37 +3340,27 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3314 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3340 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3315 3341
3316 write_msi_msg_desc(desc, &msg); 3342 write_msi_msg_desc(desc, &msg);
3317 desc->affinity = mask;
3318} 3343}
3319#ifdef CONFIG_INTR_REMAP 3344#ifdef CONFIG_INTR_REMAP
3320/* 3345/*
3321 * Migrate the MSI irq to another cpumask. This migration is 3346 * Migrate the MSI irq to another cpumask. This migration is
3322 * done in the process context using interrupt-remapping hardware. 3347 * done in the process context using interrupt-remapping hardware.
3323 */ 3348 */
3324static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3349static void
3350ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3325{ 3351{
3326 struct irq_desc *desc = irq_to_desc(irq); 3352 struct irq_desc *desc = irq_to_desc(irq);
3327 struct irq_cfg *cfg; 3353 struct irq_cfg *cfg = desc->chip_data;
3328 unsigned int dest; 3354 unsigned int dest;
3329 cpumask_t tmp, cleanup_mask;
3330 struct irte irte; 3355 struct irte irte;
3331 3356
3332 cpus_and(tmp, mask, cpu_online_map);
3333 if (cpus_empty(tmp))
3334 return;
3335
3336 if (get_irte(irq, &irte)) 3357 if (get_irte(irq, &irte))
3337 return; 3358 return;
3338 3359
3339 cfg = desc->chip_data; 3360 dest = set_desc_affinity(desc, mask);
3340 if (assign_irq_vector(irq, cfg, mask)) 3361 if (dest == BAD_APICID)
3341 return; 3362 return;
3342 3363
3343 set_extra_move_desc(desc, mask);
3344
3345 cpus_and(tmp, cfg->domain, mask);
3346 dest = cpu_mask_to_apicid(tmp);
3347
3348 irte.vector = cfg->vector; 3364 irte.vector = cfg->vector;
3349 irte.dest_id = IRTE_DEST(dest); 3365 irte.dest_id = IRTE_DEST(dest);
3350 3366
@@ -3358,14 +3374,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3358 * at the new destination. So, time to cleanup the previous 3374 * at the new destination. So, time to cleanup the previous
3359 * vector allocation. 3375 * vector allocation.
3360 */ 3376 */
3361 if (cfg->move_in_progress) { 3377 if (cfg->move_in_progress)
3362 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 3378 send_cleanup_vector(cfg);
3363 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3364 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3365 cfg->move_in_progress = 0;
3366 }
3367
3368 desc->affinity = mask;
3369} 3379}
3370 3380
3371#endif 3381#endif
@@ -3556,26 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq)
3556 3566
3557#ifdef CONFIG_DMAR 3567#ifdef CONFIG_DMAR
3558#ifdef CONFIG_SMP 3568#ifdef CONFIG_SMP
3559static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3569static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3560{ 3570{
3561 struct irq_desc *desc = irq_to_desc(irq); 3571 struct irq_desc *desc = irq_to_desc(irq);
3562 struct irq_cfg *cfg; 3572 struct irq_cfg *cfg;
3563 struct msi_msg msg; 3573 struct msi_msg msg;
3564 unsigned int dest; 3574 unsigned int dest;
3565 cpumask_t tmp;
3566 3575
3567 cpus_and(tmp, mask, cpu_online_map); 3576 dest = set_desc_affinity(desc, mask);
3568 if (cpus_empty(tmp)) 3577 if (dest == BAD_APICID)
3569 return; 3578 return;
3570 3579
3571 cfg = desc->chip_data; 3580 cfg = desc->chip_data;
3572 if (assign_irq_vector(irq, cfg, mask))
3573 return;
3574
3575 set_extra_move_desc(desc, mask);
3576
3577 cpus_and(tmp, cfg->domain, mask);
3578 dest = cpu_mask_to_apicid(tmp);
3579 3581
3580 dmar_msi_read(irq, &msg); 3582 dmar_msi_read(irq, &msg);
3581 3583
@@ -3585,7 +3587,6 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3585 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3587 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3586 3588
3587 dmar_msi_write(irq, &msg); 3589 dmar_msi_write(irq, &msg);
3588 desc->affinity = mask;
3589} 3590}
3590 3591
3591#endif /* CONFIG_SMP */ 3592#endif /* CONFIG_SMP */
@@ -3619,26 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq)
3619#ifdef CONFIG_HPET_TIMER 3620#ifdef CONFIG_HPET_TIMER
3620 3621
3621#ifdef CONFIG_SMP 3622#ifdef CONFIG_SMP
3622static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) 3623static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3623{ 3624{
3624 struct irq_desc *desc = irq_to_desc(irq); 3625 struct irq_desc *desc = irq_to_desc(irq);
3625 struct irq_cfg *cfg; 3626 struct irq_cfg *cfg;
3626 struct msi_msg msg; 3627 struct msi_msg msg;
3627 unsigned int dest; 3628 unsigned int dest;
3628 cpumask_t tmp;
3629 3629
3630 cpus_and(tmp, mask, cpu_online_map); 3630 dest = set_desc_affinity(desc, mask);
3631 if (cpus_empty(tmp)) 3631 if (dest == BAD_APICID)
3632 return; 3632 return;
3633 3633
3634 cfg = desc->chip_data; 3634 cfg = desc->chip_data;
3635 if (assign_irq_vector(irq, cfg, mask))
3636 return;
3637
3638 set_extra_move_desc(desc, mask);
3639
3640 cpus_and(tmp, cfg->domain, mask);
3641 dest = cpu_mask_to_apicid(tmp);
3642 3635
3643 hpet_msi_read(irq, &msg); 3636 hpet_msi_read(irq, &msg);
3644 3637
@@ -3648,7 +3641,6 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3648 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3641 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3649 3642
3650 hpet_msi_write(irq, &msg); 3643 hpet_msi_write(irq, &msg);
3651 desc->affinity = mask;
3652} 3644}
3653 3645
3654#endif /* CONFIG_SMP */ 3646#endif /* CONFIG_SMP */
@@ -3703,28 +3695,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3703 write_ht_irq_msg(irq, &msg); 3695 write_ht_irq_msg(irq, &msg);
3704} 3696}
3705 3697
3706static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3698static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3707{ 3699{
3708 struct irq_desc *desc = irq_to_desc(irq); 3700 struct irq_desc *desc = irq_to_desc(irq);
3709 struct irq_cfg *cfg; 3701 struct irq_cfg *cfg;
3710 unsigned int dest; 3702 unsigned int dest;
3711 cpumask_t tmp;
3712 3703
3713 cpus_and(tmp, mask, cpu_online_map); 3704 dest = set_desc_affinity(desc, mask);
3714 if (cpus_empty(tmp)) 3705 if (dest == BAD_APICID)
3715 return; 3706 return;
3716 3707
3717 cfg = desc->chip_data; 3708 cfg = desc->chip_data;
3718 if (assign_irq_vector(irq, cfg, mask))
3719 return;
3720
3721 set_extra_move_desc(desc, mask);
3722
3723 cpus_and(tmp, cfg->domain, mask);
3724 dest = cpu_mask_to_apicid(tmp);
3725 3709
3726 target_ht_irq(irq, dest, cfg->vector); 3710 target_ht_irq(irq, dest, cfg->vector);
3727 desc->affinity = mask;
3728} 3711}
3729 3712
3730#endif 3713#endif
@@ -3744,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3744{ 3727{
3745 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3746 int err; 3729 int err;
3747 cpumask_t tmp;
3748 3730
3749 cfg = irq_cfg(irq); 3731 cfg = irq_cfg(irq);
3750 tmp = TARGET_CPUS; 3732 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3751 err = assign_irq_vector(irq, cfg, tmp);
3752 if (!err) { 3733 if (!err) {
3753 struct ht_irq_msg msg; 3734 struct ht_irq_msg msg;
3754 unsigned dest; 3735 unsigned dest;
3755 3736
3756 cpus_and(tmp, cfg->domain, tmp); 3737 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3757 dest = cpu_mask_to_apicid(tmp);
3758 3738
3759 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3739 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3760 3740
@@ -3790,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3790int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, 3770int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3791 unsigned long mmr_offset) 3771 unsigned long mmr_offset)
3792{ 3772{
3793 const cpumask_t *eligible_cpu = get_cpu_mask(cpu); 3773 const struct cpumask *eligible_cpu = cpumask_of(cpu);
3794 struct irq_cfg *cfg; 3774 struct irq_cfg *cfg;
3795 int mmr_pnode; 3775 int mmr_pnode;
3796 unsigned long mmr_value; 3776 unsigned long mmr_value;
@@ -3800,7 +3780,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3800 3780
3801 cfg = irq_cfg(irq); 3781 cfg = irq_cfg(irq);
3802 3782
3803 err = assign_irq_vector(irq, cfg, *eligible_cpu); 3783 err = assign_irq_vector(irq, cfg, eligible_cpu);
3804 if (err != 0) 3784 if (err != 0)
3805 return err; 3785 return err;
3806 3786
@@ -3819,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3819 entry->polarity = 0; 3799 entry->polarity = 0;
3820 entry->trigger = 0; 3800 entry->trigger = 0;
3821 entry->mask = 0; 3801 entry->mask = 0;
3822 entry->dest = cpu_mask_to_apicid(*eligible_cpu); 3802 entry->dest = cpu_mask_to_apicid(eligible_cpu);
3823 3803
3824 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3804 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3825 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3805 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -4030,7 +4010,7 @@ void __init setup_ioapic_dest(void)
4030 int pin, ioapic, irq, irq_entry; 4010 int pin, ioapic, irq, irq_entry;
4031 struct irq_desc *desc; 4011 struct irq_desc *desc;
4032 struct irq_cfg *cfg; 4012 struct irq_cfg *cfg;
4033 cpumask_t mask; 4013 const struct cpumask *mask;
4034 4014
4035 if (skip_ioapic_setup == 1) 4015 if (skip_ioapic_setup == 1)
4036 return; 4016 return;
@@ -4061,7 +4041,7 @@ void __init setup_ioapic_dest(void)
4061 */ 4041 */
4062 if (desc->status & 4042 if (desc->status &
4063 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4043 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4064 mask = desc->affinity; 4044 mask = &desc->affinity;
4065 else 4045 else
4066 mask = TARGET_CPUS; 4046 mask = TARGET_CPUS;
4067 4047
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
116/* 116/*
117 * This is only used on smaller machines. 117 * This is only used on smaller machines.
118 */ 118 */
119void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{ 120{
121 unsigned long mask = cpus_addr(cpumask)[0]; 121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 122 unsigned long flags;
123 123
124 local_irq_save(flags); 124 local_irq_save(flags);
125 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); 125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128} 128}
129 129
130void send_IPI_mask_sequence(cpumask_t mask, int vector) 130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{ 131{
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int query_cpu; 133 unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
139 */ 139 */
140 140
141 local_irq_save(flags); 141 local_irq_save(flags);
142 for_each_possible_cpu(query_cpu) { 142 for_each_cpu(query_cpu, mask)
143 if (cpu_isset(query_cpu, mask)) { 143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
144 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), 158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
145 vector); 159 vector);
146 }
147 }
148 local_irq_restore(flags); 160 local_irq_restore(flags);
149} 161}
150 162
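In ipi.c the same conversion applies: send_IPI_mask_bitmask() reads the low word of the mask through cpumask_bits(mask)[0] (these flat-APIC paths only handle CPUs in the first word anyway), send_IPI_mask_sequence() iterates just the set bits with for_each_cpu(), and a send_IPI_mask_allbutself() variant skips the sending CPU. A one-line sketch of the raw-word access, assuming kernel context; the helper name is illustrative.

#include <linux/cpumask.h>

/* Sketch: the first BITS_PER_LONG CPUs of a mask as one bitmap word. */
static unsigned long example_low_word(const struct cpumask *mask)
{
        return cpumask_bits(mask)[0];
}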
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d18df67..bce53e1352a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
9#include <asm/apic.h> 9#include <asm/apic.h>
10#include <asm/io_apic.h> 10#include <asm/io_apic.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/irq.h>
12 13
13atomic_t irq_err_count; 14atomic_t irq_err_count;
14 15
@@ -190,3 +191,5 @@ u64 arch_irq_stat(void)
190#endif 191#endif
191 return sum; 192 return sum;
192} 193}
194
195EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 119fc9c8ff7f..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
233#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 234#include <mach_apic.h>
235 235
236void fixup_irqs(cpumask_t map) 236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void)
237{ 238{
238 unsigned int irq; 239 unsigned int irq;
239 static int warned; 240 static int warned;
240 struct irq_desc *desc; 241 struct irq_desc *desc;
241 242
242 for_each_irq_desc(irq, desc) { 243 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 244 const struct cpumask *affinity;
244 245
245 if (!desc) 246 if (!desc)
246 continue; 247 continue;
247 if (irq == 2) 248 if (irq == 2)
248 continue; 249 continue;
249 250
250 cpus_and(mask, desc->affinity, map); 251 affinity = &desc->affinity;
251 if (any_online_cpu(mask) == NR_CPUS) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
252 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
253 mask = map; 254 affinity = cpu_all_mask;
254 } 255 }
255 if (desc->chip->set_affinity) 256 if (desc->chip->set_affinity)
256 desc->chip->set_affinity(irq, mask); 257 desc->chip->set_affinity(irq, affinity);
257 else if (desc->action && !(warned++)) 258 else if (desc->action && !(warned++))
258 printk("Cannot set affinity for irq %i\n", irq); 259 printk("Cannot set affinity for irq %i\n", irq);
259 } 260 }
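fixup_irqs() drops its cpumask_t parameter: the surviving CPUs are read from cpu_online_mask directly, and "this IRQ's affinity contains no online CPU" becomes the test cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids, after which the affinity is broken to cpu_all_mask. The 64-bit variant below makes the same change. A sketch of that emptiness test, with an illustrative helper name:

#include <linux/cpumask.h>
#include <linux/types.h>

/* Sketch: true when no CPU in @affinity is currently online. */
static bool example_affinity_is_dead(const struct cpumask *affinity)
{
        return cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids;
}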
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index a174a217eb1a..6383d50f82ea 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -80,16 +80,17 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
80} 80}
81 81
82#ifdef CONFIG_HOTPLUG_CPU 82#ifdef CONFIG_HOTPLUG_CPU
83void fixup_irqs(cpumask_t map) 83/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
84void fixup_irqs(void)
84{ 85{
85 unsigned int irq; 86 unsigned int irq;
86 static int warned; 87 static int warned;
87 struct irq_desc *desc; 88 struct irq_desc *desc;
88 89
89 for_each_irq_desc(irq, desc) { 90 for_each_irq_desc(irq, desc) {
90 cpumask_t mask;
91 int break_affinity = 0; 91 int break_affinity = 0;
92 int set_affinity = 1; 92 int set_affinity = 1;
93 const struct cpumask *affinity;
93 94
94 if (!desc) 95 if (!desc)
95 continue; 96 continue;
@@ -99,23 +100,23 @@ void fixup_irqs(cpumask_t map)
99 /* interrupt's are disabled at this point */ 100 /* interrupt's are disabled at this point */
100 spin_lock(&desc->lock); 101 spin_lock(&desc->lock);
101 102
103 affinity = &desc->affinity;
102 if (!irq_has_action(irq) || 104 if (!irq_has_action(irq) ||
103 cpus_equal(desc->affinity, map)) { 105 cpumask_equal(affinity, cpu_online_mask)) {
104 spin_unlock(&desc->lock); 106 spin_unlock(&desc->lock);
105 continue; 107 continue;
106 } 108 }
107 109
108 cpus_and(mask, desc->affinity, map); 110 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
109 if (cpus_empty(mask)) {
110 break_affinity = 1; 111 break_affinity = 1;
111 mask = map; 112 affinity = cpu_all_mask;
112 } 113 }
113 114
114 if (desc->chip->mask) 115 if (desc->chip->mask)
115 desc->chip->mask(irq); 116 desc->chip->mask(irq);
116 117
117 if (desc->chip->set_affinity) 118 if (desc->chip->set_affinity)
118 desc->chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, affinity);
119 else if (!(warned++)) 120 else if (!(warned++))
120 set_affinity = 0; 121 set_affinity = 0;
121 122
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 203384ed2b5d..84723295f88a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
111}; 111};
112 112
113int vector_used_by_percpu_irq(unsigned int vector)
114{
115 int cpu;
116
117 for_each_online_cpu(cpu) {
118 if (per_cpu(vector_irq, cpu)[vector] != -1)
119 return 1;
120 }
121
122 return 0;
123}
124
113/* Overridden in paravirt.c */ 125/* Overridden in paravirt.c */
114void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 126void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
115 127
@@ -146,10 +158,12 @@ void __init native_init_IRQ(void)
146 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 158 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
147 159
148 /* IPI for single call function */ 160 /* IPI for single call function */
149 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 161 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
162 call_function_single_interrupt);
150 163
151 /* Low priority IPI to cleanup after moving an irq */ 164 /* Low priority IPI to cleanup after moving an irq */
152 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 165 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
166 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
153#endif 167#endif
154 168
155#ifdef CONFIG_X86_LOCAL_APIC 169#ifdef CONFIG_X86_LOCAL_APIC
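Both irqinit files gain vector_used_by_percpu_irq(), which reports whether any online CPU still has an IRQ bound to a vector, and they now mark IRQ_MOVE_CLEANUP_VECTOR in used_vectors so the allocator (via the test_bit() added to __assign_irq_vector() above) never hands that vector to a device. A sketch of the reservation idea against a private bitmap, with illustrative names:

#include <linux/types.h>
#include <linux/bitops.h>

#define EXAMPLE_NR_VECTORS 256

/* Sketch: a vector marked here is skipped by any allocator that
 * test_bit()s this map before assigning. */
static DECLARE_BITMAP(example_used_vectors, EXAMPLE_NR_VECTORS);

static void example_reserve_vector(unsigned int vector)
{
        set_bit(vector, example_used_vectors);
}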
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 6190e6ef546c..31ebfe38e96c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
70}; 70};
71 71
72int vector_used_by_percpu_irq(unsigned int vector)
73{
74 int cpu;
75
76 for_each_online_cpu(cpu) {
77 if (per_cpu(vector_irq, cpu)[vector] != -1)
78 return 1;
79 }
80
81 return 0;
82}
83
72void __init init_ISA_irqs(void) 84void __init init_ISA_irqs(void)
73{ 85{
74 int i; 86 int i;
@@ -121,6 +133,7 @@ static void __init smp_intr_init(void)
121 133
122 /* Low priority IPI to cleanup after moving an irq */ 134 /* Low priority IPI to cleanup after moving an irq */
123 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 135 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
136 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
124#endif 137#endif
125} 138}
126 139
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
89 */ 89 */
90static unsigned long kvm_get_tsc_khz(void) 90static unsigned long kvm_get_tsc_khz(void)
91{ 91{
92 return preset_lpj; 92 struct pvclock_vcpu_time_info *src;
93 src = &per_cpu(hv_clock, 0);
94 return pvclock_tsc_khz(src);
93} 95}
94 96
95static void kvm_get_preset_lpj(void) 97static void kvm_get_preset_lpj(void)
96{ 98{
97 struct pvclock_vcpu_time_info *src;
98 unsigned long khz; 99 unsigned long khz;
99 u64 lpj; 100 u64 lpj;
100 101
101 src = &per_cpu(hv_clock, 0); 102 khz = kvm_get_tsc_khz();
102 khz = pvclock_tsc_khz(src);
103 103
104 lpj = ((u64)khz * 1000); 104 lpj = ((u64)khz * 1000);
105 do_div(lpj, HZ); 105 do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
194#endif 194#endif
195 kvm_get_preset_lpj(); 195 kvm_get_preset_lpj();
196 clocksource_register(&kvm_clock); 196 clocksource_register(&kvm_clock);
197 pv_info.paravirt_enabled = 1;
198 pv_info.name = "KVM";
197 } 199 }
198} 200}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee3..71f1d99a635d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/uaccess.h>
15 16
16#include <asm/uaccess.h>
17#include <asm/system.h> 17#include <asm/system.h>
18#include <asm/ldt.h> 18#include <asm/ldt.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
93 if (err < 0) 93 if (err < 0)
94 return err; 94 return err;
95 95
96 for(i = 0; i < old->size; i++) 96 for (i = 0; i < old->size; i++)
97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); 97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
98 return 0; 98 return 0;
99} 99}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
287 .set_mode = mfgpt_set_mode, 287 .set_mode = mfgpt_set_mode,
288 .set_next_event = mfgpt_next_event, 288 .set_next_event = mfgpt_next_event,
289 .rating = 250, 289 .rating = 250,
290 .cpumask = CPU_MASK_ALL, 290 .cpumask = cpu_all_mask,
291 .shift = 32 291 .shift = 32
292}; 292};
293 293
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe85..666e43df51f9 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/acpi.h> 14#include <asm/acpi.h>
15#include <asm/mmconfig.h> 15#include <asm/mmconfig.h>
16 16#include <asm/pci_x86.h>
17#include "../pci/pci.h"
18 17
19struct pci_hostbridge_probe { 18struct pci_hostbridge_probe {
20 u32 bus; 19 u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808ba..c5c5b8df1dbc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/acpi.h> 17#include <linux/acpi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp.h>
20#include <linux/acpi.h>
19 21
20#include <asm/smp.h>
21#include <asm/mtrr.h> 22#include <asm/mtrr.h>
22#include <asm/mpspec.h> 23#include <asm/mpspec.h>
23#include <asm/pgalloc.h> 24#include <asm/pgalloc.h>
24#include <asm/io_apic.h> 25#include <asm/io_apic.h>
25#include <asm/proto.h> 26#include <asm/proto.h>
26#include <asm/acpi.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/e820.h> 28#include <asm/e820.h>
29#include <asm/trampoline.h> 29#include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
95#endif 95#endif
96 96
97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { 97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
98 set_bit(m->mpc_busid, mp_bus_not_pci); 98 set_bit(m->mpc_busid, mp_bus_not_pci);
99#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 99#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
101#endif 101#endif
102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
104 x86_quirks->mpc_oem_pci_bus(m); 104 x86_quirks->mpc_oem_pci_bus(m);
105 105
106 clear_bit(m->mpc_busid, mp_bus_not_pci); 106 clear_bit(m->mpc_busid, mp_bus_not_pci);
107#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 107#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; 110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 82a7c7ed6d45..726266695b2c 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file)
136 lock_kernel(); 136 lock_kernel();
137 cpu = iminor(file->f_path.dentry->d_inode); 137 cpu = iminor(file->f_path.dentry->d_inode);
138 138
139 if (cpu >= NR_CPUS || !cpu_online(cpu)) { 139 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
140 ret = -ENXIO; /* No such CPU */ 140 ret = -ENXIO; /* No such CPU */
141 goto out; 141 goto out;
142 } 142 }
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a7..45a09ccdc214 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
26#include <linux/kernel_stat.h> 26#include <linux/kernel_stat.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/smp.h> 28#include <linux/smp.h>
29#include <linux/nmi.h>
29 30
30#include <asm/i8259.h> 31#include <asm/i8259.h>
31#include <asm/io_apic.h> 32#include <asm/io_apic.h>
32#include <asm/smp.h>
33#include <asm/nmi.h>
34#include <asm/proto.h> 33#include <asm/proto.h>
35#include <asm/timer.h> 34#include <asm/timer.h>
36 35
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff6..00c2bcd41463 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */
52 * to trigger bugs with some popular PCI cards, in particular 3ware (but 52 * to trigger bugs with some popular PCI cards, in particular 3ware (but
 53 * has also been seen with Qlogic at least). 53 * has also been seen with Qlogic at least).
54 */ 54 */
55int iommu_fullflush = 1; 55static int iommu_fullflush = 1;
56 56
57/* Allocation bitmap for the remapping area: */ 57/* Allocation bitmap for the remapping area: */
58static DEFINE_SPINLOCK(iommu_bitmap_lock); 58static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 242c3440687f..8cba3749a511 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -13,7 +13,7 @@
13 13
14int swiotlb __read_mostly; 14int swiotlb __read_mostly;
15 15
16void *swiotlb_alloc_boot(size_t size, unsigned long nslabs) 16void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
17{ 17{
18 return alloc_bootmem_low_pages(size); 18 return alloc_bootmem_low_pages(size);
19} 19}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6eec..2b46eb41643b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,8 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h>
16#include <asm/virtext.h>
15 17
16#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
17# include <linux/dmi.h> 19# include <linux/dmi.h>
@@ -23,7 +25,6 @@
23 25
24#include <mach_ipi.h> 26#include <mach_ipi.h>
25 27
26
27/* 28/*
28 * Power off function, if any 29 * Power off function, if any
29 */ 30 */
@@ -39,6 +40,12 @@ int reboot_force;
39static int reboot_cpu = -1; 40static int reboot_cpu = -1;
40#endif 41#endif
41 42
43/* This is set if we need to go through the 'emergency' path.
 44 * When machine_emergency_restart() is called, we may be in
 45 * an inconsistent state and won't be able to do a full cleanup.
46 */
47static int reboot_emergency;
48
42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 49/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false; 50bool port_cf9_safe = false;
44 51
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
368 } 375 }
369} 376}
370 377
378static void vmxoff_nmi(int cpu, struct die_args *args)
379{
380 cpu_emergency_vmxoff();
381}
382
383/* Use NMIs as IPIs to tell all CPUs to disable virtualization
384 */
385static void emergency_vmx_disable_all(void)
386{
387 /* Just make sure we won't change CPUs while doing this */
388 local_irq_disable();
389
390 /* We need to disable VMX on all CPUs before rebooting, otherwise
 391 * we risk hanging the machine, because CPUs ignore INIT
392 * signals when VMX is enabled.
393 *
 394 * We can't take any locks and we may be in an inconsistent
395 * state, so we use NMIs as IPIs to tell the other CPUs to disable
396 * VMX and halt.
397 *
398 * For safety, we will avoid running the nmi_shootdown_cpus()
399 * stuff unnecessarily, but we don't have a way to check
400 * if other CPUs have VMX enabled. So we will call it only if the
401 * CPU we are running on has VMX enabled.
402 *
403 * We will miss cases where VMX is not enabled on all CPUs. This
 404 * shouldn't do much harm because KVM always enables VMX on all
 405 * CPUs anyway. But we can miss it in the small window where KVM
406 * is still enabling VMX.
407 */
408 if (cpu_has_vmx() && cpu_vmx_enabled()) {
409 /* Disable VMX on this CPU.
410 */
411 cpu_vmxoff();
412
413 /* Halt and disable VMX on the other CPUs */
414 nmi_shootdown_cpus(vmxoff_nmi);
415
416 }
417}
418
419
371void __attribute__((weak)) mach_reboot_fixups(void) 420void __attribute__((weak)) mach_reboot_fixups(void)
372{ 421{
373} 422}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
376{ 425{
377 int i; 426 int i;
378 427
428 if (reboot_emergency)
429 emergency_vmx_disable_all();
430
379 /* Tell the BIOS if we want cold or warm reboot */ 431 /* Tell the BIOS if we want cold or warm reboot */
380 *((unsigned short *)__va(0x472)) = reboot_mode; 432 *((unsigned short *)__va(0x472)) = reboot_mode;
381 433
@@ -449,7 +501,7 @@ void native_machine_shutdown(void)
449 501
450#ifdef CONFIG_X86_32 502#ifdef CONFIG_X86_32
451 /* See if there has been given a command line override */ 503 /* See if there has been given a command line override */
452 if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && 504 if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
453 cpu_online(reboot_cpu)) 505 cpu_online(reboot_cpu))
454 reboot_cpu_id = reboot_cpu; 506 reboot_cpu_id = reboot_cpu;
455#endif 507#endif
@@ -459,7 +511,7 @@ void native_machine_shutdown(void)
459 reboot_cpu_id = smp_processor_id(); 511 reboot_cpu_id = smp_processor_id();
460 512
461 /* Make certain I only run on the appropriate processor */ 513 /* Make certain I only run on the appropriate processor */
462 set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); 514 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
463 515
464 /* O.K Now that I'm on the appropriate processor, 516 /* O.K Now that I'm on the appropriate processor,
465 * stop all of the others. 517 * stop all of the others.
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
482#endif 534#endif
483} 535}
484 536
537static void __machine_emergency_restart(int emergency)
538{
539 reboot_emergency = emergency;
540 machine_ops.emergency_restart();
541}
542
485static void native_machine_restart(char *__unused) 543static void native_machine_restart(char *__unused)
486{ 544{
487 printk("machine restart\n"); 545 printk("machine restart\n");
488 546
489 if (!reboot_force) 547 if (!reboot_force)
490 machine_shutdown(); 548 machine_shutdown();
491 machine_emergency_restart(); 549 __machine_emergency_restart(0);
492} 550}
493 551
494static void native_machine_halt(void) 552static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
532 590
533void machine_emergency_restart(void) 591void machine_emergency_restart(void)
534{ 592{
535 machine_ops.emergency_restart(); 593 __machine_emergency_restart(1);
536} 594}
537 595
538void machine_restart(char *cmd) 596void machine_restart(char *cmd)
@@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self,
592 650
593static void smp_send_nmi_allbutself(void) 651static void smp_send_nmi_allbutself(void)
594{ 652{
595 cpumask_t mask = cpu_online_map; 653 send_IPI_allbutself(NMI_VECTOR);
596 cpu_clear(safe_smp_processor_id(), mask);
597 if (!cpus_empty(mask))
598 send_IPI_mask(mask, NMI_VECTOR);
599} 654}
600 655
601static struct notifier_block crash_nmi_nb = { 656static struct notifier_block crash_nmi_nb = {
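
The comment block in emergency_vmx_disable_all() explains the mechanism: NMIs are used as IPIs because the emergency path cannot take locks, and VMX must be off everywhere or the CPUs ignore INIT. Here is a condensed sketch of that shape, not from the patch, assuming nmi_shootdown_cpus() and its (int cpu, struct die_args *) callback as added to crash.c in this merge, plus the cpu_emergency_vmxoff() helper from <asm/virtext.h>; the example_* names are illustrative.

#include <linux/irqflags.h>
#include <linux/kdebug.h>
#include <asm/reboot.h>
#include <asm/virtext.h>

/* Runs on every other CPU in NMI context: no locks, no sleeping,
 * just turn VMX off (a no-op where VMX is not enabled).
 */
static void example_vmxoff_nmi(int cpu, struct die_args *args)
{
	cpu_emergency_vmxoff();
}

static void example_emergency_quiesce(void)
{
	local_irq_disable();			/* stay on this CPU */
	cpu_emergency_vmxoff();			/* current CPU first */
	nmi_shootdown_cpus(example_vmxoff_nmi);	/* NMI the rest; they halt */
}
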
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..a4b619c33106 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,8 +152,11 @@ void __init setup_per_cpu_areas(void)
152 old_size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align); 153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align); 154 size = roundup(old_size, align);
155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 155
156 size); 156 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
157 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
158
159 pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
157 160
158 for_each_possible_cpu(cpu) { 161 for_each_possible_cpu(cpu) {
159#ifndef CONFIG_NEED_MULTIPLE_NODES 162#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -164,28 +167,21 @@ void __init setup_per_cpu_areas(void)
164 if (!node_online(node) || !NODE_DATA(node)) { 167 if (!node_online(node) || !NODE_DATA(node)) {
165 ptr = __alloc_bootmem(size, align, 168 ptr = __alloc_bootmem(size, align,
166 __pa(MAX_DMA_ADDRESS)); 169 __pa(MAX_DMA_ADDRESS));
167 printk(KERN_INFO 170 pr_info("cpu %d has no node %d or node-local memory\n",
168 "cpu %d has no node %d or node-local memory\n",
169 cpu, node); 171 cpu, node);
170 if (ptr) 172 pr_debug("per cpu data for cpu%d at %016lx\n",
171 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", 173 cpu, __pa(ptr));
172 cpu, __pa(ptr)); 174 } else {
173 }
174 else {
175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS)); 176 __pa(MAX_DMA_ADDRESS));
177 if (ptr) 177 pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 178 cpu, node, __pa(ptr));
179 cpu, node, __pa(ptr));
180 } 179 }
181#endif 180#endif
182 per_cpu_offset(cpu) = ptr - __per_cpu_start; 181 per_cpu_offset(cpu) = ptr - __per_cpu_start;
183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 182 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
184 } 183 }
185 184
186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
187 NR_CPUS, nr_cpu_ids, nr_node_ids);
188
189 /* Setup percpu data maps */ 185 /* Setup percpu data maps */
190 setup_per_cpu_maps(); 186 setup_per_cpu_maps();
191 187
@@ -282,7 +278,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
282 else 278 else
283 cpu_clear(cpu, *mask); 279 cpu_clear(cpu, *mask);
284 280
285 cpulist_scnprintf(buf, sizeof(buf), *mask); 281 cpulist_scnprintf(buf, sizeof(buf), mask);
286 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 282 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
287 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 283 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
288 } 284 }
@@ -334,25 +330,25 @@ static const cpumask_t cpu_mask_none;
334/* 330/*
335 * Returns a pointer to the bitmask of CPUs on Node 'node'. 331 * Returns a pointer to the bitmask of CPUs on Node 'node'.
336 */ 332 */
337const cpumask_t *_node_to_cpumask_ptr(int node) 333const cpumask_t *cpumask_of_node(int node)
338{ 334{
339 if (node_to_cpumask_map == NULL) { 335 if (node_to_cpumask_map == NULL) {
340 printk(KERN_WARNING 336 printk(KERN_WARNING
341 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", 337 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
342 node); 338 node);
343 dump_stack(); 339 dump_stack();
344 return (const cpumask_t *)&cpu_online_map; 340 return (const cpumask_t *)&cpu_online_map;
345 } 341 }
346 if (node >= nr_node_ids) { 342 if (node >= nr_node_ids) {
347 printk(KERN_WARNING 343 printk(KERN_WARNING
348 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", 344 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
349 node, nr_node_ids); 345 node, nr_node_ids);
350 dump_stack(); 346 dump_stack();
351 return &cpu_mask_none; 347 return &cpu_mask_none;
352 } 348 }
353 return &node_to_cpumask_map[node]; 349 return &node_to_cpumask_map[node];
354} 350}
355EXPORT_SYMBOL(_node_to_cpumask_ptr); 351EXPORT_SYMBOL(cpumask_of_node);
356 352
357/* 353/*
358 * Returns a bitmask of CPUs on Node 'node'. 354 * Returns a bitmask of CPUs on Node 'node'.
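
For a consumer's view of the rename above, a minimal sketch, not from the patch, of code calling cpumask_of_node() where it would previously have called _node_to_cpumask_ptr(); it assumes the new-style cpumask iterators in <linux/cpumask.h> that this series converts the tree to, and the helper name is made up.

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Count the online CPUs of a node.  cpumask_of_node() returns a
 * pointer, so no NR_CPUS-sized mask is copied onto the stack.
 */
static int example_node_online_cpus(int node)
{
	const struct cpumask *mask = cpumask_of_node(node);
	int cpu, n = 0;

	for_each_cpu(cpu, mask)
		if (cpu_online(cpu))
			n++;
	return n;
}
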
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db362c1..beea2649a240 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 118 WARN_ON(1);
119 return; 119 return;
120 } 120 }
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124void native_send_call_func_single_ipi(int cpu) 124void native_send_call_func_single_ipi(int cpu)
125{ 125{
126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); 126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 127}
128 128
129void native_send_call_func_ipi(cpumask_t mask) 129void native_send_call_func_ipi(const struct cpumask *mask)
130{ 130{
131 cpumask_t allbutself; 131 cpumask_t allbutself;
132 132
133 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
134 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
135 135
136 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(*mask, allbutself) &&
137 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
138 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
139 else 139 else
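
The smp.c hunks show the calling convention this merge converges on: IPI helpers take const struct cpumask * instead of a cpumask_t by value, with cpumask_of(cpu) supplying a pointer to a constant single-CPU mask. A small sketch of that convention, not from the patch; the function name is illustrative.

#include <linux/types.h>
#include <linux/cpumask.h>

/* True iff 'mask' contains exactly 'cpu' and nothing else.  The mask is
 * passed by pointer, and cpumask_of(cpu) never copies NR_CPUS bits.
 */
static bool example_targets_only(const struct cpumask *mask, int cpu)
{
	return cpumask_equal(mask, cpumask_of(cpu));
}
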
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f8500c969442..6bd4d9b73870 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings);
102/* Last level cache ID of each logical CPU */ 102/* Last level cache ID of each logical CPU */
103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
104 104
105/* bitmap of online cpus */
106cpumask_t cpu_online_map __read_mostly;
107EXPORT_SYMBOL(cpu_online_map);
108
109cpumask_t cpu_callin_map; 105cpumask_t cpu_callin_map;
110cpumask_t cpu_callout_map; 106cpumask_t cpu_callout_map;
111cpumask_t cpu_possible_map;
112EXPORT_SYMBOL(cpu_possible_map);
113 107
114/* representing HT siblings of each logical CPU */ 108/* representing HT siblings of each logical CPU */
115DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); 109DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -502,7 +496,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
502} 496}
503 497
504/* maps the cpu to the sched domain representing multi-core */ 498/* maps the cpu to the sched domain representing multi-core */
505cpumask_t cpu_coregroup_map(int cpu) 499const struct cpumask *cpu_coregroup_mask(int cpu)
506{ 500{
507 struct cpuinfo_x86 *c = &cpu_data(cpu); 501 struct cpuinfo_x86 *c = &cpu_data(cpu);
508 /* 502 /*
@@ -510,9 +504,14 @@ cpumask_t cpu_coregroup_map(int cpu)
510 * And for power savings, we return cpu_core_map 504 * And for power savings, we return cpu_core_map
511 */ 505 */
512 if (sched_mc_power_savings || sched_smt_power_savings) 506 if (sched_mc_power_savings || sched_smt_power_savings)
513 return per_cpu(cpu_core_map, cpu); 507 return &per_cpu(cpu_core_map, cpu);
514 else 508 else
515 return c->llc_shared_map; 509 return &c->llc_shared_map;
510}
511
512cpumask_t cpu_coregroup_map(int cpu)
513{
514 return *cpu_coregroup_mask(cpu);
516} 515}
517 516
518static void impress_friends(void) 517static void impress_friends(void)
@@ -1155,7 +1154,7 @@ static void __init smp_cpu_index_default(void)
1155 for_each_possible_cpu(i) { 1154 for_each_possible_cpu(i) {
1156 c = &cpu_data(i); 1155 c = &cpu_data(i);
1157 /* mark all to hotplug */ 1156 /* mark all to hotplug */
1158 c->cpu_index = NR_CPUS; 1157 c->cpu_index = nr_cpu_ids;
1159 } 1158 }
1160} 1159}
1161 1160
@@ -1260,6 +1259,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1260 check_nmi_watchdog(); 1259 check_nmi_watchdog();
1261} 1260}
1262 1261
1262static int __initdata setup_possible_cpus = -1;
1263static int __init _setup_possible_cpus(char *str)
1264{
1265 get_option(&str, &setup_possible_cpus);
1266 return 0;
1267}
1268early_param("possible_cpus", _setup_possible_cpus);
1269
1270
1263/* 1271/*
1264 * cpu_possible_map should be static, it cannot change as cpu's 1272 * cpu_possible_map should be static, it cannot change as cpu's
1265 * are onlined, or offlined. The reason is per-cpu data-structures 1273 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1272,7 +1280,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1272 * 1280 *
1273 * Three ways to find out the number of additional hotplug CPUs: 1281 * Three ways to find out the number of additional hotplug CPUs:
1274 * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 1282 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
1275 * - The user can overwrite it with additional_cpus=NUM 1283 * - The user can overwrite it with possible_cpus=NUM
1276 * - Otherwise don't reserve additional CPUs. 1284 * - Otherwise don't reserve additional CPUs.
1277 * We do this because additional CPUs waste a lot of memory. 1285 * We do this because additional CPUs waste a lot of memory.
1278 * -AK 1286 * -AK
@@ -1285,9 +1293,19 @@ __init void prefill_possible_map(void)
1285 if (!num_processors) 1293 if (!num_processors)
1286 num_processors = 1; 1294 num_processors = 1;
1287 1295
1288 possible = num_processors + disabled_cpus; 1296 if (setup_possible_cpus == -1)
1289 if (possible > NR_CPUS) 1297 possible = num_processors + disabled_cpus;
1290 possible = NR_CPUS; 1298 else
1299 possible = setup_possible_cpus;
1300
1301 total_cpus = max_t(int, possible, num_processors + disabled_cpus);
1302
1303 if (possible > CONFIG_NR_CPUS) {
1304 printk(KERN_WARNING
1305 "%d Processors exceeds NR_CPUS limit of %d\n",
1306 possible, CONFIG_NR_CPUS);
1307 possible = CONFIG_NR_CPUS;
1308 }
1291 1309
1292 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1310 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
1293 possible, max_t(int, possible - num_processors, 0)); 1311 possible, max_t(int, possible - num_processors, 0));
@@ -1352,7 +1370,7 @@ void cpu_disable_common(void)
1352 lock_vector_lock(); 1370 lock_vector_lock();
1353 remove_cpu_from_maps(cpu); 1371 remove_cpu_from_maps(cpu);
1354 unlock_vector_lock(); 1372 unlock_vector_lock();
1355 fixup_irqs(cpu_online_map); 1373 fixup_irqs();
1356} 1374}
1357 1375
1358int native_cpu_disable(void) 1376int native_cpu_disable(void)
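
The new possible_cpus= handling above follows the usual early_param() pattern: the handler runs while the kernel command line is first scanned, and get_option() parses the integer. A generic sketch of the same pattern with a made-up parameter name, assuming only <linux/init.h> and <linux/kernel.h>.

#include <linux/init.h>
#include <linux/kernel.h>

static int __initdata example_width = -1;	/* -1: not given on the command line */

static int __init parse_example_width(char *str)
{
	get_option(&str, &example_width);	/* "example_width=N" -> N */
	return 0;
}
early_param("example_width", parse_example_width);
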
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f949be..ce5054642247 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
163 * We have to send the IPI only to 163 * We have to send the IPI only to
164 * CPUs affected. 164 * CPUs affected.
165 */ 165 */
166 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); 166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167 167
168 while (!cpus_empty(flush_cpumask)) 168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */ 169 /* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7081a9..f8be6f1d2e48 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195 195
196 while (!cpus_empty(f->flush_cpumask)) 196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax(); 197 cpu_relax();
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa74..f885023167e0 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
582static struct bau_control * __init uv_table_bases_init(int blade, int node) 582static struct bau_control * __init uv_table_bases_init(int blade, int node)
583{ 583{
584 int i; 584 int i;
585 int *ip;
586 struct bau_msg_status *msp; 585 struct bau_msg_status *msp;
587 struct bau_control *bau_tabp; 586 struct bau_control *bau_tabp;
588 587
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
599 bau_cpubits_clear(&msp->seen_by, (int) 598 bau_cpubits_clear(&msp->seen_by, (int)
600 uv_blade_nr_possible_cpus(blade)); 599 uv_blade_nr_possible_cpus(blade));
601 600
602 bau_tabp->watching =
603 kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
604 BUG_ON(!bau_tabp->watching);
605
606 for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
607 *ip = 0;
608
609 uv_bau_table_bases[blade] = bau_tabp; 601 uv_bau_table_bases[blade] = bau_tabp;
610 602
611 return bau_tabp; 603 return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
628 bcp->bau_msg_head = bau_tablesp->va_queue_first; 620 bcp->bau_msg_head = bau_tablesp->va_queue_first;
629 bcp->va_queue_first = bau_tablesp->va_queue_first; 621 bcp->va_queue_first = bau_tablesp->va_queue_first;
630 bcp->va_queue_last = bau_tablesp->va_queue_last; 622 bcp->va_queue_last = bau_tablesp->va_queue_last;
631 bcp->watching = bau_tablesp->watching;
632 bcp->msg_statuses = bau_tablesp->msg_statuses; 623 bcp->msg_statuses = bau_tablesp->msg_statuses;
633 bcp->descriptor_base = adp; 624 bcp->descriptor_base = adp;
634 } 625 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 141907ab6e22..ce6650eb64e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
72 72
73#include "cpu/mcheck/mce.h" 73#include "cpu/mcheck/mce.h"
74 74
75DECLARE_BITMAP(used_vectors, NR_VECTORS);
76EXPORT_SYMBOL_GPL(used_vectors);
77
78asmlinkage int system_call(void); 75asmlinkage int system_call(void);
79 76
80/* Do we ignore FPU interrupts ? */ 77/* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
89 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 86 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
90#endif 87#endif
91 88
89DECLARE_BITMAP(used_vectors, NR_VECTORS);
90EXPORT_SYMBOL_GPL(used_vectors);
91
92static int ignore_nmis; 92static int ignore_nmis;
93 93
94static inline void conditional_sti(struct pt_regs *regs) 94static inline void conditional_sti(struct pt_regs *regs)
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
292 tsk->thread.error_code = error_code; 292 tsk->thread.error_code = error_code;
293 tsk->thread.trap_no = 8; 293 tsk->thread.trap_no = 8;
294 294
295 /* This is always a kernel trap and never fixable (and thus must 295 /*
296 never return). */ 296 * This is always a kernel trap and never fixable (and thus must
297 * never return).
298 */
297 for (;;) 299 for (;;)
298 die(str, regs, error_code); 300 die(str, regs, error_code);
299} 301}
@@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
520} 522}
521 523
522#ifdef CONFIG_X86_64 524#ifdef CONFIG_X86_64
523/* Help handler running on IST stack to switch back to user stack 525/*
524 for scheduling or signal handling. The actual stack switch is done in 526 * Help handler running on IST stack to switch back to user stack
525 entry.S */ 527 * for scheduling or signal handling. The actual stack switch is done in
528 * entry.S
529 */
526asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) 530asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
527{ 531{
528 struct pt_regs *regs = eregs; 532 struct pt_regs *regs = eregs;
@@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
532 /* Exception from user space */ 536 /* Exception from user space */
533 else if (user_mode(eregs)) 537 else if (user_mode(eregs))
534 regs = task_pt_regs(current); 538 regs = task_pt_regs(current);
535 /* Exception from kernel and interrupts are enabled. Move to 539 /*
536 kernel process stack. */ 540 * Exception from kernel and interrupts are enabled. Move to
541 * kernel process stack.
542 */
537 else if (eregs->flags & X86_EFLAGS_IF) 543 else if (eregs->flags & X86_EFLAGS_IF)
538 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); 544 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
539 if (eregs != regs) 545 if (eregs != regs)
@@ -685,12 +691,7 @@ void math_error(void __user *ip)
685 cwd = get_fpu_cwd(task); 691 cwd = get_fpu_cwd(task);
686 swd = get_fpu_swd(task); 692 swd = get_fpu_swd(task);
687 693
688 err = swd & ~cwd & 0x3f; 694 err = swd & ~cwd;
689
690#ifdef CONFIG_X86_32
691 if (!err)
692 return;
693#endif
694 695
695 if (err & 0x001) { /* Invalid op */ 696 if (err & 0x001) { /* Invalid op */
696 /* 697 /*
@@ -708,7 +709,11 @@ void math_error(void __user *ip)
708 } else if (err & 0x020) { /* Precision */ 709 } else if (err & 0x020) { /* Precision */
709 info.si_code = FPE_FLTRES; 710 info.si_code = FPE_FLTRES;
710 } else { 711 } else {
711 info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ 712 /*
713 * If we're using IRQ 13, or supposedly even some trap 16
714 * implementations, it's possible we get a spurious trap...
715 */
716 return; /* Spurious trap, no error */
712 } 717 }
713 force_sig_info(SIGFPE, &info, task); 718 force_sig_info(SIGFPE, &info, task);
714} 719}
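
The math_error() change above relies on how the x87 control and status words interact: the control word (cwd) has a bit set for every masked exception and the status word (swd) for every exception that fired, so swd & ~cwd keeps only the exceptions the task asked to trap on, and anything else is treated as a spurious trap. A standalone plain-C sketch of that decoding follows, with an illustrative helper name; the bit values mirror the checks in the hunk.

#include <stddef.h>

/* Map unmasked x87 exceptions to a human-readable reason.
 * Low status-word bits: 0x01 invalid op, 0x02 denormal, 0x04 divide by
 * zero, 0x08 overflow, 0x10 underflow, 0x20 precision.
 */
static const char *example_fpe_reason(unsigned short cwd, unsigned short swd)
{
	unsigned short err = swd & ~cwd;	/* only unmasked exceptions */

	if (err & 0x001)
		return "invalid operation";	/* FPE_FLTINV */
	if (err & 0x004)
		return "divide by zero";	/* FPE_FLTDIV */
	if (err & 0x008)
		return "overflow";		/* FPE_FLTOVF */
	if (err & 0x012)
		return "denormal/underflow";	/* FPE_FLTUND */
	if (err & 0x020)
		return "precision loss";	/* FPE_FLTRES */
	return NULL;		/* spurious trap: nothing unmasked fired */
}
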
@@ -941,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
941 946
942void __init trap_init(void) 947void __init trap_init(void)
943{ 948{
944#ifdef CONFIG_X86_32
945 int i; 949 int i;
946#endif
947 950
948#ifdef CONFIG_EISA 951#ifdef CONFIG_EISA
949 void __iomem *p = early_ioremap(0x0FFFD9, 4); 952 void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1000,11 +1003,15 @@ void __init trap_init(void)
1000 } 1003 }
1001 1004
1002 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 1005 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1006#endif
1003 1007
1004 /* Reserve all the builtin and the syscall vector: */ 1008 /* Reserve all the builtin and the syscall vector: */
1005 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 1009 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1006 set_bit(i, used_vectors); 1010 set_bit(i, used_vectors);
1007 1011
1012#ifdef CONFIG_X86_64
1013 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
1014#else
1008 set_bit(SYSCALL_VECTOR, used_vectors); 1015 set_bit(SYSCALL_VECTOR, used_vectors);
1009#endif 1016#endif
1010 /* 1017 /*
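
used_vectors, now defined next to the IDT above, is an ordinary DECLARE_BITMAP() keyed by vector number: trap_init() sets a bit for every vector it claims (the builtin exceptions plus the syscall vector) so later vector allocation can skip them. A generic sketch of that bookkeeping with made-up names, assuming only the standard bitmap/bitops helpers.

#include <linux/types.h>
#include <linux/bitops.h>

#define EXAMPLE_NR_VECTORS 256			/* stand-in for NR_VECTORS */

static DECLARE_BITMAP(example_used, EXAMPLE_NR_VECTORS);

/* Find and claim the first vector at or above 'first' that nobody has
 * reserved yet; returns -1 if the table is full.
 */
static int example_alloc_vector(int first)
{
	int vec;

	for (vec = first; vec < EXAMPLE_NR_VECTORS; vec++) {
		if (!test_bit(vec, example_used)) {
			set_bit(vec, example_used);
			return vec;
		}
	}
	return -1;
}
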
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
226 /* Upper bound is clockevent's use of ulong for cycle deltas. */ 226 /* Upper bound is clockevent's use of ulong for cycle deltas. */
227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); 227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
228 evt->min_delta_ns = clockevent_delta2ns(1, evt); 228 evt->min_delta_ns = clockevent_delta2ns(1, evt);
229 evt->cpumask = cpumask_of_cpu(cpu); 229 evt->cpumask = cpumask_of(cpu);
230 230
231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", 231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
232 evt->name, evt->mult, evt->shift); 232 evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e6999182..2b54fe002e94 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
159 * Restore the extended state if present. Otherwise, restore the FP/SSE 159 * Restore the extended state if present. Otherwise, restore the FP/SSE
160 * state. 160 * state.
161 */ 161 */
162int restore_user_xstate(void __user *buf) 162static int restore_user_xstate(void __user *buf)
163{ 163{
164 struct _fpx_sw_bytes fx_sw_user; 164 struct _fpx_sw_bytes fx_sw_user;
165 u64 mask; 165 u64 mask;