author    Ingo Molnar <mingo@elte.hu>  2009-01-10 20:42:53 -0500
committer Ingo Molnar <mingo@elte.hu>  2009-01-10 20:42:53 -0500
commit    506c10f26c481b7f8ef27c1c79290f68989b2e9e (patch)
tree      03de82e812f00957aa6276dac2fe51c3358e88d7 /arch/x86/kernel
parent    e1df957670aef74ffd9a4ad93e6d2c90bf6b4845 (diff)
parent    c59765042f53a79a7a65585042ff463b69cb248c (diff)
Merge commit 'v2.6.29-rc1' into perfcounters/core
Conflicts: include/linux/kernel_stat.h
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 3
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 48
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 74
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 2
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 666
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 19
-rw-r--r--  arch/x86/kernel/apic.c | 56
-rw-r--r--  arch/x86/kernel/apm_32.c | 4
-rw-r--r--  arch/x86/kernel/bios_uv.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 28
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 194
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 14
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 9
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 30
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 72
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 18
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 9
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 45
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_32.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p6.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 18
-rw-r--r--  arch/x86/kernel/cpuid.c | 8
-rw-r--r--  arch/x86/kernel/crash.c | 18
-rw-r--r--  arch/x86/kernel/e820.c | 21
-rw-r--r--  arch/x86/kernel/early-quirks.c | 22
-rw-r--r--  arch/x86/kernel/early_printk.c | 2
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 107
-rw-r--r--  arch/x86/kernel/genx2apic_cluster.c | 81
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 78
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 61
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/head_64.S | 2
-rw-r--r--  arch/x86/kernel/hpet.c | 15
-rw-r--r--  arch/x86/kernel/i8253.c | 2
-rw-r--r--  arch/x86/kernel/i8259.c | 8
-rw-r--r--  arch/x86/kernel/init_task.c | 1
-rw-r--r--  arch/x86/kernel/io_apic.c | 1021
-rw-r--r--  arch/x86/kernel/ioport.c | 4
-rw-r--r--  arch/x86/kernel/ipi.c | 28
-rw-r--r--  arch/x86/kernel/irq.c | 8
-rw-r--r--  arch/x86/kernel/irq_32.c | 25
-rw-r--r--  arch/x86/kernel/irq_64.c | 39
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 31
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 24
-rw-r--r--  arch/x86/kernel/kprobes.c | 9
-rw-r--r--  arch/x86/kernel/kvmclock.c | 10
-rw-r--r--  arch/x86/kernel/ldt.c | 4
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 4
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 3
-rw-r--r--  arch/x86/kernel/mpparse.c | 356
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 2
-rw-r--r--  arch/x86/kernel/numaq_32.c | 38
-rw-r--r--  arch/x86/kernel/pci-dma.c | 15
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 2
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 29
-rw-r--r--  arch/x86/kernel/process_32.c | 19
-rw-r--r--  arch/x86/kernel/quirks.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 73
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 72
-rw-r--r--  arch/x86/kernel/smp.c | 25
-rw-r--r--  arch/x86/kernel/smpboot.c | 176
-rw-r--r--  arch/x86/kernel/time_32.c | 4
-rw-r--r--  arch/x86/kernel/time_64.c | 2
-rw-r--r--  arch/x86/kernel/tlb_32.c | 2
-rw-r--r--  arch/x86/kernel/tlb_64.c | 2
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 50
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 32
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 2
-rw-r--r--  arch/x86/kernel/xsave.c | 2
79 files changed, 2619 insertions(+), 1390 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88dd768eab6d..d364df03c1d6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -109,6 +109,8 @@ obj-$(CONFIG_MICROCODE) += microcode.o
 
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 
+obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -122,7 +124,6 @@ ifeq ($(CONFIG_X86_64),y)
         obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
         obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
-        obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
 
         obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
 endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 65d0b72777ea..d37593c2f438 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -47,7 +47,7 @@
 #endif
 
 static int __initdata acpi_force = 0;
-
+u32 acpi_rsdt_forced;
 #ifdef CONFIG_ACPI
 int acpi_disabled = 0;
 #else
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_madt_local_apic *lapic;
-	cpumask_t tmp_map, new_map;
+	cpumask_var_t tmp_map, new_map;
 	u8 physid;
 	int cpu;
+	int retval = -ENOMEM;
 
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	buffer.length = ACPI_ALLOCATE_BUFFER;
 	buffer.pointer = NULL;
 
-	tmp_map = cpu_present_map;
+	if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
+		goto out;
+
+	if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
+		goto free_tmp_map;
+
+	cpumask_copy(tmp_map, cpu_present_mask);
 	acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
 	/*
 	 * If mp_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
-	cpus_andnot(new_map, cpu_present_map, tmp_map);
-	if (cpus_empty(new_map)) {
+	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
+	if (cpumask_empty(new_map)) {
 		printk ("Unable to map lapic to logical cpu number\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto free_new_map;
 	}
 
-	cpu = first_cpu(new_map);
+	cpu = cpumask_first(new_map);
 
 	*pcpu = cpu;
-	return 0;
+	retval = 0;
+
+free_new_map:
+	free_cpumask_var(new_map);
+free_tmp_map:
+	free_cpumask_var(tmp_map);
+out:
+	return retval;
 }
 
 /* wrapper to silence section mismatch warning */
@@ -598,7 +613,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 int acpi_unmap_lsapic(int cpu)
 {
 	per_cpu(x86_cpu_to_apicid, cpu) = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 	num_processors--;
 
 	return (0);
@@ -1359,6 +1374,17 @@ static void __init acpi_process_madt(void)
1359 "Invalid BIOS MADT, disabling ACPI\n"); 1374 "Invalid BIOS MADT, disabling ACPI\n");
1360 disable_acpi(); 1375 disable_acpi();
1361 } 1376 }
1377 } else {
1378 /*
1379 * ACPI found no MADT, and so ACPI wants UP PIC mode.
1380 * In the event an MPS table was found, forget it.
1381 * Boot with "acpi=off" to use MPS on such a system.
1382 */
1383 if (smp_found_config) {
1384 printk(KERN_WARNING PREFIX
1385 "No APIC-table, disabling MPS\n");
1386 smp_found_config = 0;
1387 }
1362 } 1388 }
1363 1389
1364 /* 1390 /*
@@ -1794,6 +1820,10 @@ static int __init parse_acpi(char *arg)
 		disable_acpi();
 		acpi_ht = 1;
 	}
+	/* acpi=rsdt use RSDT instead of XSDT */
+	else if (strcmp(arg, "rsdt") == 0) {
+		acpi_rsdt_forced = 1;
+	}
 	/* "acpi=noirq" disables ACPI interrupt routing */
 	else if (strcmp(arg, "noirq") == 0) {
 		acpi_noirq_set();
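
The _acpi_map_lsapic() rework above is part of the tree-wide move from fixed-size on-stack cpumask_t (whose size scales with CONFIG_NR_CPUS) to heap-allocated cpumask_var_t. A minimal sketch of the allocate/copy/diff/free pattern the new code follows; the helper name and surrounding logic are illustrative, not part of the patch:

    #include <linux/cpumask.h>
    #include <linux/errno.h>
    #include <linux/gfp.h>

    /* Hypothetical helper: report which CPU just appeared in cpu_present_mask. */
    static int find_new_cpu(void)
    {
            cpumask_var_t before, diff;
            int cpu = -ENODEV;

            if (!alloc_cpumask_var(&before, GFP_KERNEL))
                    return -ENOMEM;
            if (!alloc_cpumask_var(&diff, GFP_KERNEL)) {
                    free_cpumask_var(before);
                    return -ENOMEM;
            }

            cpumask_copy(before, cpu_present_mask);
            /* ... something marks a new CPU present here ... */
            cpumask_andnot(diff, cpu_present_mask, before);
            if (!cpumask_empty(diff))
                    cpu = cpumask_first(diff);

            free_cpumask_var(diff);
            free_cpumask_var(before);
            return cpu;
    }

With CONFIG_CPUMASK_OFFSTACK=n, alloc_cpumask_var() compiles down to a no-op and the mask lives on the stack anyway, so the pattern costs nothing on small configurations.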
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c2502eb9aa83..bbbe4bbb6f34 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -56,6 +56,7 @@ static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */
 static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
 #define MWAIT_SUBSTATE_MASK	(0xf)
+#define MWAIT_CSTATE_MASK	(0xf)
 #define MWAIT_SUBSTATE_SIZE	(4)
 
 #define CPUID_MWAIT_LEAF (5)
@@ -66,39 +67,20 @@ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
 #define NATIVE_CSTATE_BEYOND_HALT	(2)
 
-int acpi_processor_ffh_cstate_probe(unsigned int cpu,
-		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 {
-	struct cstate_entry *percpu_entry;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	cpumask_t saved_mask;
-	int retval;
+	struct acpi_processor_cx *cx = _cx;
+	long retval;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int edx_part;
 	unsigned int cstate_type; /* C-state type and not ACPI C-state type */
 	unsigned int num_cstate_subtype;
 
-	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF )
-		return -1;
-
-	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
-		return -1;
-
-	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
-	percpu_entry->states[cx->index].eax = 0;
-	percpu_entry->states[cx->index].ecx = 0;
-
-	/* Make sure we are running on right CPU */
-	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		return -1;
-
 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
 
 	/* Check whether this particular cx_type (in CST) is supported or not */
-	cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1;
+	cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
+			MWAIT_CSTATE_MASK) + 1;
 	edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
 	num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
 
@@ -114,21 +96,45 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		retval = -1;
 		goto out;
 	}
-	percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
-
-	/* Use the hint in CST */
-	percpu_entry->states[cx->index].eax = cx->address;
 
 	if (!mwait_supported[cstate_type]) {
 		mwait_supported[cstate_type] = 1;
-		printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
-			"state\n", cx->type);
+		printk(KERN_DEBUG
+			"Monitor-Mwait will be used to enter C-%d "
+			"state\n", cx->type);
 	}
-	snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
-		 cx->address);
-
+	snprintf(cx->desc,
+		ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+		cx->address);
 out:
-	set_cpus_allowed_ptr(current, &saved_mask);
+	return retval;
+}
+
+int acpi_processor_ffh_cstate_probe(unsigned int cpu,
+		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+{
+	struct cstate_entry *percpu_entry;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	long retval;
+
+	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF)
+		return -1;
+
+	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
+		return -1;
+
+	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+	percpu_entry->states[cx->index].eax = 0;
+	percpu_entry->states[cx->index].ecx = 0;
+
+	/* Make sure we are running on right CPU */
+
+	retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx);
+	if (retval == 0) {
+		/* Use the hint in CST */
+		percpu_entry->states[cx->index].eax = cx->address;
+		percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
+	}
 	return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
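
The cstate.c change replaces the old "save cpus_allowed, migrate with set_cpus_allowed_ptr(), run cpuid, restore" dance with work_on_cpu(), which queues a function on the target CPU and blocks until it returns its long result. A minimal sketch of the calling pattern; the callback body is illustrative:

    #include <linux/smp.h>
    #include <linux/workqueue.h>

    /* Runs on the CPU passed to work_on_cpu(); 'arg' is forwarded as-is. */
    static long probe_one_cpu(void *arg)
    {
            /* per-CPU work, e.g. a CPUID query, goes here */
            return 0;
    }

    static long run_probe_on(unsigned int cpu, void *data)
    {
            /* The caller sleeps until probe_one_cpu() finishes on 'cpu'. */
            return work_on_cpu(cpu, probe_one_cpu, data);
    }

Unlike the set_cpus_allowed_ptr() approach, this cannot race with the caller's affinity being changed concurrently, but it may sleep and so is only usable from process context.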
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 806b4e9051b4..707c1f6f95fa 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -159,6 +159,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 	if (strncmp(str, "old_ordering", 12) == 0)
 		acpi_old_suspend_ordering();
+	if (strncmp(str, "s4_nonvs", 8) == 0)
+		acpi_s4_no_nvs();
 	str = strchr(str, ',');
 	if (str != NULL)
 		str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b350..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#ifdef CONFIG_IOMMU_API
+#include <linux/iommu.h>
+#endif
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 static LIST_HEAD(iommu_pd_list);
 static DEFINE_SPINLOCK(iommu_pd_list_lock);
 
+#ifdef CONFIG_IOMMU_API
+static struct iommu_ops amd_iommu_ops;
+#endif
+
 /*
  * general struct to manage commands send to an IOMMU
  */
@@ -47,6 +55,68 @@ struct iommu_cmd {
 
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
+static struct dma_ops_domain *find_protection_domain(u16 devid);
+
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+/*
+ * Initialization code for statistics collection
+ */
+
+DECLARE_STATS_COUNTER(compl_wait);
+DECLARE_STATS_COUNTER(cnt_map_single);
+DECLARE_STATS_COUNTER(cnt_unmap_single);
+DECLARE_STATS_COUNTER(cnt_map_sg);
+DECLARE_STATS_COUNTER(cnt_unmap_sg);
+DECLARE_STATS_COUNTER(cnt_alloc_coherent);
+DECLARE_STATS_COUNTER(cnt_free_coherent);
+DECLARE_STATS_COUNTER(cross_page);
+DECLARE_STATS_COUNTER(domain_flush_single);
+DECLARE_STATS_COUNTER(domain_flush_all);
+DECLARE_STATS_COUNTER(alloced_io_mem);
+DECLARE_STATS_COUNTER(total_map_requests);
+
+static struct dentry *stats_dir;
+static struct dentry *de_isolate;
+static struct dentry *de_fflush;
+
+static void amd_iommu_stats_add(struct __iommu_counter *cnt)
+{
+	if (stats_dir == NULL)
+		return;
+
+	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
+				       &cnt->value);
+}
+
+static void amd_iommu_stats_init(void)
+{
+	stats_dir = debugfs_create_dir("amd-iommu", NULL);
+	if (stats_dir == NULL)
+		return;
+
+	de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
+					 (u32 *)&amd_iommu_isolate);
+
+	de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
+					(u32 *)&amd_iommu_unmap_flush);
+
+	amd_iommu_stats_add(&compl_wait);
+	amd_iommu_stats_add(&cnt_map_single);
+	amd_iommu_stats_add(&cnt_unmap_single);
+	amd_iommu_stats_add(&cnt_map_sg);
+	amd_iommu_stats_add(&cnt_unmap_sg);
+	amd_iommu_stats_add(&cnt_alloc_coherent);
+	amd_iommu_stats_add(&cnt_free_coherent);
+	amd_iommu_stats_add(&cross_page);
+	amd_iommu_stats_add(&domain_flush_single);
+	amd_iommu_stats_add(&domain_flush_all);
+	amd_iommu_stats_add(&alloced_io_mem);
+	amd_iommu_stats_add(&total_map_requests);
+}
+
+#endif
 
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
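
The CONFIG_AMD_IOMMU_STATS block above leans on DECLARE_STATS_COUNTER()/INC_STATS_COUNTER() helpers defined in amd_iommu_types.h (not part of this diff) that wrap a named u64 published read-only through debugfs. A rough sketch of the idea with simplified stand-in names; struct __iommu_counter's real layout may differ:

    #include <linux/debugfs.h>
    #include <linux/types.h>

    struct demo_counter {               /* stand-in for struct __iommu_counter */
            char            *name;
            u64             value;
            struct dentry   *dent;
    };

    #define DEMO_DECLARE_COUNTER(nm) \
            static struct demo_counter nm = { .name = #nm }
    #define DEMO_INC_COUNTER(nm)    ((nm).value += 1)

    DEMO_DECLARE_COUNTER(demo_events);

    static void demo_stats_init(void)
    {
            struct dentry *dir = debugfs_create_dir("demo-stats", NULL);

            if (dir == NULL)
                    return;
            /* Expose the raw counter as a read-only debugfs file. */
            demo_events.dent = debugfs_create_u64(demo_events.name, 0444,
                                                  dir, &demo_events.value);
    }

When the config option is off, the DECLARE/INC macros presumably compile to nothing, which keeps the INC_STATS_COUNTER() calls sprinkled through the hot paths below free in production builds.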
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command(iommu, cmd);
 	if (!ret)
-		iommu->need_sync = 1;
+		iommu->need_sync = true;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
 }
 
 /*
+ * This function waits until an IOMMU has completed a completion
+ * wait command
+ */
+static void __iommu_wait_for_completion(struct amd_iommu *iommu)
+{
+	int ready = 0;
+	unsigned status = 0;
+	unsigned long i = 0;
+
+	INC_STATS_COUNTER(compl_wait);
+
+	while (!ready && (i < EXIT_LOOP_COUNT)) {
+		++i;
+		/* wait for the bit to become one */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+	}
+
+	/* set bit back to zero */
+	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+	if (unlikely(i == EXIT_LOOP_COUNT))
+		panic("AMD IOMMU: Completion wait loop failed\n");
+}
+
+/*
+ * This function queues a completion wait command into the command
+ * buffer of an IOMMU
+ */
+static int __iommu_completion_wait(struct amd_iommu *iommu)
+{
+	struct iommu_cmd cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
+	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+	return __iommu_queue_command(iommu, &cmd);
+}
+
+/*
  * This function is called whenever we need to ensure that the IOMMU has
  * completed execution of all commands we sent. It sends a
  * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-	int ret = 0, ready = 0;
-	unsigned status = 0;
-	struct iommu_cmd cmd;
-	unsigned long flags, i = 0;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	int ret = 0;
+	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
 	if (!iommu->need_sync)
 		goto out;
 
-	iommu->need_sync = 0;
+	ret = __iommu_completion_wait(iommu);
 
-	ret = __iommu_queue_command(iommu, &cmd);
+	iommu->need_sync = false;
 
 	if (ret)
 		goto out;
 
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-	}
-
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		panic("AMD IOMMU: Completion wait loop failed\n");
+	__iommu_wait_for_completion(iommu);
 
 out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
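
Splitting the old monolithic wait into __iommu_completion_wait() (queue the COMPLETION_WAIT command) and __iommu_wait_for_completion() (poll the MMIO status bit) lets a caller that already holds iommu->lock issue a command and synchronously drain it in one critical section. The new iommu_flush_domain() later in this patch uses exactly that sequence:

    	spin_lock_irqsave(&iommu->lock, flags);
    	__iommu_queue_command(iommu, &cmd);	/* e.g. INV_IOMMU_PAGES */
    	__iommu_completion_wait(iommu);		/* queue COMPLETION_WAIT */
    	__iommu_wait_for_completion(iommu);	/* poll MMIO_STATUS_OFFSET */
    	spin_unlock_irqrestore(&iommu->lock, flags);

iommu_completion_wait() itself keeps its lazy behavior: it only issues the command when need_sync is set, i.e. when something was actually queued since the last sync.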
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	return ret;
 }
 
+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+					  u16 domid, int pde, int s)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	address &= PAGE_MASK;
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	cmd->data[1] |= domid;
+	cmd->data[2] = lower_32_bits(address);
+	cmd->data[3] = upper_32_bits(address);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Generic command send function for invalidaing TLB entries
  */
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	struct iommu_cmd cmd;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
-	cmd.data[1] |= domid;
-	cmd.data[2] = lower_32_bits(address);
-	cmd.data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
 
 	ret = iommu_queue_command(iommu, &cmd);
 
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 {
 	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 
+	INC_STATS_COUNTER(domain_flush_single);
+
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+/*
+ * This function is used to flush the IO/TLB for a given protection domain
+ * on every IOMMU in the system
+ */
+static void iommu_flush_domain(u16 domid)
+{
+	unsigned long flags;
+	struct amd_iommu *iommu;
+	struct iommu_cmd cmd;
+
+	INC_STATS_COUNTER(domain_flush_all);
+
+	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      domid, 1, 1);
+
+	list_for_each_entry(iommu, &amd_iommu_list, list) {
+		spin_lock_irqsave(&iommu->lock, flags);
+		__iommu_queue_command(iommu, &cmd);
+		__iommu_completion_wait(iommu);
+		__iommu_wait_for_completion(iommu);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-static int iommu_map(struct protection_domain *dom,
-		     unsigned long bus_addr,
-		     unsigned long phys_addr,
-		     int prot)
+static int iommu_map_page(struct protection_domain *dom,
+			  unsigned long bus_addr,
+			  unsigned long phys_addr,
+			  int prot)
 {
 	u64 __pte, *pte, *page;
 
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
 	return 0;
 }
 
+static void iommu_unmap_page(struct protection_domain *dom,
+			     unsigned long bus_addr)
+{
+	u64 *pte;
+
+	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	*pte = 0;
+}
+
 /*
  * This function checks if a specific unity mapping entry is needed for
  * this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 
 	for (addr = e->address_start; addr < e->address_end;
 	     addr += PAGE_SIZE) {
-		ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
+		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
 		if (ret)
 			return ret;
 		/*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
+static void domain_id_free(int id)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	if (id > 0 && id < MAX_DOMAIN_ID)
+		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
  * ranges.
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 	iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+static void free_pagetable(struct protection_domain *domain)
 {
 	int i, j;
 	u64 *p1, *p2, *p3;
 
-	p1 = dma_dom->domain.pt_root;
+	p1 = domain->pt_root;
 
 	if (!p1)
 		return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 	}
 
 	free_page((unsigned long)p1);
+
+	domain->pt_root = NULL;
 }
 
 /*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	if (!dom)
 		return;
 
-	dma_ops_free_pagetable(dom);
+	free_pagetable(&dom->domain);
 
 	kfree(dom->pte_pages);
 
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		goto free_dma_dom;
 	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	dma_dom->domain.flags = PD_DMA_OPS_MASK;
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
 }
 
 /*
+ * little helper function to check whether a given protection domain is a
+ * dma_ops domain
+ */
+static bool dma_ops_domain(struct protection_domain *domain)
+{
+	return domain->flags & PD_DMA_OPS_MASK;
+}
+
+/*
  * Find out the protection domain structure for a given PCI device. This
  * will give us the pointer to the page table root for example.
  */
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
  * If a device is not yet associated with a domain, this function does
  * assigns it visible for the hardware
  */
-static void set_device_domain(struct amd_iommu *iommu,
-			      struct protection_domain *domain,
-			      u16 devid)
+static void attach_device(struct amd_iommu *iommu,
+			  struct protection_domain *domain,
+			  u16 devid)
 {
 	unsigned long flags;
-
 	u64 pte_root = virt_to_phys(domain->pt_root);
 
+	domain->dev_cnt += 1;
+
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
 	iommu_queue_inv_dev_entry(iommu, devid);
 }
 
+/*
+ * Removes a device from a protection domain (unlocked)
+ */
+static void __detach_device(struct protection_domain *domain, u16 devid)
+{
+
+	/* lock domain */
+	spin_lock(&domain->lock);
+
+	/* remove domain from the lookup table */
+	amd_iommu_pd_table[devid] = NULL;
+
+	/* remove entry from the device table seen by the hardware */
+	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+	amd_iommu_dev_table[devid].data[1] = 0;
+	amd_iommu_dev_table[devid].data[2] = 0;
+
+	/* decrease reference counter */
+	domain->dev_cnt -= 1;
+
+	/* ready */
+	spin_unlock(&domain->lock);
+}
+
+/*
+ * Removes a device from a protection domain (with devtable_lock held)
+ */
+static void detach_device(struct protection_domain *domain, u16 devid)
+{
+	unsigned long flags;
+
+	/* lock device table */
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	__detach_device(domain, devid);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int device_change_notifier(struct notifier_block *nb,
+				  unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+	struct protection_domain *domain;
+	struct dma_ops_domain *dma_domain;
+	struct amd_iommu *iommu;
+	int order = amd_iommu_aperture_order;
+	unsigned long flags;
+
+	if (devid > amd_iommu_last_bdf)
+		goto out;
+
+	devid = amd_iommu_alias_table[devid];
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (iommu == NULL)
+		goto out;
+
+	domain = domain_for_device(devid);
+
+	if (domain && !dma_ops_domain(domain))
+		WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
+			  "to a non-dma-ops domain\n", dev_name(dev));
+
+	switch (action) {
+	case BUS_NOTIFY_BOUND_DRIVER:
+		if (domain)
+			goto out;
+		dma_domain = find_protection_domain(devid);
+		if (!dma_domain)
+			dma_domain = iommu->default_dom;
+		attach_device(iommu, &dma_domain->domain, devid);
+		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
+		       "device %s\n", dma_domain->domain.id, dev_name(dev));
+		break;
+	case BUS_NOTIFY_UNBIND_DRIVER:
+		if (!domain)
+			goto out;
+		detach_device(domain, devid);
+		break;
+	case BUS_NOTIFY_ADD_DEVICE:
+		/* allocate a protection domain if a device is added */
+		dma_domain = find_protection_domain(devid);
+		if (dma_domain)
+			goto out;
+		dma_domain = dma_ops_domain_alloc(iommu, order);
+		if (!dma_domain)
+			goto out;
+		dma_domain->target_dev = devid;
+
+		spin_lock_irqsave(&iommu_pd_list_lock, flags);
+		list_add_tail(&dma_domain->list, &iommu_pd_list);
+		spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+		break;
+	default:
+		goto out;
+	}
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+
+out:
+	return 0;
+}
+
+struct notifier_block device_nb = {
+	.notifier_call = device_change_notifier,
+};
+
 /*****************************************************************************
  *
  * The next functions belong to the dma_ops mapping/unmapping code.
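
device_change_notifier() hooks the driver-core notifier chain of the PCI bus, so the IOMMU driver learns about device add/bind/unbind events without PCI-specific callbacks; the matching bus_register_notifier() call appears further down in amd_iommu_init_dma_ops(). The general shape of the mechanism, with an illustrative callback name:

    #include <linux/device.h>
    #include <linux/notifier.h>
    #include <linux/pci.h>

    static int demo_bus_notify(struct notifier_block *nb,
                               unsigned long action, void *data)
    {
            struct device *dev = data;

            switch (action) {
            case BUS_NOTIFY_ADD_DEVICE:     /* device added to the bus */
            case BUS_NOTIFY_BOUND_DRIVER:   /* driver bound to the device */
            case BUS_NOTIFY_UNBIND_DRIVER:  /* driver about to unbind */
                    dev_info(dev, "bus event %lu\n", action);
                    break;
            }
            return 0;                       /* == NOTIFY_DONE */
    }

    static struct notifier_block demo_nb = {
            .notifier_call = demo_bus_notify,
    };

    /* registration, as done for pci_bus_type later in this patch:
     *      bus_register_notifier(&pci_bus_type, &demo_nb);
     */

Note that the BUS_NOTIFY_ADD_DEVICE case in the real notifier preallocates a dma_ops domain, which is why find_protection_domain() in the next hunk no longer removes entries from iommu_pd_list: the list becomes a long-lived lookup table rather than a one-shot queue.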
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
 	list_for_each_entry(entry, &iommu_pd_list, list) {
 		if (entry->target_dev == devid) {
 			ret = entry;
-			list_del(&ret->list);
 			break;
 		}
 	}
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
 		if (!dma_dom)
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
-		set_device_domain(*iommu, *domain, *bdf);
+		attach_device(*iommu, *domain, *bdf);
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-		       "device ", (*domain)->id);
-		print_devid(_bdf, 1);
+		       "device %s\n", (*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-		set_device_domain(*iommu, *domain, _bdf);
+		attach_device(*iommu, *domain, _bdf);
 
 	return 1;
 }
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
+	INC_STATS_COUNTER(total_map_requests);
+
+	if (pages > 1)
+		INC_STATS_COUNTER(cross_page);
+
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
 	}
 	address += offset;
 
+	ADD_STATS_COUNTER(alloced_io_mem, size);
+
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
 		iommu_flush_tlb(iommu, dma_dom->domain.id);
 		dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 		start += PAGE_SIZE;
 	}
 
+	SUB_STATS_COUNTER(alloced_io_mem, size);
+
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	dma_addr_t addr;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_single);
+
 	if (!check_device(dev))
 		return bad_dma_address;
 
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 		/* device not handled by any AMD IOMMU */
 		return (dma_addr_t)paddr;
 
+	if (!dma_ops_domain(domain))
+		return bad_dma_address;
+
 	spin_lock_irqsave(&domain->lock, flags);
 	addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
 			    dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_unmap_single);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		/* device not handled by any AMD IOMMU */
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	int mapped_elems = 0;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_sg);
+
 	if (!check_device(dev))
 		return 0;
 
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	if (!iommu || !domain)
 		return map_sg_no_iommu(dev, sglist, nelems, dir);
 
+	if (!dma_ops_domain(domain))
+		return 0;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	u16 devid;
 	int i;
 
+	INC_STATS_COUNTER(cnt_unmap_sg);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	phys_addr_t paddr;
 	u64 dma_mask = dev->coherent_dma_mask;
 
+	INC_STATS_COUNTER(cnt_alloc_coherent);
+
 	if (!check_device(dev))
 		return NULL;
 
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		return virt_addr;
 	}
 
+	if (!dma_ops_domain(domain))
+		goto out_free;
+
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == bad_dma_address) {
-		free_pages((unsigned long)virt_addr, get_order(size));
-		virt_addr = NULL;
-		goto out;
-	}
+	if (*dma_addr == bad_dma_address)
+		goto out_free;
 
 	iommu_completion_wait(iommu);
 
-out:
 	spin_unlock_irqrestore(&domain->lock, flags);
 
 	return virt_addr;
+
+out_free:
+
+	free_pages((unsigned long)virt_addr, get_order(size));
+
+	return NULL;
 }
 
 /*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_free_coherent);
+
 	if (!check_device(dev))
 		return;
 
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
 	if (!iommu || !domain)
 		goto free_mem;
 
+	if (!dma_ops_domain(domain))
+		goto free_mem;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1296,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
  */
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
 {
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
@@ -1305,7 +1625,7 @@ void prealloc_protection_domains(void)
 	u16 devid;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		devid = (dev->bus->number << 8) | dev->devfn;
+		devid = calc_devid(dev->bus->number, dev->devfn);
 		if (devid > amd_iommu_last_bdf)
 			continue;
 		devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
+		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
 		ret = iommu_init_unity_mappings(iommu);
 		if (ret)
 			goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
+	register_iommu(&amd_iommu_ops);
+
+	bus_register_notifier(&pci_bus_type, &device_nb);
+
+	amd_iommu_stats_init();
+
 	return 0;
 
 free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
 
 	return ret;
 }
+
+/*****************************************************************************
+ *
+ * The following functions belong to the exported interface of AMD IOMMU
+ *
+ * This interface allows access to lower level functions of the IOMMU
+ * like protection domain handling and assignement of devices to domains
+ * which is not possible with the dma_ops interface.
+ *
+ *****************************************************************************/
+
+static void cleanup_domain(struct protection_domain *domain)
+{
+	unsigned long flags;
+	u16 devid;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+
+	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
+		if (amd_iommu_pd_table[devid] == domain)
+			__detach_device(domain, devid);
+
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+	struct protection_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return -ENOMEM;
+
+	spin_lock_init(&domain->lock);
+	domain->mode = PAGE_MODE_3_LEVEL;
+	domain->id = domain_id_alloc();
+	if (!domain->id)
+		goto out_free;
+	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!domain->pt_root)
+		goto out_free;
+
+	dom->priv = domain;
+
+	return 0;
+
+out_free:
+	kfree(domain);
+
+	return -ENOMEM;
+}
+
+static void amd_iommu_domain_destroy(struct iommu_domain *dom)
+{
+	struct protection_domain *domain = dom->priv;
+
+	if (!domain)
+		return;
+
+	if (domain->dev_cnt > 0)
+		cleanup_domain(domain);
+
+	BUG_ON(domain->dev_cnt != 0);
+
+	free_pagetable(domain);
+
+	domain_id_free(domain->id);
+
+	kfree(domain);
+
+	dom->priv = NULL;
+}
+
+static void amd_iommu_detach_device(struct iommu_domain *dom,
+				    struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid > 0)
+		detach_device(domain, devid);
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return;
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+}
+
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+				   struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct protection_domain *old_domain;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid >= amd_iommu_last_bdf ||
+	    devid != amd_iommu_alias_table[devid])
+		return -EINVAL;
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
+
+	old_domain = domain_for_device(devid);
+	if (old_domain)
+		return -EBUSY;
+
+	attach_device(iommu, domain, devid);
+
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
+static int amd_iommu_map_range(struct iommu_domain *dom,
+			       unsigned long iova, phys_addr_t paddr,
+			       size_t size, int iommu_prot)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
+	int prot = 0;
+	int ret;
+
+	if (iommu_prot & IOMMU_READ)
+		prot |= IOMMU_PROT_IR;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= IOMMU_PROT_IW;
+
+	iova &= PAGE_MASK;
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		ret = iommu_map_page(domain, iova, paddr, prot);
+		if (ret)
+			return ret;
+
+		iova += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static void amd_iommu_unmap_range(struct iommu_domain *dom,
+				  unsigned long iova, size_t size)
+{
+
+	struct protection_domain *domain = dom->priv;
+	unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
+
+	iova &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		iommu_unmap_page(domain, iova);
+		iova += PAGE_SIZE;
+	}
+
+	iommu_flush_domain(domain->id);
+}
+
+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
+					  unsigned long iova)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long offset = iova & ~PAGE_MASK;
+	phys_addr_t paddr;
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	paddr = *pte & IOMMU_PAGE_MASK;
+	paddr |= offset;
+
+	return paddr;
+}
+
+static struct iommu_ops amd_iommu_ops = {
+	.domain_init = amd_iommu_domain_init,
+	.domain_destroy = amd_iommu_domain_destroy,
+	.attach_dev = amd_iommu_attach_device,
+	.detach_dev = amd_iommu_detach_device,
+	.map = amd_iommu_map_range,
+	.unmap = amd_iommu_unmap_range,
+	.iova_to_phys = amd_iommu_iova_to_phys,
+};
+
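
register_iommu(&amd_iommu_ops) plugs the driver into the generic IOMMU API (linux/iommu.h) introduced for KVM device assignment, so consumers call the generic wrappers instead of driver internals. A hedged sketch of consumer-side usage against the 2.6.29-era API; exact function names and signatures changed in later kernels (iommu_map_range() was eventually replaced by iommu_map()):

    #include <linux/iommu.h>
    #include <linux/kernel.h>

    /* Sketch: map one page at 'iova' for a device and verify the mapping. */
    static int demo_roundtrip(struct device *dev, phys_addr_t paddr,
                              unsigned long iova)
    {
            struct iommu_domain *dom;
            int ret;

            dom = iommu_domain_alloc();         /* -> amd_iommu_domain_init() */
            if (!dom)
                    return -ENOMEM;

            ret = iommu_attach_device(dom, dev); /* -> amd_iommu_attach_device() */
            if (ret)
                    goto free_dom;

            ret = iommu_map_range(dom, iova, paddr, PAGE_SIZE,
                                  IOMMU_READ | IOMMU_WRITE);
            if (!ret) {
                    WARN_ON(iommu_iova_to_phys(dom, iova) != paddr);
                    iommu_unmap_range(dom, iova, PAGE_SIZE);
            }

            iommu_detach_device(dom, dev);
    free_dom:
            iommu_domain_free(dom);
            return ret;
    }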
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55ca..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -243,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }
 
 /* Function to enable the hardware */
-void __init iommu_enable(struct amd_iommu *iommu)
+static void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
 /* Function to enable IOMMU event logging and event interrupts */
-void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
 {
 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6c83ac10e6d3..d2d17b8d10f8 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -31,10 +31,12 @@
 #include <linux/dmi.h>
 #include <linux/dmar.h>
 #include <linux/ftrace.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <linux/timex.h>
 
 #include <asm/perf_counter.h>
 #include <asm/atomic.h>
-#include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/desc.h>
@@ -42,10 +44,8 @@
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
 #include <asm/i8253.h>
-#include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
-#include <asm/timex.h>
 #include <asm/apic.h>
 #include <asm/i8259.h>
 
@@ -99,8 +99,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
 #ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
+static int x2apic_preenabled;
+static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
 	disable_x2apic = 1;
@@ -120,8 +120,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 int first_system_vector = 0xfe;
 
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
 /*
  * Debug level, exported for io_apic.c
  */
@@ -143,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -229,7 +227,7 @@ void xapic_icr_write(u32 low, u32 id)
 	apic_write(APIC_ICR, low);
 }
 
-u64 xapic_icr_read(void)
+static u64 xapic_icr_read(void)
 {
 	u32 icr1, icr2;
 
@@ -269,7 +267,7 @@ void x2apic_icr_write(u32 low, u32 id)
 	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 }
 
-u64 x2apic_icr_read(void)
+static u64 x2apic_icr_read(void)
 {
 	unsigned long val;
 
@@ -456,7 +454,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -472,7 +470,7 @@ static void __cpuinit setup_APIC_timer(void)
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+	levt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(levt);
 }
@@ -690,7 +688,7 @@ static int __init calibrate_APIC_clock(void)
690 local_irq_enable(); 688 local_irq_enable();
691 689
692 if (levt->features & CLOCK_EVT_FEAT_DUMMY) { 690 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
693 pr_warning("APIC timer disabled due to verification failure.\n"); 691 pr_warning("APIC timer disabled due to verification failure\n");
694 return -1; 692 return -1;
695 } 693 }
696 694
@@ -1809,28 +1807,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1809void __cpuinit generic_processor_info(int apicid, int version) 1807void __cpuinit generic_processor_info(int apicid, int version)
1810{ 1808{
1811 int cpu; 1809 int cpu;
1812 cpumask_t tmp_map;
1813 1810
1814 /* 1811 /*
1815 * Validate version 1812 * Validate version
1816 */ 1813 */
1817 if (version == 0x0) { 1814 if (version == 0x0) {
1818 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " 1815 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1819 "fixing up to 0x10. (tell your hw vendor)\n", 1816 "fixing up to 0x10. (tell your hw vendor)\n",
1820 version); 1817 version);
1821 version = 0x10; 1818 version = 0x10;
1822 } 1819 }
1823 apic_version[apicid] = version; 1820 apic_version[apicid] = version;
1824 1821
1825 if (num_processors >= NR_CPUS) { 1822 if (num_processors >= nr_cpu_ids) {
1826 pr_warning("WARNING: NR_CPUS limit of %i reached." 1823 int max = nr_cpu_ids;
1827 " Processor ignored.\n", NR_CPUS); 1824 int thiscpu = max + disabled_cpus;
1825
1826 pr_warning(
1827 "ACPI: NR_CPUS/possible_cpus limit of %i reached."
1828 " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
1829
1830 disabled_cpus++;
1828 return; 1831 return;
1829 } 1832 }
1830 1833
1831 num_processors++; 1834 num_processors++;
1832 cpus_complement(tmp_map, cpu_present_map); 1835 cpu = cpumask_next_zero(-1, cpu_present_mask);
1833 cpu = first_cpu(tmp_map);
1834 1836
1835 physid_set(apicid, phys_cpu_present_map); 1837 physid_set(apicid, phys_cpu_present_map);
1836 if (apicid == boot_cpu_physical_apicid) { 1838 if (apicid == boot_cpu_physical_apicid) {
@@ -1880,8 +1882,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1880 } 1882 }
1881#endif 1883#endif
1882 1884
1883 cpu_set(cpu, cpu_possible_map); 1885 set_cpu_possible(cpu, true);
1884 cpu_set(cpu, cpu_present_map); 1886 set_cpu_present(cpu, true);
1885} 1887}
1886 1888
1887#ifdef CONFIG_X86_64 1889#ifdef CONFIG_X86_64
@@ -2083,18 +2085,16 @@ __cpuinit int apic_is_clustered_box(void)
2083 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); 2085 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2084 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 2086 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2085 2087
2086 for (i = 0; i < NR_CPUS; i++) { 2088 for (i = 0; i < nr_cpu_ids; i++) {
2087 /* are we being called early in kernel startup? */ 2089 /* are we being called early in kernel startup? */
2088 if (bios_cpu_apicid) { 2090 if (bios_cpu_apicid) {
2089 id = bios_cpu_apicid[i]; 2091 id = bios_cpu_apicid[i];
2090 } 2092 } else if (i < nr_cpu_ids) {
2091 else if (i < nr_cpu_ids) {
2092 if (cpu_present(i)) 2093 if (cpu_present(i))
2093 id = per_cpu(x86_bios_cpu_apicid, i); 2094 id = per_cpu(x86_bios_cpu_apicid, i);
2094 else 2095 else
2095 continue; 2096 continue;
2096 } 2097 } else
2097 else
2098 break; 2098 break;
2099 2099
2100 if (id != BAD_APICID) 2100 if (id != BAD_APICID)
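
The generic_processor_info() hunk above carries the core of this file's cpumask conversion: instead of materialising the complement of cpu_present_map in an on-stack cpumask_t (whose size grows with NR_CPUS), the new code asks directly for the first clear bit and flips the per-CPU bits through accessors. A minimal sketch of the new idiom, assuming only the 2.6.29-era <linux/cpumask.h> API (claim_cpu_slot() is an illustrative name, not part of the patch):

#include <linux/cpumask.h>
#include <linux/errno.h>

/* Find the first CPU number not yet present and mark it usable,
 * mirroring the flow of the rewritten generic_processor_info(). */
static int claim_cpu_slot(void)
{
	/* cpumask_next_zero(-1, mask) yields the first zero bit in mask */
	int cpu = cpumask_next_zero(-1, cpu_present_mask);

	if (cpu >= nr_cpu_ids)
		return -ENOSPC;		/* every slot already taken */

	set_cpu_possible(cpu, true);	/* was: cpu_set(cpu, cpu_possible_map) */
	set_cpu_present(cpu, true);	/* was: cpu_set(cpu, cpu_present_map) */
	return cpu;
}
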
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 3a26525a3f31..98807bb095ad 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -160,9 +160,9 @@
160 * Work around byte swap bug in one of the Vaio's BIOS's 160 * Work around byte swap bug in one of the Vaio's BIOS's
161 * (Marc Boucher <marc@mbsi.ca>). 161 * (Marc Boucher <marc@mbsi.ca>).
162 * Exposed the disable flag to dmi so that we can handle known 162 * Exposed the disable flag to dmi so that we can handle known
163 * broken APM (Alan Cox <alan@redhat.com>). 163 * broken APM (Alan Cox <alan@lxorguk.ukuu.org.uk>).
164 * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin 164 * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
165 * calling it - instead idle. (Alan Cox <alan@redhat.com>) 165 * calling it - instead idle. (Alan Cox <alan@lxorguk.ukuu.org.uk>)
166 * If an APM idle fails log it and idle sensibly 166 * If an APM idle fails log it and idle sensibly
167 * 1.15: Don't queue events to clients who open the device O_WRONLY. 167 * 1.15: Don't queue events to clients who open the device O_WRONLY.
168 * Don't expect replies from clients who open the device O_RDONLY. 168 * Don't expect replies from clients who open the device O_RDONLY.
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac26..f63882728d91 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
25#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27 27
28struct uv_systab uv_systab; 28static struct uv_systab uv_systab;
29 29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 376b9f9d8d23..667e5d561ed7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -41,6 +41,26 @@
41 41
42#include "cpu.h" 42#include "cpu.h"
43 43
44#ifdef CONFIG_X86_64
45
46/* all of these masks are initialized in setup_cpu_local_masks() */
47cpumask_var_t cpu_callin_mask;
48cpumask_var_t cpu_callout_mask;
49cpumask_var_t cpu_initialized_mask;
50
51/* representing cpus for which sibling maps can be computed */
52cpumask_var_t cpu_sibling_setup_mask;
53
54#else /* CONFIG_X86_32 */
55
56cpumask_t cpu_callin_map;
57cpumask_t cpu_callout_map;
58cpumask_t cpu_initialized;
59cpumask_t cpu_sibling_setup_map;
60
61#endif /* CONFIG_X86_32 */
62
63
44static struct cpu_dev *this_cpu __cpuinitdata; 64static struct cpu_dev *this_cpu __cpuinitdata;
45 65
46#ifdef CONFIG_X86_64 66#ifdef CONFIG_X86_64
@@ -356,7 +376,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
356 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 376 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
357 } else if (smp_num_siblings > 1) { 377 } else if (smp_num_siblings > 1) {
358 378
359 if (smp_num_siblings > NR_CPUS) { 379 if (smp_num_siblings > nr_cpu_ids) {
360 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", 380 printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
361 smp_num_siblings); 381 smp_num_siblings);
362 smp_num_siblings = 1; 382 smp_num_siblings = 1;
@@ -858,8 +878,6 @@ static __init int setup_disablecpuid(char *arg)
858} 878}
859__setup("clearcpuid=", setup_disablecpuid); 879__setup("clearcpuid=", setup_disablecpuid);
860 880
861cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
862
863#ifdef CONFIG_X86_64 881#ifdef CONFIG_X86_64
864struct x8664_pda **_cpu_pda __read_mostly; 882struct x8664_pda **_cpu_pda __read_mostly;
865EXPORT_SYMBOL(_cpu_pda); 883EXPORT_SYMBOL(_cpu_pda);
@@ -978,7 +996,7 @@ void __cpuinit cpu_init(void)
978 996
979 me = current; 997 me = current;
980 998
981 if (cpu_test_and_set(cpu, cpu_initialized)) 999 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
982 panic("CPU#%d already initialized!\n", cpu); 1000 panic("CPU#%d already initialized!\n", cpu);
983 1001
984 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1002 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1087,7 +1105,7 @@ void __cpuinit cpu_init(void)
1087 struct tss_struct *t = &per_cpu(init_tss, cpu); 1105 struct tss_struct *t = &per_cpu(init_tss, cpu);
1088 struct thread_struct *thread = &curr->thread; 1106 struct thread_struct *thread = &curr->thread;
1089 1107
1090 if (cpu_test_and_set(cpu, cpu_initialized)) { 1108 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1091 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1109 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1092 for (;;) local_irq_enable(); 1110 for (;;) local_irq_enable();
1093 } 1111 }
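
The new CONFIG_X86_64 block at the top of common.c switches the boot-time bookkeeping masks over to cpumask_var_t. With CONFIG_CPUMASK_OFFSTACK=y a cpumask_var_t is just a pointer, so each mask needs backing storage before first use; the diff's comment defers that to setup_cpu_local_masks(). A plausible sketch of such an initializer follows; the body is not part of this hunk, and the bootmem variant is an assumption based on this running before the slab allocator is up:

#include <linux/init.h>
#include <linux/cpumask.h>

/* Sketch: give each offstack mask its storage early in boot.  The
 * bootmem allocator panics rather than fail, hence no return value. */
void __init setup_cpu_local_masks(void)
{
	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
	alloc_bootmem_cpumask_var(&cpu_callin_mask);
	alloc_bootmem_cpumask_var(&cpu_callout_mask);
	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
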
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 88ea02dcb622..06fcd8f9323c 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -145,13 +145,14 @@ typedef union {
145 145
146struct drv_cmd { 146struct drv_cmd {
147 unsigned int type; 147 unsigned int type;
148 cpumask_t mask; 148 cpumask_var_t mask;
149 drv_addr_union addr; 149 drv_addr_union addr;
150 u32 val; 150 u32 val;
151}; 151};
152 152
153static void do_drv_read(struct drv_cmd *cmd) 153static long do_drv_read(void *_cmd)
154{ 154{
155 struct drv_cmd *cmd = _cmd;
155 u32 h; 156 u32 h;
156 157
157 switch (cmd->type) { 158 switch (cmd->type) {
@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
166 default: 167 default:
167 break; 168 break;
168 } 169 }
170 return 0;
169} 171}
170 172
171static void do_drv_write(struct drv_cmd *cmd) 173static long do_drv_write(void *_cmd)
172{ 174{
175 struct drv_cmd *cmd = _cmd;
173 u32 lo, hi; 176 u32 lo, hi;
174 177
175 switch (cmd->type) { 178 switch (cmd->type) {
@@ -186,48 +189,41 @@ static void do_drv_write(struct drv_cmd *cmd)
186 default: 189 default:
187 break; 190 break;
188 } 191 }
192 return 0;
189} 193}
190 194
191static void drv_read(struct drv_cmd *cmd) 195static void drv_read(struct drv_cmd *cmd)
192{ 196{
193 cpumask_t saved_mask = current->cpus_allowed;
194 cmd->val = 0; 197 cmd->val = 0;
195 198
196 set_cpus_allowed_ptr(current, &cmd->mask); 199 work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
197 do_drv_read(cmd);
198 set_cpus_allowed_ptr(current, &saved_mask);
199} 200}
200 201
201static void drv_write(struct drv_cmd *cmd) 202static void drv_write(struct drv_cmd *cmd)
202{ 203{
203 cpumask_t saved_mask = current->cpus_allowed;
204 unsigned int i; 204 unsigned int i;
205 205
206 for_each_cpu_mask_nr(i, cmd->mask) { 206 for_each_cpu(i, cmd->mask) {
207 set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); 207 work_on_cpu(i, do_drv_write, cmd);
208 do_drv_write(cmd);
209 } 208 }
210
211 set_cpus_allowed_ptr(current, &saved_mask);
212 return;
213} 209}
214 210
215static u32 get_cur_val(const cpumask_t *mask) 211static u32 get_cur_val(const struct cpumask *mask)
216{ 212{
217 struct acpi_processor_performance *perf; 213 struct acpi_processor_performance *perf;
218 struct drv_cmd cmd; 214 struct drv_cmd cmd;
219 215
220 if (unlikely(cpus_empty(*mask))) 216 if (unlikely(cpumask_empty(mask)))
221 return 0; 217 return 0;
222 218
223 switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) { 219 switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
224 case SYSTEM_INTEL_MSR_CAPABLE: 220 case SYSTEM_INTEL_MSR_CAPABLE:
225 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 221 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
226 cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; 222 cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
227 break; 223 break;
228 case SYSTEM_IO_CAPABLE: 224 case SYSTEM_IO_CAPABLE:
229 cmd.type = SYSTEM_IO_CAPABLE; 225 cmd.type = SYSTEM_IO_CAPABLE;
230 perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data; 226 perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
231 cmd.addr.io.port = perf->control_register.address; 227 cmd.addr.io.port = perf->control_register.address;
232 cmd.addr.io.bit_width = perf->control_register.bit_width; 228 cmd.addr.io.bit_width = perf->control_register.bit_width;
233 break; 229 break;
@@ -235,15 +231,44 @@ static u32 get_cur_val(const cpumask_t *mask)
235 return 0; 231 return 0;
236 } 232 }
237 233
238 cmd.mask = *mask; 234 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
235 return 0;
236
237 cpumask_copy(cmd.mask, mask);
239 238
240 drv_read(&cmd); 239 drv_read(&cmd);
241 240
241 free_cpumask_var(cmd.mask);
242
242 dprintk("get_cur_val = %u\n", cmd.val); 243 dprintk("get_cur_val = %u\n", cmd.val);
243 244
244 return cmd.val; 245 return cmd.val;
245} 246}
246 247
248struct perf_cur {
249 union {
250 struct {
251 u32 lo;
252 u32 hi;
253 } split;
254 u64 whole;
255 } aperf_cur, mperf_cur;
256};
257
258
259static long read_measured_perf_ctrs(void *_cur)
260{
261 struct perf_cur *cur = _cur;
262
263 rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
264 rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
265
266 wrmsr(MSR_IA32_APERF, 0, 0);
267 wrmsr(MSR_IA32_MPERF, 0, 0);
268
269 return 0;
270}
271
247/* 272/*
248 * Return the measured active (C0) frequency on this CPU since last call 273 * Return the measured active (C0) frequency on this CPU since last call
249 * to this function. 274 * to this function.
@@ -260,31 +285,12 @@ static u32 get_cur_val(const cpumask_t *mask)
260static unsigned int get_measured_perf(struct cpufreq_policy *policy, 285static unsigned int get_measured_perf(struct cpufreq_policy *policy,
261 unsigned int cpu) 286 unsigned int cpu)
262{ 287{
263 union { 288 struct perf_cur cur;
264 struct {
265 u32 lo;
266 u32 hi;
267 } split;
268 u64 whole;
269 } aperf_cur, mperf_cur;
270
271 cpumask_t saved_mask;
272 unsigned int perf_percent; 289 unsigned int perf_percent;
273 unsigned int retval; 290 unsigned int retval;
274 291
275 saved_mask = current->cpus_allowed; 292 if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
276 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
277 if (get_cpu() != cpu) {
278 /* We were not able to run on requested processor */
279 put_cpu();
280 return 0; 293 return 0;
281 }
282
283 rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
284 rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
285
286 wrmsr(MSR_IA32_APERF, 0,0);
287 wrmsr(MSR_IA32_MPERF, 0,0);
288 294
289#ifdef __i386__ 295#ifdef __i386__
290 /* 296 /*
@@ -292,37 +298,39 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
292 * Get an approximate value. Return failure in case we cannot get 298 * Get an approximate value. Return failure in case we cannot get
293 * an approximate value. 299 * an approximate value.
294 */ 300 */
295 if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { 301 if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
296 int shift_count; 302 int shift_count;
297 u32 h; 303 u32 h;
298 304
299 h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); 305 h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
300 shift_count = fls(h); 306 shift_count = fls(h);
301 307
302 aperf_cur.whole >>= shift_count; 308 cur.aperf_cur.whole >>= shift_count;
303 mperf_cur.whole >>= shift_count; 309 cur.mperf_cur.whole >>= shift_count;
304 } 310 }
305 311
306 if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { 312 if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
307 int shift_count = 7; 313 int shift_count = 7;
308 aperf_cur.split.lo >>= shift_count; 314 cur.aperf_cur.split.lo >>= shift_count;
309 mperf_cur.split.lo >>= shift_count; 315 cur.mperf_cur.split.lo >>= shift_count;
310 } 316 }
311 317
312 if (aperf_cur.split.lo && mperf_cur.split.lo) 318 if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
313 perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; 319 perf_percent = (cur.aperf_cur.split.lo * 100) /
320 cur.mperf_cur.split.lo;
314 else 321 else
315 perf_percent = 0; 322 perf_percent = 0;
316 323
317#else 324#else
318 if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { 325 if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
319 int shift_count = 7; 326 int shift_count = 7;
320 aperf_cur.whole >>= shift_count; 327 cur.aperf_cur.whole >>= shift_count;
321 mperf_cur.whole >>= shift_count; 328 cur.mperf_cur.whole >>= shift_count;
322 } 329 }
323 330
324 if (aperf_cur.whole && mperf_cur.whole) 331 if (cur.aperf_cur.whole && cur.mperf_cur.whole)
325 perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; 332 perf_percent = (cur.aperf_cur.whole * 100) /
333 cur.mperf_cur.whole;
326 else 334 else
327 perf_percent = 0; 335 perf_percent = 0;
328 336
@@ -330,10 +338,6 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
330 338
331 retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; 339 retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
332 340
333 put_cpu();
334 set_cpus_allowed_ptr(current, &saved_mask);
335
336 dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
337 return retval; 341 return retval;
338} 342}
339 343
@@ -351,7 +355,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
351 } 355 }
352 356
353 cached_freq = data->freq_table[data->acpi_data->state].frequency; 357 cached_freq = data->freq_table[data->acpi_data->state].frequency;
354 freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); 358 freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
355 if (freq != cached_freq) { 359 if (freq != cached_freq) {
356 /* 360 /*
357 * The dreaded BIOS frequency change behind our back. 361 * The dreaded BIOS frequency change behind our back.
@@ -386,7 +390,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
386 struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); 390 struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
387 struct acpi_processor_performance *perf; 391 struct acpi_processor_performance *perf;
388 struct cpufreq_freqs freqs; 392 struct cpufreq_freqs freqs;
389 cpumask_t online_policy_cpus;
390 struct drv_cmd cmd; 393 struct drv_cmd cmd;
391 unsigned int next_state = 0; /* Index into freq_table */ 394 unsigned int next_state = 0; /* Index into freq_table */
392 unsigned int next_perf_state = 0; /* Index into perf table */ 395 unsigned int next_perf_state = 0; /* Index into perf table */
@@ -401,20 +404,18 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
401 return -ENODEV; 404 return -ENODEV;
402 } 405 }
403 406
407 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
408 return -ENOMEM;
409
404 perf = data->acpi_data; 410 perf = data->acpi_data;
405 result = cpufreq_frequency_table_target(policy, 411 result = cpufreq_frequency_table_target(policy,
406 data->freq_table, 412 data->freq_table,
407 target_freq, 413 target_freq,
408 relation, &next_state); 414 relation, &next_state);
409 if (unlikely(result)) 415 if (unlikely(result)) {
410 return -ENODEV; 416 result = -ENODEV;
411 417 goto out;
412#ifdef CONFIG_HOTPLUG_CPU 418 }
413 /* cpufreq holds the hotplug lock, so we are safe from here on */
414 cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
415#else
416 online_policy_cpus = policy->cpus;
417#endif
418 419
419 next_perf_state = data->freq_table[next_state].index; 420 next_perf_state = data->freq_table[next_state].index;
420 if (perf->state == next_perf_state) { 421 if (perf->state == next_perf_state) {
@@ -425,7 +426,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
425 } else { 426 } else {
426 dprintk("Already at target state (P%d)\n", 427 dprintk("Already at target state (P%d)\n",
427 next_perf_state); 428 next_perf_state);
428 return 0; 429 goto out;
429 } 430 }
430 } 431 }
431 432
@@ -444,19 +445,19 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
444 cmd.val = (u32) perf->states[next_perf_state].control; 445 cmd.val = (u32) perf->states[next_perf_state].control;
445 break; 446 break;
446 default: 447 default:
447 return -ENODEV; 448 result = -ENODEV;
449 goto out;
448 } 450 }
449 451
450 cpus_clear(cmd.mask); 452 /* cpufreq holds the hotplug lock, so we are safe from here on */
451
452 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) 453 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
453 cmd.mask = online_policy_cpus; 454 cpumask_and(cmd.mask, cpu_online_mask, policy->cpus);
454 else 455 else
455 cpu_set(policy->cpu, cmd.mask); 456 cpumask_copy(cmd.mask, cpumask_of(policy->cpu));
456 457
457 freqs.old = perf->states[perf->state].core_frequency * 1000; 458 freqs.old = perf->states[perf->state].core_frequency * 1000;
458 freqs.new = data->freq_table[next_state].frequency; 459 freqs.new = data->freq_table[next_state].frequency;
459 for_each_cpu_mask_nr(i, cmd.mask) { 460 for_each_cpu(i, cmd.mask) {
460 freqs.cpu = i; 461 freqs.cpu = i;
461 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 462 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
462 } 463 }
@@ -464,19 +465,22 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
464 drv_write(&cmd); 465 drv_write(&cmd);
465 466
466 if (acpi_pstate_strict) { 467 if (acpi_pstate_strict) {
467 if (!check_freqs(&cmd.mask, freqs.new, data)) { 468 if (!check_freqs(cmd.mask, freqs.new, data)) {
468 dprintk("acpi_cpufreq_target failed (%d)\n", 469 dprintk("acpi_cpufreq_target failed (%d)\n",
469 policy->cpu); 470 policy->cpu);
470 return -EAGAIN; 471 result = -EAGAIN;
472 goto out;
471 } 473 }
472 } 474 }
473 475
474 for_each_cpu_mask_nr(i, cmd.mask) { 476 for_each_cpu(i, cmd.mask) {
475 freqs.cpu = i; 477 freqs.cpu = i;
476 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 478 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
477 } 479 }
478 perf->state = next_perf_state; 480 perf->state = next_perf_state;
479 481
482out:
483 free_cpumask_var(cmd.mask);
480 return result; 484 return result;
481} 485}
482 486
@@ -517,6 +521,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
517 } 521 }
518} 522}
519 523
524static void free_acpi_perf_data(void)
525{
526 unsigned int i;
527
528 /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
529 for_each_possible_cpu(i)
530 free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
531 ->shared_cpu_map);
532 free_percpu(acpi_perf_data);
533}
534
520/* 535/*
521 * acpi_cpufreq_early_init - initialize ACPI P-States library 536 * acpi_cpufreq_early_init - initialize ACPI P-States library
522 * 537 *
@@ -527,6 +542,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
527 */ 542 */
528static int __init acpi_cpufreq_early_init(void) 543static int __init acpi_cpufreq_early_init(void)
529{ 544{
545 unsigned int i;
530 dprintk("acpi_cpufreq_early_init\n"); 546 dprintk("acpi_cpufreq_early_init\n");
531 547
532 acpi_perf_data = alloc_percpu(struct acpi_processor_performance); 548 acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
@@ -534,6 +550,16 @@ static int __init acpi_cpufreq_early_init(void)
534 dprintk("Memory allocation error for acpi_perf_data.\n"); 550 dprintk("Memory allocation error for acpi_perf_data.\n");
535 return -ENOMEM; 551 return -ENOMEM;
536 } 552 }
553 for_each_possible_cpu(i) {
554 if (!alloc_cpumask_var_node(
555 &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
556 GFP_KERNEL, cpu_to_node(i))) {
557
558 /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
559 free_acpi_perf_data();
560 return -ENOMEM;
561 }
562 }
537 563
538 /* Do initialization in ACPI core */ 564 /* Do initialization in ACPI core */
539 acpi_processor_preregister_performance(acpi_perf_data); 565 acpi_processor_preregister_performance(acpi_perf_data);
@@ -604,15 +630,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
604 */ 630 */
605 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || 631 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
606 policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { 632 policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
607 policy->cpus = perf->shared_cpu_map; 633 cpumask_copy(policy->cpus, perf->shared_cpu_map);
608 } 634 }
609 policy->related_cpus = perf->shared_cpu_map; 635 cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
610 636
611#ifdef CONFIG_SMP 637#ifdef CONFIG_SMP
612 dmi_check_system(sw_any_bug_dmi_table); 638 dmi_check_system(sw_any_bug_dmi_table);
613 if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { 639 if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
614 policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; 640 policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
615 policy->cpus = per_cpu(cpu_core_map, cpu); 641 cpumask_copy(policy->cpus, cpu_core_mask(cpu));
616 } 642 }
617#endif 643#endif
618 644
@@ -795,7 +821,7 @@ static int __init acpi_cpufreq_init(void)
795 821
796 ret = cpufreq_register_driver(&acpi_cpufreq_driver); 822 ret = cpufreq_register_driver(&acpi_cpufreq_driver);
797 if (ret) 823 if (ret)
798 free_percpu(acpi_perf_data); 824 free_acpi_perf_data();
799 825
800 return ret; 826 return ret;
801} 827}
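
The recurring pattern in acpi-cpufreq.c is the replacement of the "save cpus_allowed, migrate, touch MSRs, restore" dance with work_on_cpu(), which queues the callback on the target CPU's workqueue and waits for its long return value. A stripped-down sketch of the idiom, with sample_perf_status() as a hypothetical wrapper rather than anything in the patch:

#include <linux/workqueue.h>
#include <asm/msr.h>

/* work_on_cpu() callbacks have a fixed shape: long fn(void *arg). */
static long read_perf_status(void *unused)
{
	u32 lo, hi;

	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
	return lo;
}

/* Read the MSR on 'cpu' without touching current->cpus_allowed. */
static u32 sample_perf_status(int cpu)
{
	return (u32)work_on_cpu(cpu, read_perf_status, NULL);
}

Unlike the old migration trick, this cannot silently execute on the wrong CPU, which is exactly what the deleted get_cpu() != cpu check in get_measured_perf() had to guard against.
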
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index b0461856acfb..a4cff5d6e380 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -982,7 +982,7 @@ static int __init longhaul_init(void)
982 case 10: 982 case 10:
983 printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); 983 printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
984 default: 984 default:
985 ;; 985 ;
986 } 986 }
987 987
988 return -ENODEV; 988 return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b8e05ee4f736..b585e04cbc9e 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
122 return 0; 122 return 0;
123 123
124 /* notifiers */ 124 /* notifiers */
125 for_each_cpu_mask_nr(i, policy->cpus) { 125 for_each_cpu(i, policy->cpus) {
126 freqs.cpu = i; 126 freqs.cpu = i;
127 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 127 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
128 } 128 }
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
130 /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software 130 /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
131 * Developer's Manual, Volume 3 131 * Developer's Manual, Volume 3
132 */ 132 */
133 for_each_cpu_mask_nr(i, policy->cpus) 133 for_each_cpu(i, policy->cpus)
134 cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); 134 cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
135 135
136 /* notifiers */ 136 /* notifiers */
137 for_each_cpu_mask_nr(i, policy->cpus) { 137 for_each_cpu(i, policy->cpus) {
138 freqs.cpu = i; 138 freqs.cpu = i;
139 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 139 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
140 } 140 }
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
160 switch (c->x86_model) { 160 switch (c->x86_model) {
161 case 0x0E: /* Core */ 161 case 0x0E: /* Core */
162 case 0x0F: /* Core Duo */ 162 case 0x0F: /* Core Duo */
163 case 0x16: /* Celeron Core */
163 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 164 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
164 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); 165 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
165 case 0x0D: /* Pentium M (Dothan) */ 166 case 0x0D: /* Pentium M (Dothan) */
@@ -171,7 +172,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
171 } 172 }
172 173
173 if (c->x86 != 0xF) { 174 if (c->x86 != 0xF) {
174 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); 175 if (!cpu_has(c, X86_FEATURE_EST))
176 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
177 "Please send an e-mail to <cpufreq@vger.kernel.org>\n");
175 return 0; 178 return 0;
176 } 179 }
177 180
@@ -200,7 +203,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
200 unsigned int i; 203 unsigned int i;
201 204
202#ifdef CONFIG_SMP 205#ifdef CONFIG_SMP
203 policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); 206 cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
204#endif 207#endif
205 208
206 /* Errata workaround */ 209 /* Errata workaround */
@@ -274,6 +277,7 @@ static struct cpufreq_driver p4clockmod_driver = {
274 .name = "p4-clockmod", 277 .name = "p4-clockmod",
275 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
276 .attr = p4clockmod_attr, 279 .attr = p4clockmod_attr,
280 .hide_interface = 1,
277}; 281};
278 282
279 283
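
With policy->cpus now handled as a struct cpumask pointer, the three for_each_cpu_mask_nr() walks in cpufreq_p4_target() become for_each_cpu(), which takes the mask by pointer. The converted notifier loop, reduced to a sketch (notify_transition_on_all() is an illustrative helper, not in the file):

#include <linux/cpufreq.h>
#include <linux/cpumask.h>

/* Send one PRE- or POSTCHANGE notification per CPU covered by the policy. */
static void notify_transition_on_all(struct cpufreq_policy *policy,
				     struct cpufreq_freqs *freqs,
				     unsigned int state)
{
	unsigned int i;

	for_each_cpu(i, policy->cpus) {
		freqs->cpu = i;
		cpufreq_notify_transition(freqs, state);
	}
}
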
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7c7d56b43136..1b446d79a8fd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void)
310 goto err0; 310 goto err0;
311 } 311 }
312 312
313 if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
314 GFP_KERNEL)) {
315 retval = -ENOMEM;
316 goto err05;
317 }
318
313 if (acpi_processor_register_performance(acpi_processor_perf, 0)) { 319 if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
314 retval = -EIO; 320 retval = -EIO;
315 goto err1; 321 goto err1;
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void)
412err2: 418err2:
413 acpi_processor_unregister_performance(acpi_processor_perf, 0); 419 acpi_processor_unregister_performance(acpi_processor_perf, 0);
414err1: 420err1:
421 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
422err05:
415 kfree(acpi_processor_perf); 423 kfree(acpi_processor_perf);
416err0: 424err0:
417 printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); 425 printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
652#ifdef CONFIG_X86_POWERNOW_K7_ACPI 660#ifdef CONFIG_X86_POWERNOW_K7_ACPI
653 if (acpi_processor_perf) { 661 if (acpi_processor_perf) {
654 acpi_processor_unregister_performance(acpi_processor_perf, 0); 662 acpi_processor_unregister_performance(acpi_processor_perf, 0);
663 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
655 kfree(acpi_processor_perf); 664 kfree(acpi_processor_perf);
656 } 665 }
657#endif 666#endif
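
The powernow-k7.c hunks show the ownership rules that come with shared_cpu_map turning into a cpumask_var_t: allocate it after the containing object, free it before, and unwind through a dedicated label (err05) on failure. The lifecycle in isolation, using a hypothetical perf_ctx container:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

struct perf_ctx {			/* hypothetical container */
	cpumask_var_t shared_cpu_map;
};

static int perf_ctx_init(struct perf_ctx *ctx)
{
	/* With CONFIG_CPUMASK_OFFSTACK=n this is a no-op that returns true. */
	if (!alloc_cpumask_var(&ctx->shared_cpu_map, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}

static void perf_ctx_exit(struct perf_ctx *ctx)
{
	/* Safe on a zeroed, never-allocated mask, like kfree(NULL). */
	free_cpumask_var(ctx->shared_cpu_map);
}
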
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7f05f44b97e9..5c28b37dea11 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
766static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 766static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
767{ 767{
768 struct cpufreq_frequency_table *powernow_table; 768 struct cpufreq_frequency_table *powernow_table;
769 int ret_val; 769 int ret_val = -ENODEV;
770 770
771 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 771 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
772 dprintk("register performance failed: bad ACPI data\n"); 772 dprintk("register performance failed: bad ACPI data\n");
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
815 /* notify BIOS that we exist */ 815 /* notify BIOS that we exist */
816 acpi_processor_notify_smm(THIS_MODULE); 816 acpi_processor_notify_smm(THIS_MODULE);
817 817
818 if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
819 printk(KERN_ERR PFX
820 "unable to alloc powernow_k8_data cpumask\n");
821 ret_val = -ENOMEM;
822 goto err_out_mem;
823 }
824
818 return 0; 825 return 0;
819 826
820err_out_mem: 827err_out_mem:
@@ -826,7 +833,7 @@ err_out:
826 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ 833 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
827 data->acpi_data.state_count = 0; 834 data->acpi_data.state_count = 0;
828 835
829 return -ENODEV; 836 return ret_val;
830} 837}
831 838
832static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) 839static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
929{ 936{
930 if (data->acpi_data.state_count) 937 if (data->acpi_data.state_count)
931 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 938 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
939 free_cpumask_var(data->acpi_data.shared_cpu_map);
932} 940}
933 941
934#else 942#else
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1134 data->cpu = pol->cpu; 1142 data->cpu = pol->cpu;
1135 data->currpstate = HW_PSTATE_INVALID; 1143 data->currpstate = HW_PSTATE_INVALID;
1136 1144
1137 if (powernow_k8_cpu_init_acpi(data)) { 1145 rc = powernow_k8_cpu_init_acpi(data);
1146 if (rc) {
1138 /* 1147 /*
1139 * Use the PSB BIOS structure. This is only availabe on 1148 * Use the PSB BIOS structure. This is only availabe on
1140 * an UP version, and is deprecated by AMD. 1149 * an UP version, and is deprecated by AMD.
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1152 "ACPI maintainers and complain to your BIOS " 1161 "ACPI maintainers and complain to your BIOS "
1153 "vendor.\n"); 1162 "vendor.\n");
1154#endif 1163#endif
1155 kfree(data); 1164 goto err_out;
1156 return -ENODEV;
1157 } 1165 }
1158 if (pol->cpu != 0) { 1166 if (pol->cpu != 0) {
1159 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1167 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1160 "CPU other than CPU0. Complain to your BIOS " 1168 "CPU other than CPU0. Complain to your BIOS "
1161 "vendor.\n"); 1169 "vendor.\n");
1162 kfree(data); 1170 goto err_out;
1163 return -ENODEV;
1164 } 1171 }
1165 rc = find_psb_table(data); 1172 rc = find_psb_table(data);
1166 if (rc) { 1173 if (rc) {
1167 kfree(data); 1174 goto err_out;
1168 return -ENODEV;
1169 } 1175 }
1170 } 1176 }
1171 1177
@@ -1193,10 +1199,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1193 set_cpus_allowed_ptr(current, &oldmask); 1199 set_cpus_allowed_ptr(current, &oldmask);
1194 1200
1195 if (cpu_family == CPU_HW_PSTATE) 1201 if (cpu_family == CPU_HW_PSTATE)
1196 pol->cpus = cpumask_of_cpu(pol->cpu); 1202 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
1197 else 1203 else
1198 pol->cpus = per_cpu(cpu_core_map, pol->cpu); 1204 cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
1199 data->available_cores = &(pol->cpus); 1205 data->available_cores = pol->cpus;
1200 1206
1201 /* Take a crude guess here. 1207 /* Take a crude guess here.
1202 * That guess was in microseconds, so multiply with 1000 */ 1208 * That guess was in microseconds, so multiply with 1000 */
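
Beyond the cpumask work, the powernow-k8.c hunks rework error reporting: powernow_k8_cpu_init_acpi() now starts from a default ret_val of -ENODEV and overrides it where a more specific code is known, and the three "kfree(data); return -ENODEV;" copies in powernowk8_cpu_init() collapse into a single goto err_out. The first pattern as a self-contained sketch, with probe_hw() as a hypothetical stand-in for the ACPI registration call:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static int probe_hw(void)		/* hypothetical stand-in */
{
	return 0;
}

static int example_init(cpumask_var_t *map)
{
	int ret_val = -ENODEV;		/* default, as in the hunk */

	if (probe_hw())
		goto err_out;

	if (!alloc_cpumask_var(map, GFP_KERNEL)) {
		ret_val = -ENOMEM;	/* allocation failure reported as such */
		goto err_out;
	}

	return 0;

err_out:
	return ret_val;
}
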
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 65cfb5d7f77f..8ecc75b6c7c3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -53,7 +53,7 @@ struct powernow_k8_data {
53 /* we need to keep track of associated cores, but let cpufreq 53 /* we need to keep track of associated cores, but let cpufreq
54 * handle hotplug events - so just point at cpufreq pol->cpus 54 * handle hotplug events - so just point at cpufreq pol->cpus
55 * structure */ 55 * structure */
56 cpumask_t *available_cores; 56 struct cpumask *available_cores;
57}; 57};
58 58
59 59
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 3b5f06423e77..f08998278a3a 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -458,13 +458,6 @@ static int centrino_verify (struct cpufreq_policy *policy)
458 * 458 *
459 * Sets a new CPUFreq policy. 459 * Sets a new CPUFreq policy.
460 */ 460 */
461struct allmasks {
462 cpumask_t online_policy_cpus;
463 cpumask_t saved_mask;
464 cpumask_t set_mask;
465 cpumask_t covered_cpus;
466};
467
468static int centrino_target (struct cpufreq_policy *policy, 461static int centrino_target (struct cpufreq_policy *policy,
469 unsigned int target_freq, 462 unsigned int target_freq,
470 unsigned int relation) 463 unsigned int relation)
@@ -474,14 +467,15 @@ static int centrino_target (struct cpufreq_policy *policy,
474 struct cpufreq_freqs freqs; 467 struct cpufreq_freqs freqs;
475 int retval = 0; 468 int retval = 0;
476 unsigned int j, k, first_cpu, tmp; 469 unsigned int j, k, first_cpu, tmp;
477 CPUMASK_ALLOC(allmasks); 470 cpumask_var_t saved_mask, covered_cpus;
478 CPUMASK_PTR(online_policy_cpus, allmasks);
479 CPUMASK_PTR(saved_mask, allmasks);
480 CPUMASK_PTR(set_mask, allmasks);
481 CPUMASK_PTR(covered_cpus, allmasks);
482 471
483 if (unlikely(allmasks == NULL)) 472 if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
473 return -ENOMEM;
474 if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
475 free_cpumask_var(saved_mask);
484 return -ENOMEM; 476 return -ENOMEM;
477 }
478 cpumask_copy(saved_mask, &current->cpus_allowed);
485 479
486 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { 480 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
487 retval = -ENODEV; 481 retval = -ENODEV;
@@ -497,30 +491,26 @@ static int centrino_target (struct cpufreq_policy *policy,
497 goto out; 491 goto out;
498 } 492 }
499 493
500#ifdef CONFIG_HOTPLUG_CPU
501 /* cpufreq holds the hotplug lock, so we are safe from here on */
502 cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
503#else
504 *online_policy_cpus = policy->cpus;
505#endif
506
507 *saved_mask = current->cpus_allowed;
508 first_cpu = 1; 494 first_cpu = 1;
509 cpus_clear(*covered_cpus); 495 for_each_cpu(j, policy->cpus) {
510 for_each_cpu_mask_nr(j, *online_policy_cpus) { 496 const struct cpumask *mask;
497
498 /* cpufreq holds the hotplug lock, so we are safe here */
499 if (!cpu_online(j))
500 continue;
501
511 /* 502 /*
512 * Support for SMP systems. 503 * Support for SMP systems.
513 * Make sure we are running on CPU that wants to change freq 504 * Make sure we are running on CPU that wants to change freq
514 */ 505 */
515 cpus_clear(*set_mask);
516 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) 506 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
517 cpus_or(*set_mask, *set_mask, *online_policy_cpus); 507 mask = policy->cpus;
518 else 508 else
519 cpu_set(j, *set_mask); 509 mask = cpumask_of(j);
520 510
521 set_cpus_allowed_ptr(current, set_mask); 511 set_cpus_allowed_ptr(current, mask);
522 preempt_disable(); 512 preempt_disable();
523 if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { 513 if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
524 dprintk("couldn't limit to CPUs in this domain\n"); 514 dprintk("couldn't limit to CPUs in this domain\n");
525 retval = -EAGAIN; 515 retval = -EAGAIN;
526 if (first_cpu) { 516 if (first_cpu) {
@@ -548,7 +538,9 @@ static int centrino_target (struct cpufreq_policy *policy,
548 dprintk("target=%dkHz old=%d new=%d msr=%04x\n", 538 dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
549 target_freq, freqs.old, freqs.new, msr); 539 target_freq, freqs.old, freqs.new, msr);
550 540
551 for_each_cpu_mask_nr(k, *online_policy_cpus) { 541 for_each_cpu(k, policy->cpus) {
542 if (!cpu_online(k))
543 continue;
552 freqs.cpu = k; 544 freqs.cpu = k;
553 cpufreq_notify_transition(&freqs, 545 cpufreq_notify_transition(&freqs,
554 CPUFREQ_PRECHANGE); 546 CPUFREQ_PRECHANGE);
@@ -571,7 +563,9 @@ static int centrino_target (struct cpufreq_policy *policy,
571 preempt_enable(); 563 preempt_enable();
572 } 564 }
573 565
574 for_each_cpu_mask_nr(k, *online_policy_cpus) { 566 for_each_cpu(k, policy->cpus) {
567 if (!cpu_online(k))
568 continue;
575 freqs.cpu = k; 569 freqs.cpu = k;
576 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 570 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
577 } 571 }
@@ -584,18 +578,17 @@ static int centrino_target (struct cpufreq_policy *policy,
584 * Best effort undo.. 578 * Best effort undo..
585 */ 579 */
586 580
587 if (!cpus_empty(*covered_cpus)) 581 for_each_cpu_mask_nr(j, *covered_cpus) {
588 for_each_cpu_mask_nr(j, *covered_cpus) { 582 set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
589 set_cpus_allowed_ptr(current, 583 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
590 &cpumask_of_cpu(j)); 584 }
591 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
592 }
593 585
594 tmp = freqs.new; 586 tmp = freqs.new;
595 freqs.new = freqs.old; 587 freqs.new = freqs.old;
596 freqs.old = tmp; 588 freqs.old = tmp;
597 for_each_cpu_mask_nr(j, *online_policy_cpus) { 589 for_each_cpu(j, policy->cpus) {
598 freqs.cpu = j; 590 if (!cpu_online(j))
591 continue;
599 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 592 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
600 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 593 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
601 } 594 }
@@ -608,7 +601,8 @@ migrate_end:
608 preempt_enable(); 601 preempt_enable();
609 set_cpus_allowed_ptr(current, saved_mask); 602 set_cpus_allowed_ptr(current, saved_mask);
610out: 603out:
611 CPUMASK_FREE(allmasks); 604 free_cpumask_var(saved_mask);
605 free_cpumask_var(covered_cpus);
612 return retval; 606 return retval;
613} 607}
614 608
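
centrino_target() trades the CPUMASK_ALLOC()/CPUMASK_PTR() helpers for two plain cpumask_var_t variables, each allocated and checked separately, with a failure freeing whatever was already obtained. The prologue/epilogue shape, stripped of the frequency logic (a sketch, not the full function):

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sched.h>

static int example_target(void)
{
	cpumask_var_t saved_mask, covered_cpus;
	int retval = 0;

	if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
		return -ENOMEM;
	if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
		free_cpumask_var(saved_mask);	/* undo the first allocation */
		return -ENOMEM;
	}
	cpumask_copy(saved_mask, &current->cpus_allowed);

	/* ... migrate, program MSRs, send notifications ... */

	set_cpus_allowed_ptr(current, saved_mask);
	free_cpumask_var(saved_mask);
	free_cpumask_var(covered_cpus);
	return retval;
}
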
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 04d0376b64b0..dedc1e98f168 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -229,7 +229,7 @@ static unsigned int speedstep_detect_chipset (void)
229 return 0; 229 return 0;
230} 230}
231 231
232static unsigned int _speedstep_get(const cpumask_t *cpus) 232static unsigned int _speedstep_get(const struct cpumask *cpus)
233{ 233{
234 unsigned int speed; 234 unsigned int speed;
235 cpumask_t cpus_allowed; 235 cpumask_t cpus_allowed;
@@ -244,7 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
244 244
245static unsigned int speedstep_get(unsigned int cpu) 245static unsigned int speedstep_get(unsigned int cpu)
246{ 246{
247 return _speedstep_get(&cpumask_of_cpu(cpu)); 247 return _speedstep_get(cpumask_of(cpu));
248} 248}
249 249
250/** 250/**
@@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
267 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) 267 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
268 return -EINVAL; 268 return -EINVAL;
269 269
270 freqs.old = _speedstep_get(&policy->cpus); 270 freqs.old = _speedstep_get(policy->cpus);
271 freqs.new = speedstep_freqs[newstate].frequency; 271 freqs.new = speedstep_freqs[newstate].frequency;
272 freqs.cpu = policy->cpu; 272 freqs.cpu = policy->cpu;
273 273
@@ -279,20 +279,20 @@ static int speedstep_target (struct cpufreq_policy *policy,
279 279
280 cpus_allowed = current->cpus_allowed; 280 cpus_allowed = current->cpus_allowed;
281 281
282 for_each_cpu_mask_nr(i, policy->cpus) { 282 for_each_cpu(i, policy->cpus) {
283 freqs.cpu = i; 283 freqs.cpu = i;
284 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 284 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
285 } 285 }
286 286
287 /* switch to physical CPU where state is to be changed */ 287 /* switch to physical CPU where state is to be changed */
288 set_cpus_allowed_ptr(current, &policy->cpus); 288 set_cpus_allowed_ptr(current, policy->cpus);
289 289
290 speedstep_set_state(newstate); 290 speedstep_set_state(newstate);
291 291
292 /* allow to be run on all CPUs */ 292 /* allow to be run on all CPUs */
293 set_cpus_allowed_ptr(current, &cpus_allowed); 293 set_cpus_allowed_ptr(current, &cpus_allowed);
294 294
295 for_each_cpu_mask_nr(i, policy->cpus) { 295 for_each_cpu(i, policy->cpus) {
296 freqs.cpu = i; 296 freqs.cpu = i;
297 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 297 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
298 } 298 }
@@ -322,11 +322,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
322 322
323 /* only run on CPU to be set, or on its sibling */ 323 /* only run on CPU to be set, or on its sibling */
324#ifdef CONFIG_SMP 324#ifdef CONFIG_SMP
325 policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); 325 cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
326#endif 326#endif
327 327
328 cpus_allowed = current->cpus_allowed; 328 cpus_allowed = current->cpus_allowed;
329 set_cpus_allowed_ptr(current, &policy->cpus); 329 set_cpus_allowed_ptr(current, policy->cpus);
330 330
331 /* detect low and high frequency and transition latency */ 331 /* detect low and high frequency and transition latency */
332 result = speedstep_get_freqs(speedstep_processor, 332 result = speedstep_get_freqs(speedstep_processor,
@@ -339,7 +339,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
339 return result; 339 return result;
340 340
341 /* get current speed setting */ 341 /* get current speed setting */
342 speed = _speedstep_get(&policy->cpus); 342 speed = _speedstep_get(policy->cpus);
343 if (!speed) 343 if (!speed)
344 return -EIO; 344 return -EIO;
345 345
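
The speedstep-ich.c changes are almost purely mechanical: cpumask_of(cpu) hands back a const struct cpumask * into a shared static table, so single-CPU masks are passed by pointer rather than composed and copied as cpumask_t values via cpumask_of_cpu(). A small sketch of the calling convention, assuming nothing beyond <linux/cpumask.h> and <linux/smp.h> (example_runs_here() is illustrative):

#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/types.h>

/* Does the given mask cover the CPU we are currently running on? */
static bool example_runs_here(const struct cpumask *cpus)
{
	bool ret;

	ret = cpumask_test_cpu(get_cpu(), cpus);	/* get_cpu() pins us */
	put_cpu();
	return ret;
}

/* Called as: example_runs_here(cpumask_of(cpu)) -- no mask copy involved. */
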
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 98d4fdb7dc04..cdac7d62369b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void)
139 case 3: 139 case 3:
140 fsb = 166667; 140 fsb = 166667;
141 break; 141 break;
142 case 2:
143 fsb = 200000;
144 break;
145 case 0:
146 fsb = 266667;
147 break;
148 case 4:
149 fsb = 333333;
150 break;
142 default: 151 default:
143 printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); 152 printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
144 } 153 }
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 68b5d8681cbb..48533d77be78 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
534 per_cpu(cpuid4_info, cpu) = NULL; 534 per_cpu(cpuid4_info, cpu) = NULL;
535} 535}
536 536
537static int __cpuinit detect_cache_attributes(unsigned int cpu) 537static void __cpuinit get_cpu_leaves(void *_retval)
538{ 538{
539 struct _cpuid4_info *this_leaf; 539 int j, *retval = _retval, cpu = smp_processor_id();
540 unsigned long j;
541 int retval;
542 cpumask_t oldmask;
543
544 if (num_cache_leaves == 0)
545 return -ENOENT;
546
547 per_cpu(cpuid4_info, cpu) = kzalloc(
548 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
549 if (per_cpu(cpuid4_info, cpu) == NULL)
550 return -ENOMEM;
551
552 oldmask = current->cpus_allowed;
553 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
554 if (retval)
555 goto out;
556 540
557 /* Do cpuid and store the results */ 541 /* Do cpuid and store the results */
558 for (j = 0; j < num_cache_leaves; j++) { 542 for (j = 0; j < num_cache_leaves; j++) {
543 struct _cpuid4_info *this_leaf;
559 this_leaf = CPUID4_INFO_IDX(cpu, j); 544 this_leaf = CPUID4_INFO_IDX(cpu, j);
560 retval = cpuid4_cache_lookup(j, this_leaf); 545 *retval = cpuid4_cache_lookup(j, this_leaf);
561 if (unlikely(retval < 0)) { 546 if (unlikely(*retval < 0)) {
562 int i; 547 int i;
563 548
564 for (i = 0; i < j; i++) 549 for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
567 } 552 }
568 cache_shared_cpu_map_setup(cpu, j); 553 cache_shared_cpu_map_setup(cpu, j);
569 } 554 }
570 set_cpus_allowed_ptr(current, &oldmask); 555}
556
557static int __cpuinit detect_cache_attributes(unsigned int cpu)
558{
559 int retval;
560
561 if (num_cache_leaves == 0)
562 return -ENOENT;
563
564 per_cpu(cpuid4_info, cpu) = kzalloc(
565 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
566 if (per_cpu(cpuid4_info, cpu) == NULL)
567 return -ENOMEM;
571 568
572out: 569 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
573 if (retval) { 570 if (retval) {
574 kfree(per_cpu(cpuid4_info, cpu)); 571 kfree(per_cpu(cpuid4_info, cpu));
575 per_cpu(cpuid4_info, cpu) = NULL; 572 per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
626 cpumask_t *mask = &this_leaf->shared_cpu_map; 623 cpumask_t *mask = &this_leaf->shared_cpu_map;
627 624
628 n = type? 625 n = type?
629 cpulist_scnprintf(buf, len-2, *mask): 626 cpulist_scnprintf(buf, len-2, mask) :
630 cpumask_scnprintf(buf, len-2, *mask); 627 cpumask_scnprintf(buf, len-2, mask);
631 buf[n++] = '\n'; 628 buf[n++] = '\n';
632 buf[n] = '\0'; 629 buf[n] = '\0';
633 } 630 }
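
detect_cache_attributes() stops migrating the calling task and instead runs the CPUID reads through smp_call_function_single(). With wait=1 the caller blocks until the callback has run on the target CPU; the callback executes with interrupts off and must not sleep, which suits a pure CPUID walk. The mechanism in miniature (whoami()/read_on_cpu() are illustrative names, not from the patch):

#include <linux/smp.h>

static void whoami(void *_retval)
{
	int *retval = _retval;

	*retval = smp_processor_id();	/* stand-in for the cpuid4 lookup */
}

static int read_on_cpu(int cpu)
{
	int retval = -1;

	/* wait=1: do not return until whoami() has run on 'cpu' */
	smp_call_function_single(cpu, whoami, &retval, 1);
	return retval;
}
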
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 0ebf3fc6a610..dfaebce3633e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * mce.c - x86 Machine Check Exception Reporting 2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com> 3 * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9e7a05..8ae8c4ff094d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
83 * CPU Initialization 83 * CPU Initialization
84 */ 84 */
85 85
86struct thresh_restart {
87 struct threshold_block *b;
88 int reset;
89 u16 old_limit;
90};
91
86/* must be called with correct cpu affinity */ 92/* must be called with correct cpu affinity */
87static void threshold_restart_bank(struct threshold_block *b, 93static long threshold_restart_bank(void *_tr)
88 int reset, u16 old_limit)
89{ 94{
95 struct thresh_restart *tr = _tr;
90 u32 mci_misc_hi, mci_misc_lo; 96 u32 mci_misc_hi, mci_misc_lo;
91 97
92 rdmsr(b->address, mci_misc_lo, mci_misc_hi); 98 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
93 99
94 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 100 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
95 reset = 1; /* limit cannot be lower than err count */ 101 tr->reset = 1; /* limit cannot be lower than err count */
96 102
97 if (reset) { /* reset err count and overflow bit */ 103 if (tr->reset) { /* reset err count and overflow bit */
98 mci_misc_hi = 104 mci_misc_hi =
99 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 105 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
100 (THRESHOLD_MAX - b->threshold_limit); 106 (THRESHOLD_MAX - tr->b->threshold_limit);
101 } else if (old_limit) { /* change limit w/o reset */ 107 } else if (tr->old_limit) { /* change limit w/o reset */
102 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 108 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
103 (old_limit - b->threshold_limit); 109 (tr->old_limit - tr->b->threshold_limit);
104 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 110 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
105 (new_count & THRESHOLD_MAX); 111 (new_count & THRESHOLD_MAX);
106 } 112 }
107 113
108 b->interrupt_enable ? 114 tr->b->interrupt_enable ?
109 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 115 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
110 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 116 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
111 117
112 mci_misc_hi |= MASK_COUNT_EN_HI; 118 mci_misc_hi |= MASK_COUNT_EN_HI;
113 wrmsr(b->address, mci_misc_lo, mci_misc_hi); 119 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
120 return 0;
114} 121}
115 122
116/* cpu init entry point, called from mce.c with preempt off */ 123/* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
120 unsigned int cpu = smp_processor_id(); 127 unsigned int cpu = smp_processor_id();
121 u8 lvt_off; 128 u8 lvt_off;
122 u32 low = 0, high = 0, address = 0; 129 u32 low = 0, high = 0, address = 0;
130 struct thresh_restart tr;
123 131
124 for (bank = 0; bank < NR_BANKS; ++bank) { 132 for (bank = 0; bank < NR_BANKS; ++bank) {
125 for (block = 0; block < NR_BLOCKS; ++block) { 133 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
162 wrmsr(address, low, high); 170 wrmsr(address, low, high);
163 171
164 threshold_defaults.address = address; 172 threshold_defaults.address = address;
165 threshold_restart_bank(&threshold_defaults, 0, 0); 173 tr.b = &threshold_defaults;
174 tr.reset = 0;
175 tr.old_limit = 0;
176 threshold_restart_bank(&tr);
166 } 177 }
167 } 178 }
168} 179}
@@ -251,20 +262,6 @@ struct threshold_attr {
251 ssize_t(*store) (struct threshold_block *, const char *, size_t count); 262 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
252}; 263};
253 264
254static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
255 cpumask_t *newmask)
256{
257 *oldmask = current->cpus_allowed;
258 cpus_clear(*newmask);
259 cpu_set(cpu, *newmask);
260 set_cpus_allowed_ptr(current, newmask);
261}
262
263static void affinity_restore(const cpumask_t *oldmask)
264{
265 set_cpus_allowed_ptr(current, oldmask);
266}
267
268#define SHOW_FIELDS(name) \ 265#define SHOW_FIELDS(name) \
269static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ 266static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
270{ \ 267{ \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
277 const char *buf, size_t count) 274 const char *buf, size_t count)
278{ 275{
279 char *end; 276 char *end;
280 cpumask_t oldmask, newmask; 277 struct thresh_restart tr;
281 unsigned long new = simple_strtoul(buf, &end, 0); 278 unsigned long new = simple_strtoul(buf, &end, 0);
282 if (end == buf) 279 if (end == buf)
283 return -EINVAL; 280 return -EINVAL;
284 b->interrupt_enable = !!new; 281 b->interrupt_enable = !!new;
285 282
286 affinity_set(b->cpu, &oldmask, &newmask); 283 tr.b = b;
287 threshold_restart_bank(b, 0, 0); 284 tr.reset = 0;
288 affinity_restore(&oldmask); 285 tr.old_limit = 0;
286 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
289 287
290 return end - buf; 288 return end - buf;
291} 289}
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
294 const char *buf, size_t count) 292 const char *buf, size_t count)
295{ 293{
296 char *end; 294 char *end;
297 cpumask_t oldmask, newmask; 295 struct thresh_restart tr;
298 u16 old;
299 unsigned long new = simple_strtoul(buf, &end, 0); 296 unsigned long new = simple_strtoul(buf, &end, 0);
300 if (end == buf) 297 if (end == buf)
301 return -EINVAL; 298 return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
303 new = THRESHOLD_MAX; 300 new = THRESHOLD_MAX;
304 if (new < 1) 301 if (new < 1)
305 new = 1; 302 new = 1;
306 old = b->threshold_limit; 303 tr.old_limit = b->threshold_limit;
307 b->threshold_limit = new; 304 b->threshold_limit = new;
305 tr.b = b;
306 tr.reset = 0;
308 307
309 affinity_set(b->cpu, &oldmask, &newmask); 308 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
310 threshold_restart_bank(b, 0, old);
311 affinity_restore(&oldmask);
312 309
313 return end - buf; 310 return end - buf;
314} 311}
315 312
316static ssize_t show_error_count(struct threshold_block *b, char *buf) 313static long local_error_count(void *_b)
317{ 314{
318 u32 high, low; 315 struct threshold_block *b = _b;
319 cpumask_t oldmask, newmask; 316 u32 low, high;
320 affinity_set(b->cpu, &oldmask, &newmask); 317
321 rdmsr(b->address, low, high); 318 rdmsr(b->address, low, high);
322 affinity_restore(&oldmask); 319 return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
323 return sprintf(buf, "%x\n", 320}
324 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 321
322static ssize_t show_error_count(struct threshold_block *b, char *buf)
323{
324 return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
325} 325}
326 326
327static ssize_t store_error_count(struct threshold_block *b, 327static ssize_t store_error_count(struct threshold_block *b,
328 const char *buf, size_t count) 328 const char *buf, size_t count)
329{ 329{
330 cpumask_t oldmask, newmask; 330 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
331 affinity_set(b->cpu, &oldmask, &newmask); 331
332 threshold_restart_bank(b, 1, 0); 332 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
333 affinity_restore(&oldmask);
334 return 1; 333 return 1;
335} 334}
336 335
@@ -463,12 +462,19 @@ out_free:
463 return err; 462 return err;
464} 463}
465 464
465static __cpuinit long local_allocate_threshold_blocks(void *_bank)
466{
467 unsigned int *bank = _bank;
468
469 return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
470 MSR_IA32_MC0_MISC + *bank * 4);
471}
472
466/* symlinks sibling shared banks to first core. first core owns dir/files. */ 473/* symlinks sibling shared banks to first core. first core owns dir/files. */
467static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) 474static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
468{ 475{
469 int i, err = 0; 476 int i, err = 0;
470 struct threshold_bank *b = NULL; 477 struct threshold_bank *b = NULL;
471 cpumask_t oldmask, newmask;
472 char name[32]; 478 char name[32];
473 479
474 sprintf(name, "threshold_bank%i", bank); 480 sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
519 525
520 per_cpu(threshold_banks, cpu)[bank] = b; 526 per_cpu(threshold_banks, cpu)[bank] = b;
521 527
522 affinity_set(cpu, &oldmask, &newmask); 528 err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
523 err = allocate_threshold_blocks(cpu, bank, 0,
524 MSR_IA32_MC0_MISC + bank * 4);
525 affinity_restore(&oldmask);
526
527 if (err) 529 if (err)
528 goto out_free; 530 goto out_free;
529 531
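
The mce_amd_64.c hunks above replace the old affinity_set()/affinity_restore() dance, which temporarily rewrote the calling task's cpus_allowed, with work_on_cpu(): the callback is queued on the target CPU's workqueue and its long return value is handed back. A minimal sketch of the pattern, modeled on local_error_count() above (read_msr_on() is a hypothetical wrapper, not part of this patch):

/* Run an MSR read on the CPU that owns the bank, without touching
 * the calling task's CPU affinity. */
static long read_msr_fn(void *arg)
{
	struct threshold_block *b = arg;	/* executes on b->cpu */
	u32 low, high;

	rdmsr(b->address, low, high);
	return high & 0xFFF;
}

static long read_msr_on(struct threshold_block *b)
{
	/* work_on_cpu() may sleep, so no atomic-context callers */
	return work_on_cpu(b->cpu, read_msr_fn, b);
}
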
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index bfa5817afdda..c9f77ea69edc 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * P5 specific Machine Check Exception Reporting 2 * P5 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com> 3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 62efc9c2b3af..2ac52d7b434b 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * P6 specific Machine Check Exception Reporting 2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com> 3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index f2be3e190c6b..2a043d89811d 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * IDT Winchip specific Machine Check Exception Reporting 2 * IDT Winchip specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com> 3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
14#include <asm/pat.h> 14#include <asm/pat.h>
15#include "mtrr.h" 15#include "mtrr.h"
16 16
17struct mtrr_state {
18 struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
19 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
20 unsigned char enabled;
21 unsigned char have_fixed;
22 mtrr_type def_type;
23};
24
25struct fixed_range_block { 17struct fixed_range_block {
26 int base_msr; /* start address of an MTRR block */ 18 int base_msr; /* start address of an MTRR block */
27 int ranges; /* number of MTRRs in this block */ 19 int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
35}; 27};
36 28
37static unsigned long smp_changes_mask; 29static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 30static int mtrr_state_set;
40u64 mtrr_tom2; 31u64 mtrr_tom2;
41 32
33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state);
35
42#undef MODULE_PARAM_PREFIX 36#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 37#define MODULE_PARAM_PREFIX "mtrr."
44 38
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e596..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
49 49
50u32 num_var_ranges = 0; 50u32 num_var_ranges = 0;
51 51
52unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 53static DEFINE_MUTEX(mtrr_mutex);
54 54
55u64 size_or_mask, size_and_mask; 55u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
@@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata =
824 824
825static int __init disable_mtrr_cleanup_setup(char *str) 825static int __init disable_mtrr_cleanup_setup(char *str)
826{ 826{
827 if (enable_mtrr_cleanup != -1) 827 enable_mtrr_cleanup = 0;
828 enable_mtrr_cleanup = 0;
829 return 0; 828 return 0;
830} 829}
831early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); 830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
832 831
833static int __init enable_mtrr_cleanup_setup(char *str) 832static int __init enable_mtrr_cleanup_setup(char *str)
834{ 833{
835 if (enable_mtrr_cleanup != -1) 834 enable_mtrr_cleanup = 1;
836 enable_mtrr_cleanup = 1;
837 return 0; 835 return 0;
838} 836}
839early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); 837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
8#define MTRRcap_MSR 0x0fe 8#define MTRRcap_MSR 0x0fe
9#define MTRRdefType_MSR 0x2ff 9#define MTRRdefType_MSR 0x2ff
10 10
11#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
13
14#define NUM_FIXED_RANGES 88
15#define MAX_VAR_RANGES 256
16#define MTRRfix64K_00000_MSR 0x250 11#define MTRRfix64K_00000_MSR 0x250
17#define MTRRfix16K_80000_MSR 0x258 12#define MTRRfix16K_80000_MSR 0x258
18#define MTRRfix16K_A0000_MSR 0x259 13#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
29#define MTRR_CHANGE_MASK_VARIABLE 0x02 24#define MTRR_CHANGE_MASK_VARIABLE 0x02
30#define MTRR_CHANGE_MASK_DEFTYPE 0x04 25#define MTRR_CHANGE_MASK_DEFTYPE 0x04
31 26
32/* In the Intel processor's MTRR interface, the MTRR type is always held in 27extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
33 an 8 bit field: */
34typedef u8 mtrr_type;
35
36extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
37 28
38struct mtrr_ops { 29struct mtrr_ops {
39 u32 vendor; 30 u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
70 u32 ccr3; 61 u32 ccr3;
71}; 62};
72 63
73struct mtrr_var_range {
74 u32 base_lo;
75 u32 base_hi;
76 u32 mask_lo;
77 u32 mask_hi;
78};
79
80void set_mtrr_done(struct set_mtrr_context *ctxt); 64void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 65void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 66void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
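
The struct and typedef removals from generic.c and mtrr.h above only make sense together with a shared header: the EXPORT_SYMBOL_GPL(mtrr_state) added in generic.c implies consumers outside this directory. Presumably the consolidated definitions land in <asm/mtrr.h> roughly as in this sketch (names taken from the renamed identifiers in the hunks):

/* In the Intel processor's MTRR interface, the MTRR type is always
 * held in an 8 bit field: */
typedef u8 mtrr_type;

#define MTRR_NUM_FIXED_RANGES	88
#define MTRR_MAX_VAR_RANGES	256

struct mtrr_var_range {
	u32 base_lo;
	u32 base_hi;
	u32 mask_lo;
	u32 mask_hi;
};

struct mtrr_state_type {
	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
	unsigned char enabled;
	unsigned char have_fixed;
	mtrr_type def_type;
};
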
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649b..2ac1f0c2beb3 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/uaccess.h>
42 43
43#include <asm/processor.h> 44#include <asm/processor.h>
44#include <asm/msr.h> 45#include <asm/msr.h>
45#include <asm/uaccess.h>
46#include <asm/system.h> 46#include <asm/system.h>
47 47
48static struct class *cpuid_class; 48static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
82} 82}
83 83
84static ssize_t cpuid_read(struct file *file, char __user *buf, 84static ssize_t cpuid_read(struct file *file, char __user *buf,
85 size_t count, loff_t * ppos) 85 size_t count, loff_t *ppos)
86{ 86{
87 char __user *tmp = buf; 87 char __user *tmp = buf;
88 struct cpuid_regs cmd; 88 struct cpuid_regs cmd;
@@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file)
117 unsigned int cpu; 117 unsigned int cpu;
118 struct cpuinfo_x86 *c; 118 struct cpuinfo_x86 *c;
119 int ret = 0; 119 int ret = 0;
120 120
121 lock_kernel(); 121 lock_kernel();
122 122
123 cpu = iminor(file->f_path.dentry->d_inode); 123 cpu = iminor(file->f_path.dentry->d_inode);
124 if (cpu >= NR_CPUS || !cpu_online(cpu)) { 124 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
125 ret = -ENXIO; /* No such CPU */ 125 ret = -ENXIO; /* No such CPU */
126 goto out; 126 goto out;
127 } 127 }
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h>
29 30
30#include <mach_ipi.h> 31#include <mach_ipi.h>
31 32
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
49#endif 50#endif
50 crash_save_cpu(regs, cpu); 51 crash_save_cpu(regs, cpu);
51 52
53 /* Disable VMX or SVM if needed.
54 *
55 * We need to disable virtualization on all CPUs.
56 * Having VMX or SVM enabled on any CPU may break rebooting
57 * after the kdump kernel has finished its task.
58 */
59 cpu_emergency_vmxoff();
60 cpu_emergency_svm_disable();
61
52 disable_local_APIC(); 62 disable_local_APIC();
53} 63}
54 64
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
80 local_irq_disable(); 90 local_irq_disable();
81 91
82 kdump_nmi_shootdown_cpus(); 92 kdump_nmi_shootdown_cpus();
93
 94 /* Booting the kdump kernel with VMX or SVM enabled won't work,
95 * because (among other limitations) we can't disable paging
96 * with the virt flags.
97 */
98 cpu_emergency_vmxoff();
99 cpu_emergency_svm_disable();
100
83 lapic_shutdown(); 101 lapic_shutdown();
84#if defined(CONFIG_X86_IO_APIC) 102#if defined(CONFIG_X86_IO_APIC)
85 disable_IO_APIC(); 103 disable_IO_APIC();
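
cpu_emergency_vmxoff() and cpu_emergency_svm_disable() must be callable from NMI and crash context, so the safe shape is check-then-disable on each CPU. A sketch of the VMX side, under the assumption that the <asm/virtext.h> helpers reduce to roughly this (simplified: the full version also leaves VMX root operation cleanly and clears CR4.VMXE):

/* VMXOFF raises #UD unless this CPU is actually in VMX operation;
 * CR4.VMXE is the cheap prerequisite check available here. */
static inline int cpu_vmx_enabled(void)
{
	return read_cr4() & X86_CR4_VMXE;
}

static inline void cpu_emergency_vmxoff(void)
{
	if (cpu_vmx_enabled())
		asm volatile ("vmxoff");	/* simplified sketch */
}
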
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 65a13943e098..e85826829cf2 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -665,6 +665,27 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
665} 665}
666#endif 666#endif
667 667
668#ifdef CONFIG_HIBERNATION
669/**
670 * Mark ACPI NVS memory regions, so that we can save/restore them during
671 * hibernation and the subsequent resume.
672 */
673static int __init e820_mark_nvs_memory(void)
674{
675 int i;
676
677 for (i = 0; i < e820.nr_map; i++) {
678 struct e820entry *ei = &e820.map[i];
679
680 if (ei->type == E820_NVS)
681 hibernate_nvs_register(ei->addr, ei->size);
682 }
683
684 return 0;
685}
686core_initcall(e820_mark_nvs_memory);
687#endif
688
668/* 689/*
669 * Early reserved memory areas. 690 * Early reserved memory areas.
670 */ 691 */
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 744aa7fc49d5..76b8cd953dee 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -201,6 +201,12 @@ struct chipset {
201 void (*f)(int num, int slot, int func); 201 void (*f)(int num, int slot, int func);
202}; 202};
203 203
204/*
205 * Only works for devices on the root bus. If you add any devices
206 * not on bus 0, re-add the outer loop level in early_quirks(). But
207 * be careful: at least the Nvidia quirk here relies on
208 * matching only on bus 0.
209 */
204static struct chipset early_qrk[] __initdata = { 210static struct chipset early_qrk[] __initdata = {
205 { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, 211 { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
206 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, 212 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
@@ -267,17 +273,17 @@ static int __init check_dev_quirk(int num, int slot, int func)
267 273
268void __init early_quirks(void) 274void __init early_quirks(void)
269{ 275{
270 int num, slot, func; 276 int slot, func;
271 277
272 if (!early_pci_allowed()) 278 if (!early_pci_allowed())
273 return; 279 return;
274 280
275 /* Poor man's PCI discovery */ 281 /* Poor man's PCI discovery */
276 for (num = 0; num < 32; num++) 282 /* Only scan the root bus */
277 for (slot = 0; slot < 32; slot++) 283 for (slot = 0; slot < 32; slot++)
278 for (func = 0; func < 8; func++) { 284 for (func = 0; func < 8; func++) {
279 /* Only probe function 0 on single fn devices */ 285 /* Only probe function 0 on single fn devices */
280 if (check_dev_quirk(num, slot, func)) 286 if (check_dev_quirk(0, slot, func))
281 break; 287 break;
282 } 288 }
283} 289}
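
Should a quirk device ever live off bus 0, the scan would need its outer bus loop back, essentially as removed above:

/* Former three-level scan. The Nvidia quirk would then also need an
 * explicit bus check, since it currently relies on the scan seeing
 * bus 0 only. */
for (num = 0; num < 32; num++)
	for (slot = 0; slot < 32; slot++)
		for (func = 0; func < 8; func++) {
			/* Only probe function 0 on single fn devices */
			if (check_dev_quirk(num, slot, func))
				break;
		}
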
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9c..504ad198e4ad 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
886 va_list ap; 886 va_list ap;
887 887
888 va_start(ap, fmt); 888 va_start(ap, fmt);
889 n = vscnprintf(buf, 512, fmt, ap); 889 n = vscnprintf(buf, sizeof(buf), fmt, ap);
890 early_console->write(early_console, buf, n); 890 early_console->write(early_console, buf, n);
891 va_end(ap); 891 va_end(ap);
892} 892}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..34185488e4fb 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
30 return 1; 30 return 1;
31} 31}
32 32
33static cpumask_t flat_target_cpus(void) 33static const struct cpumask *flat_target_cpus(void)
34{ 34{
35 return cpu_online_map; 35 return cpu_online_mask;
36} 36}
37 37
38static cpumask_t flat_vector_allocation_domain(int cpu) 38static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
39{ 39{
40 /* Careful. Some cpus do not strictly honor the set of cpus 40 /* Careful. Some cpus do not strictly honor the set of cpus
41 * specified in the interrupt destination when using lowest 41 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
45 * deliver interrupts to the wrong hyperthread when only one 45 * deliver interrupts to the wrong hyperthread when only one
46 * hyperthread was specified in the interrupt destination. 46 * hyperthread was specified in the interrupt destination.
47 */ 47 */
48 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 48 cpumask_clear(retmask);
49 return domain; 49 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
50} 50}
51 51
52/* 52/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
69 apic_write(APIC_LDR, val); 69 apic_write(APIC_LDR, val);
70} 70}
71 71
72static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 72static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
73{ 73{
74 unsigned long mask = cpus_addr(cpumask)[0];
75 unsigned long flags; 74 unsigned long flags;
76 75
77 local_irq_save(flags); 76 local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
79 local_irq_restore(flags); 78 local_irq_restore(flags);
80} 79}
81 80
81static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
82{
83 unsigned long mask = cpumask_bits(cpumask)[0];
84
85 _flat_send_IPI_mask(mask, vector);
86}
87
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
89 int vector)
90{
91 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id();
93
94 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask);
96 _flat_send_IPI_mask(mask, vector);
97}
98
82static void flat_send_IPI_allbutself(int vector) 99static void flat_send_IPI_allbutself(int vector)
83{ 100{
101 int cpu = smp_processor_id();
84#ifdef CONFIG_HOTPLUG_CPU 102#ifdef CONFIG_HOTPLUG_CPU
85 int hotplug = 1; 103 int hotplug = 1;
86#else 104#else
87 int hotplug = 0; 105 int hotplug = 0;
88#endif 106#endif
89 if (hotplug || vector == NMI_VECTOR) { 107 if (hotplug || vector == NMI_VECTOR) {
90 cpumask_t allbutme = cpu_online_map; 108 if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
109 unsigned long mask = cpumask_bits(cpu_online_mask)[0];
91 110
92 cpu_clear(smp_processor_id(), allbutme); 111 if (cpu < BITS_PER_LONG)
112 clear_bit(cpu, &mask);
93 113
94 if (!cpus_empty(allbutme)) 114 _flat_send_IPI_mask(mask, vector);
95 flat_send_IPI_mask(allbutme, vector); 115 }
96 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
97 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
98 } 118 }
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
101static void flat_send_IPI_all(int vector) 121static void flat_send_IPI_all(int vector)
102{ 122{
103 if (vector == NMI_VECTOR) 123 if (vector == NMI_VECTOR)
104 flat_send_IPI_mask(cpu_online_map, vector); 124 flat_send_IPI_mask(cpu_online_mask, vector);
105 else 125 else
106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
107} 127}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
135 return physid_isset(read_xapic_id(), phys_cpu_present_map); 155 return physid_isset(read_xapic_id(), phys_cpu_present_map);
136} 156}
137 157
138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 158static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
159{
160 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
161}
162
163static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
164 const struct cpumask *andmask)
139{ 165{
140 return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; 166 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
167 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
168
169 return mask1 & mask2;
141} 170}
142 171
143static unsigned int phys_pkg_id(int index_msb) 172static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
157 .send_IPI_all = flat_send_IPI_all, 186 .send_IPI_all = flat_send_IPI_all,
158 .send_IPI_allbutself = flat_send_IPI_allbutself, 187 .send_IPI_allbutself = flat_send_IPI_allbutself,
159 .send_IPI_mask = flat_send_IPI_mask, 188 .send_IPI_mask = flat_send_IPI_mask,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
160 .send_IPI_self = apic_send_IPI_self, 190 .send_IPI_self = apic_send_IPI_self,
161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
162 .phys_pkg_id = phys_pkg_id, 193 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id, 194 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id, 195 .set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
188 return 0; 219 return 0;
189} 220}
190 221
191static cpumask_t physflat_target_cpus(void) 222static const struct cpumask *physflat_target_cpus(void)
192{ 223{
193 return cpu_online_map; 224 return cpu_online_mask;
194} 225}
195 226
196static cpumask_t physflat_vector_allocation_domain(int cpu) 227static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
197{ 228{
198 return cpumask_of_cpu(cpu); 229 cpumask_clear(retmask);
230 cpumask_set_cpu(cpu, retmask);
199} 231}
200 232
201static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) 233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
202{ 234{
203 send_IPI_mask_sequence(cpumask, vector); 235 send_IPI_mask_sequence(cpumask, vector);
204} 236}
205 237
206static void physflat_send_IPI_allbutself(int vector) 238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector)
207{ 240{
208 cpumask_t allbutme = cpu_online_map; 241 send_IPI_mask_allbutself(cpumask, vector);
242}
209 243
210 cpu_clear(smp_processor_id(), allbutme); 244static void physflat_send_IPI_allbutself(int vector)
211 physflat_send_IPI_mask(allbutme, vector); 245{
246 send_IPI_mask_allbutself(cpu_online_mask, vector);
212} 247}
213 248
214static void physflat_send_IPI_all(int vector) 249static void physflat_send_IPI_all(int vector)
215{ 250{
216 physflat_send_IPI_mask(cpu_online_map, vector); 251 physflat_send_IPI_mask(cpu_online_mask, vector);
217} 252}
218 253
219static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) 254static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
220{ 255{
221 int cpu; 256 int cpu;
222 257
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
224 * We're using fixed IRQ delivery, can only return one phys APIC ID. 259 * We're using fixed IRQ delivery, can only return one phys APIC ID.
225 * May as well be the first. 260 * May as well be the first.
226 */ 261 */
227 cpu = first_cpu(cpumask); 262 cpu = cpumask_first(cpumask);
228 if ((unsigned)cpu < nr_cpu_ids) 263 if ((unsigned)cpu < nr_cpu_ids)
229 return per_cpu(x86_cpu_to_apicid, cpu); 264 return per_cpu(x86_cpu_to_apicid, cpu);
230 else 265 else
231 return BAD_APICID; 266 return BAD_APICID;
232} 267}
233 268
269static unsigned int
270physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
271 const struct cpumask *andmask)
272{
273 int cpu;
274
275 /*
276 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first.
278 */
279 for_each_cpu_and(cpu, cpumask, andmask)
280 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break;
282 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu);
284 return BAD_APICID;
285}
286
234struct genapic apic_physflat = { 287struct genapic apic_physflat = {
235 .name = "physical flat", 288 .name = "physical flat",
236 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@ struct genapic apic_physflat = {
243 .send_IPI_all = physflat_send_IPI_all, 296 .send_IPI_all = physflat_send_IPI_all,
244 .send_IPI_allbutself = physflat_send_IPI_allbutself, 297 .send_IPI_allbutself = physflat_send_IPI_allbutself,
245 .send_IPI_mask = physflat_send_IPI_mask, 298 .send_IPI_mask = physflat_send_IPI_mask,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
246 .send_IPI_self = apic_send_IPI_self, 300 .send_IPI_self = apic_send_IPI_self,
247 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
248 .phys_pkg_id = phys_pkg_id, 303 .phys_pkg_id = phys_pkg_id,
249 .get_apic_id = get_apic_id, 304 .get_apic_id = get_apic_id,
250 .set_apic_id = set_apic_id, 305 .set_apic_id = set_apic_id,
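
The flat-APIC rework above is one instance of the tree-wide cpumask conversion: with NR_CPUS=4096 a cpumask_t occupies 512 bytes, so returning or passing one by value burns a large stack copy per call. The new convention takes const struct cpumask * for inputs and lets the caller supply storage for outputs, as this before/after sketch of the allocation-domain hook shows:

/* Old style: builds and returns a whole cpumask_t by value. */
static cpumask_t old_vector_allocation_domain(int cpu)
{
	cpumask_t domain = CPU_MASK_NONE;

	cpu_set(cpu, domain);
	return domain;		/* copies sizeof(cpumask_t) bytes */
}

/* New style: the caller provides the storage; nothing is copied. */
static void new_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	cpumask_clear(retmask);
	cpumask_set_cpu(cpu, retmask);
}
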
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..6ce497cc372d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
22 22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24 24
25static cpumask_t x2apic_target_cpus(void) 25static const struct cpumask *x2apic_target_cpus(void)
26{ 26{
27 return cpumask_of_cpu(0); 27 return cpumask_of(0);
28} 28}
29 29
30/* 30/*
31 * for now each logical cpu is in its own vector allocation domain. 31 * for now each logical cpu is in its own vector allocation domain.
32 */ 32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu) 33static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
34{ 34{
35 cpumask_t domain = CPU_MASK_NONE; 35 cpumask_clear(retmask);
36 cpu_set(cpu, domain); 36 cpumask_set_cpu(cpu, retmask);
37 return domain;
38} 37}
39 38
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
56 * at once. We have 16 CPUs in a cluster. This will minimize IPI register 55 * at once. We have 16 CPUs in a cluster. This will minimize IPI register
57 * writes. 56 * writes.
58 */ 57 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
60{ 59{
61 unsigned long flags; 60 unsigned long flags;
62 unsigned long query_cpu; 61 unsigned long query_cpu;
63 62
64 local_irq_save(flags); 63 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask)
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), 65 __x2apic_send_IPI_dest(
67 vector, APIC_DEST_LOGICAL); 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
68 } 67 vector, APIC_DEST_LOGICAL);
69 local_irq_restore(flags); 68 local_irq_restore(flags);
70} 69}
71 70
72static void x2apic_send_IPI_allbutself(int vector) 71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
72 int vector)
73{ 73{
74 cpumask_t mask = cpu_online_map; 74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id();
75 77
76 cpu_clear(smp_processor_id(), mask); 78 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask)
80 if (query_cpu != this_cpu)
81 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL);
84 local_irq_restore(flags);
85}
86
87static void x2apic_send_IPI_allbutself(int vector)
88{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id();
77 92
78 if (!cpus_empty(mask)) 93 local_irq_save(flags);
79 x2apic_send_IPI_mask(mask, vector); 94 for_each_online_cpu(query_cpu)
95 if (query_cpu != this_cpu)
96 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL);
99 local_irq_restore(flags);
80} 100}
81 101
82static void x2apic_send_IPI_all(int vector) 102static void x2apic_send_IPI_all(int vector)
83{ 103{
84 x2apic_send_IPI_mask(cpu_online_map, vector); 104 x2apic_send_IPI_mask(cpu_online_mask, vector);
85} 105}
86 106
87static int x2apic_apic_id_registered(void) 107static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void)
89 return 1; 109 return 1;
90} 110}
91 111
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
93{ 113{
94 int cpu; 114 int cpu;
95 115
96 /* 116 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID. 117 * We're using fixed IRQ delivery, can only return one logical APIC ID.
98 * May as well be the first. 118 * May as well be the first.
99 */ 119 */
100 cpu = first_cpu(cpumask); 120 cpu = cpumask_first(cpumask);
101 if ((unsigned)cpu < NR_CPUS) 121 if ((unsigned)cpu < nr_cpu_ids)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu); 122 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else 123 else
104 return BAD_APICID; 124 return BAD_APICID;
105} 125}
106 126
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
129{
130 int cpu;
131
132 /*
133 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first.
135 */
136 for_each_cpu_and(cpu, cpumask, andmask)
137 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break;
139 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu);
141 return BAD_APICID;
142}
143
107static unsigned int get_apic_id(unsigned long x) 144static unsigned int get_apic_id(unsigned long x)
108{ 145{
109 unsigned int id; 146 unsigned int id;
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = {
150 .send_IPI_all = x2apic_send_IPI_all, 187 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 188 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask, 189 .send_IPI_mask = x2apic_send_IPI_mask,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
153 .send_IPI_self = x2apic_send_IPI_self, 191 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
155 .phys_pkg_id = phys_pkg_id, 194 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id, 195 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id, 196 .set_apic_id = set_apic_id,
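
Every genapic driver in this series grows the same cpu_mask_to_apicid_and() shape: walk the intersection of two masks and take the first CPU that is also online. Condensed into one sketch (apicid_of() stands in for the per-driver per_cpu() lookup and is hypothetical):

static unsigned int first_online_apicid(const struct cpumask *cpumask,
					const struct cpumask *andmask)
{
	int cpu;

	/* for_each_cpu_and() visits only CPUs set in both masks, so no
	 * temporary cpumask is needed for the intersection. */
	for_each_cpu_and(cpu, cpumask, andmask)
		if (cpumask_test_cpu(cpu, cpu_online_mask))
			break;
	if (cpu < nr_cpu_ids)
		return apicid_of(cpu);	/* hypothetical lookup */
	return BAD_APICID;
}
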
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31 31
32static cpumask_t x2apic_target_cpus(void) 32static const struct cpumask *x2apic_target_cpus(void)
33{ 33{
34 return cpumask_of_cpu(0); 34 return cpumask_of(0);
35} 35}
36 36
37static cpumask_t x2apic_vector_allocation_domain(int cpu) 37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
38{ 38{
39 cpumask_t domain = CPU_MASK_NONE; 39 cpumask_clear(retmask);
40 cpu_set(cpu, domain); 40 cpumask_set_cpu(cpu, retmask);
41 return domain;
42} 41}
43 42
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 43static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
54 x2apic_icr_write(cfg, apicid); 53 x2apic_icr_write(cfg, apicid);
55} 54}
56 55
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58{ 57{
59 unsigned long flags; 58 unsigned long flags;
60 unsigned long query_cpu; 59 unsigned long query_cpu;
61 60
62 local_irq_save(flags); 61 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL); 64 vector, APIC_DEST_PHYSICAL);
66 } 65 }
67 local_irq_restore(flags); 66 local_irq_restore(flags);
68} 67}
69 68
70static void x2apic_send_IPI_allbutself(int vector) 69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
70 int vector)
71{ 71{
72 cpumask_t mask = cpu_online_map; 72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id();
75
76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu)
79 __x2apic_send_IPI_dest(
80 per_cpu(x86_cpu_to_apicid, query_cpu),
81 vector, APIC_DEST_PHYSICAL);
82 }
83 local_irq_restore(flags);
84}
73 85
74 cpu_clear(smp_processor_id(), mask); 86static void x2apic_send_IPI_allbutself(int vector)
87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id();
75 91
76 if (!cpus_empty(mask)) 92 local_irq_save(flags);
77 x2apic_send_IPI_mask(mask, vector); 93 for_each_online_cpu(query_cpu)
94 if (query_cpu != this_cpu)
95 __x2apic_send_IPI_dest(
96 per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL);
98 local_irq_restore(flags);
78} 99}
79 100
80static void x2apic_send_IPI_all(int vector) 101static void x2apic_send_IPI_all(int vector)
81{ 102{
82 x2apic_send_IPI_mask(cpu_online_map, vector); 103 x2apic_send_IPI_mask(cpu_online_mask, vector);
83} 104}
84 105
85static int x2apic_apic_id_registered(void) 106static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
87 return 1; 108 return 1;
88} 109}
89 110
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
91{ 112{
92 int cpu; 113 int cpu;
93 114
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
95 * We're using fixed IRQ delivery, can only return one phys APIC ID. 116 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first. 117 * May as well be the first.
97 */ 118 */
98 cpu = first_cpu(cpumask); 119 cpu = cpumask_first(cpumask);
99 if ((unsigned)cpu < NR_CPUS) 120 if ((unsigned)cpu < nr_cpu_ids)
100 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
101 else 122 else
102 return BAD_APICID; 123 return BAD_APICID;
103} 124}
104 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
127 const struct cpumask *andmask)
128{
129 int cpu;
130
131 /*
132 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first.
134 */
135 for_each_cpu_and(cpu, cpumask, andmask)
136 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break;
138 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu);
140 return BAD_APICID;
141}
142
105static unsigned int get_apic_id(unsigned long x) 143static unsigned int get_apic_id(unsigned long x)
106{ 144{
107 unsigned int id; 145 unsigned int id;
@@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb)
123 return current_cpu_data.initial_apicid >> index_msb; 161 return current_cpu_data.initial_apicid >> index_msb;
124} 162}
125 163
126void x2apic_send_IPI_self(int vector) 164static void x2apic_send_IPI_self(int vector)
127{ 165{
128 apic_write(APIC_SELF_IPI, vector); 166 apic_write(APIC_SELF_IPI, vector);
129} 167}
130 168
131void init_x2apic_ldr(void) 169static void init_x2apic_ldr(void)
132{ 170{
133 return; 171 return;
134} 172}
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = {
145 .send_IPI_all = x2apic_send_IPI_all, 183 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 184 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask, 185 .send_IPI_mask = x2apic_send_IPI_mask,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
148 .send_IPI_self = x2apic_send_IPI_self, 187 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
150 .phys_pkg_id = phys_pkg_id, 190 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id, 191 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id, 192 .set_apic_id = set_apic_id,
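
The allbutself paths in the x2apic and uv drivers converge on the same shape too: iterate the online mask in place and skip the sender, rather than copying cpu_online_map into a local cpumask_t and clearing one bit. Roughly, with send_one() standing in for the driver's per-CPU ICR write (hypothetical name):

static void send_IPI_allbutself_sketch(int vector)
{
	unsigned int this_cpu = smp_processor_id();
	unsigned int cpu;

	/* No on-stack cpumask copy; filter while walking. */
	for_each_online_cpu(cpu)
		if (cpu != this_cpu)
			send_one(cpu, vector);	/* hypothetical sender */
}
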
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece17289731..b193e082f6ce 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
79 79
80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
81 81
82static cpumask_t uv_target_cpus(void) 82static const struct cpumask *uv_target_cpus(void)
83{ 83{
84 return cpumask_of_cpu(0); 84 return cpumask_of(0);
85} 85}
86 86
87static cpumask_t uv_vector_allocation_domain(int cpu) 87static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
88{ 88{
89 cpumask_t domain = CPU_MASK_NONE; 89 cpumask_clear(retmask);
90 cpu_set(cpu, domain); 90 cpumask_set_cpu(cpu, retmask);
91 return domain;
92} 91}
93 92
94int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 93int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector)
127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
128} 127}
129 128
130static void uv_send_IPI_mask(cpumask_t mask, int vector) 129static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
131{ 130{
132 unsigned int cpu; 131 unsigned int cpu;
133 132
134 for_each_possible_cpu(cpu) 133 for_each_cpu(cpu, mask)
135 if (cpu_isset(cpu, mask)) 134 uv_send_IPI_one(cpu, vector);
135}
136
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id();
141
142 for_each_cpu(cpu, mask)
143 if (cpu != this_cpu)
136 uv_send_IPI_one(cpu, vector); 144 uv_send_IPI_one(cpu, vector);
137} 145}
138 146
139static void uv_send_IPI_allbutself(int vector) 147static void uv_send_IPI_allbutself(int vector)
140{ 148{
141 cpumask_t mask = cpu_online_map; 149 unsigned int cpu;
142 150 unsigned int this_cpu = smp_processor_id();
143 cpu_clear(smp_processor_id(), mask);
144 151
145 if (!cpus_empty(mask)) 152 for_each_online_cpu(cpu)
146 uv_send_IPI_mask(mask, vector); 153 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector);
147} 155}
148 156
149static void uv_send_IPI_all(int vector) 157static void uv_send_IPI_all(int vector)
150{ 158{
151 uv_send_IPI_mask(cpu_online_map, vector); 159 uv_send_IPI_mask(cpu_online_mask, vector);
152} 160}
153 161
154static int uv_apic_id_registered(void) 162static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void)
160{ 168{
161} 169}
162 170
163static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 171static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
164{ 172{
165 int cpu; 173 int cpu;
166 174
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
168 * We're using fixed IRQ delivery, can only return one phys APIC ID. 176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
169 * May as well be the first. 177 * May as well be the first.
170 */ 178 */
171 cpu = first_cpu(cpumask); 179 cpu = cpumask_first(cpumask);
172 if ((unsigned)cpu < nr_cpu_ids) 180 if ((unsigned)cpu < nr_cpu_ids)
173 return per_cpu(x86_cpu_to_apicid, cpu); 181 return per_cpu(x86_cpu_to_apicid, cpu);
174 else 182 else
175 return BAD_APICID; 183 return BAD_APICID;
176} 184}
177 185
186static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
187 const struct cpumask *andmask)
188{
189 int cpu;
190
191 /*
192 * We're using fixed IRQ delivery, can only return one phys APIC ID.
193 * May as well be the first.
194 */
195 for_each_cpu_and(cpu, cpumask, andmask)
196 if (cpumask_test_cpu(cpu, cpu_online_mask))
197 break;
198 if (cpu < nr_cpu_ids)
199 return per_cpu(x86_cpu_to_apicid, cpu);
200 return BAD_APICID;
201}
202
178static unsigned int get_apic_id(unsigned long x) 203static unsigned int get_apic_id(unsigned long x)
179{ 204{
180 unsigned int id; 205 unsigned int id;
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = {
222 .send_IPI_all = uv_send_IPI_all, 247 .send_IPI_all = uv_send_IPI_all,
223 .send_IPI_allbutself = uv_send_IPI_allbutself, 248 .send_IPI_allbutself = uv_send_IPI_allbutself,
224 .send_IPI_mask = uv_send_IPI_mask, 249 .send_IPI_mask = uv_send_IPI_mask,
250 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
225 .send_IPI_self = uv_send_IPI_self, 251 .send_IPI_self = uv_send_IPI_self,
226 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 252 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
253 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
227 .phys_pkg_id = phys_pkg_id, 254 .phys_pkg_id = phys_pkg_id,
228 .get_apic_id = get_apic_id, 255 .get_apic_id = get_apic_id,
229 .set_apic_id = set_apic_id, 256 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc17..b9a4d8c4b935 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */ 29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda __read_mostly; 30static struct x8664_pda _boot_cpu_pda;
31 31
32#ifdef CONFIG_SMP 32#ifdef CONFIG_SMP
33/* 33/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 26cfdc1d7c7f..0e275d495563 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -305,7 +305,7 @@ ENTRY(early_idt_handler)
305 call dump_stack 305 call dump_stack
306#ifdef CONFIG_KALLSYMS 306#ifdef CONFIG_KALLSYMS
307 leaq early_idt_ripmsg(%rip),%rdi 307 leaq early_idt_ripmsg(%rip),%rdi
308 movq 8(%rsp),%rsi # get rip again 308 movq 0(%rsp),%rsi # get rip again
309 call __print_symbol 309 call __print_symbol
310#endif 310#endif
311#endif /* EARLY_PRINTK */ 311#endif /* EARLY_PRINTK */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3f0a3edf0a57..cd759ad90690 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void)
248 * Start hpet with the boot cpu mask and make it 248 * Start hpet with the boot cpu mask and make it
249 * global after the IO_APIC has been initialized. 249 * global after the IO_APIC has been initialized.
250 */ 250 */
251 hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 251 hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
252 clockevents_register_device(&hpet_clockevent); 252 clockevents_register_device(&hpet_clockevent);
253 global_clock_event = &hpet_clockevent; 253 global_clock_event = &hpet_clockevent;
254 printk(KERN_DEBUG "hpet clockevent registered\n"); 254 printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
303 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); 303 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
304 hpet_setup_msi_irq(hdev->irq); 304 hpet_setup_msi_irq(hdev->irq);
305 disable_irq(hdev->irq); 305 disable_irq(hdev->irq);
306 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); 306 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
307 enable_irq(hdev->irq); 307 enable_irq(hdev->irq);
308 } 308 }
309 break; 309 break;
@@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
451 return -1; 451 return -1;
452 452
453 disable_irq(dev->irq); 453 disable_irq(dev->irq);
454 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); 454 irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
455 enable_irq(dev->irq); 455 enable_irq(dev->irq);
456 456
457 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", 457 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
502 /* 5 usec minimum reprogramming delta. */ 502 /* 5 usec minimum reprogramming delta. */
503 evt->min_delta_ns = 5000; 503 evt->min_delta_ns = 5000;
504 504
505 evt->cpumask = cpumask_of_cpu(hdev->cpu); 505 evt->cpumask = cpumask_of(hdev->cpu);
506 clockevents_register_device(evt); 506 clockevents_register_device(evt);
507} 507}
508 508
@@ -813,7 +813,7 @@ int __init hpet_enable(void)
813 813
814out_nohpet: 814out_nohpet:
815 hpet_clear_mapping(); 815 hpet_clear_mapping();
816 boot_hpet_disable = 1; 816 hpet_address = 0;
817 return 0; 817 return 0;
818} 818}
819 819
@@ -836,10 +836,11 @@ static __init int hpet_late_init(void)
836 836
837 hpet_address = force_hpet_address; 837 hpet_address = force_hpet_address;
838 hpet_enable(); 838 hpet_enable();
839 if (!hpet_virt_address)
840 return -ENODEV;
841 } 839 }
842 840
841 if (!hpet_virt_address)
842 return -ENODEV;
843
843 hpet_reserve_platform_timers(hpet_readl(HPET_ID)); 844 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
844 845
845 for_each_online_cpu(cpu) { 846 for_each_online_cpu(cpu) {
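
The clockevent and irq_set_affinity() conversions in hpet.c depend on cpumask_of(cpu) returning a pointer to a constant per-CPU bitmap, where the old cpumask_of_cpu(cpu) constructed a full cpumask_t value. At a call site the difference is just:

/* Old: assigns a struct copy (512 bytes at NR_CPUS=4096). */
evt->cpumask = cpumask_of_cpu(hdev->cpu);

/* New: assigns a pointer to read-only per-CPU data. */
evt->cpumask = cpumask_of(hdev->cpu);
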
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
114 * Start pit with the boot cpu mask and make it global after the 114 * Start pit with the boot cpu mask and make it global after the
115 * IO_APIC has been initialized. 115 * IO_APIC has been initialized.
116 */ 116 */
117 pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 117 pit_clockevent.cpumask = cpumask_of(smp_processor_id());
118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
119 pit_clockevent.shift); 119 pit_clockevent.shift);
120 pit_clockevent.max_delta_ns = 120 pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4b8a53d841f7..11d5093eb281 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -11,15 +11,15 @@
11#include <linux/kernel_stat.h> 11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h> 12#include <linux/sysdev.h>
13#include <linux/bitops.h> 13#include <linux/bitops.h>
14#include <linux/acpi.h>
15#include <linux/io.h>
16#include <linux/delay.h>
14 17
15#include <asm/acpi.h>
16#include <asm/atomic.h> 18#include <asm/atomic.h>
17#include <asm/system.h> 19#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/timer.h> 20#include <asm/timer.h>
20#include <asm/hw_irq.h> 21#include <asm/hw_irq.h>
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
22#include <asm/delay.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/arch_hooks.h> 25#include <asm/arch_hooks.h>
@@ -323,7 +323,7 @@ void init_8259A(int auto_eoi)
323 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ 323 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
324 324
325 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, 325 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
326 to 0x20-0x27 on i386 */ 326 to 0x20-0x27 on i386 */
327 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); 327 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
328 328
329 /* 8259A-1 (the master) has a slave on IR2 */ 329 /* 8259A-1 (the master) has a slave on IR2 */
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/desc.h> 11#include <asm/desc.h>
12 12
13static struct fs_struct init_fs = INIT_FS;
14static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 15struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 679e7bbbbcd6..1c4a1302536c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,94 +108,275 @@ static int __init parse_noapic(char *str)
108early_param("noapic", parse_noapic); 108early_param("noapic", parse_noapic);
109 109
110struct irq_pin_list; 110struct irq_pin_list;
111
112/*
113 * This is performance-critical, we want to do it O(1)
114 *
115 * the indexing order of this array favors 1:1 mappings
116 * between pins and IRQs.
117 */
118
119struct irq_pin_list {
120 int apic, pin;
121 struct irq_pin_list *next;
122};
123
124static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
125{
126 struct irq_pin_list *pin;
127 int node;
128
129 node = cpu_to_node(cpu);
130
131 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
132
133 return pin;
134}
135
111struct irq_cfg { 136struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin; 137 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain; 138 cpumask_var_t domain;
115 cpumask_t old_domain; 139 cpumask_var_t old_domain;
116 unsigned move_cleanup_count; 140 unsigned move_cleanup_count;
117 u8 vector; 141 u8 vector;
118 u8 move_in_progress : 1; 142 u8 move_in_progress : 1;
143#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
144 u8 move_desc_pending : 1;
145#endif
119}; 146};
120 147
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 148/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
149#ifdef CONFIG_SPARSE_IRQ
150static struct irq_cfg irq_cfgx[] = {
151#else
122static struct irq_cfg irq_cfgx[NR_IRQS] = { 152static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 153#endif
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 154 [0] = { .vector = IRQ0_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 155 [1] = { .vector = IRQ1_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 156 [2] = { .vector = IRQ2_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 157 [3] = { .vector = IRQ3_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 158 [4] = { .vector = IRQ4_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 159 [5] = { .vector = IRQ5_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 160 [6] = { .vector = IRQ6_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 161 [7] = { .vector = IRQ7_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 162 [8] = { .vector = IRQ8_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 163 [9] = { .vector = IRQ9_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 164 [10] = { .vector = IRQ10_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 165 [11] = { .vector = IRQ11_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 166 [12] = { .vector = IRQ12_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 167 [13] = { .vector = IRQ13_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 168 [14] = { .vector = IRQ14_VECTOR, },
169 [15] = { .vector = IRQ15_VECTOR, },
139}; 170};
140 171
141#define for_each_irq_cfg(irq, cfg) \ 172int __init arch_early_irq_init(void)
142 for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) 173{
174 struct irq_cfg *cfg;
175 struct irq_desc *desc;
176 int count;
177 int i;
178
179 cfg = irq_cfgx;
180 count = ARRAY_SIZE(irq_cfgx);
181
182 for (i = 0; i < count; i++) {
183 desc = irq_to_desc(i);
184 desc->chip_data = &cfg[i];
185 alloc_bootmem_cpumask_var(&cfg[i].domain);
186 alloc_bootmem_cpumask_var(&cfg[i].old_domain);
187 if (i < NR_IRQS_LEGACY)
188 cpumask_setall(cfg[i].domain);
189 }
190
191 return 0;
192}
143 193
194#ifdef CONFIG_SPARSE_IRQ
144static struct irq_cfg *irq_cfg(unsigned int irq) 195static struct irq_cfg *irq_cfg(unsigned int irq)
145{ 196{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL; 197 struct irq_cfg *cfg = NULL;
198 struct irq_desc *desc;
199
200 desc = irq_to_desc(irq);
201 if (desc)
202 cfg = desc->chip_data;
203
204 return cfg;
147} 205}
148 206
149static struct irq_cfg *irq_cfg_alloc(unsigned int irq) 207static struct irq_cfg *get_one_free_irq_cfg(int cpu)
150{ 208{
151 return irq_cfg(irq); 209 struct irq_cfg *cfg;
210 int node;
211
212 node = cpu_to_node(cpu);
213
214 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
215 if (cfg) {
216 if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
217 kfree(cfg);
218 cfg = NULL;
219 } else if (!alloc_cpumask_var_node(&cfg->old_domain,
220 GFP_ATOMIC, node)) {
221 free_cpumask_var(cfg->domain);
222 kfree(cfg);
223 cfg = NULL;
224 } else {
225 cpumask_clear(cfg->domain);
226 cpumask_clear(cfg->old_domain);
227 }
228 }
229
230 return cfg;
152} 231}
153 232
154/* 233int arch_init_chip_data(struct irq_desc *desc, int cpu)
155 * Rough estimation of how many shared IRQs there are, can be changed 234{
156 * anytime. 235 struct irq_cfg *cfg;
157 */
158#define MAX_PLUS_SHARED_IRQS NR_IRQS
159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
160 236
161/* 237 cfg = desc->chip_data;
162 * This is performance-critical, we want to do it O(1) 238 if (!cfg) {
163 * 239 desc->chip_data = get_one_free_irq_cfg(cpu);
164 * the indexing order of this array favors 1:1 mappings 240 if (!desc->chip_data) {
165 * between pins and IRQs. 241 printk(KERN_ERR "cannot alloc irq_cfg\n");
166 */ 242 BUG_ON(1);
243 }
244 }
167 245
168struct irq_pin_list { 246 return 0;
169 int apic, pin; 247}
170 struct irq_pin_list *next;
171};
172 248
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; 249#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
174static struct irq_pin_list *irq_2_pin_ptr;
175 250
176static void __init irq_2_pin_init(void) 251static void
252init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
177{ 253{
178 struct irq_pin_list *pin = irq_2_pin_head; 254 struct irq_pin_list *old_entry, *head, *tail, *entry;
179 int i; 255
256 cfg->irq_2_pin = NULL;
257 old_entry = old_cfg->irq_2_pin;
258 if (!old_entry)
259 return;
260
261 entry = get_one_free_irq_2_pin(cpu);
262 if (!entry)
263 return;
180 264
181 for (i = 1; i < PIN_MAP_SIZE; i++) 265 entry->apic = old_entry->apic;
182 pin[i-1].next = &pin[i]; 266 entry->pin = old_entry->pin;
267 head = entry;
268 tail = entry;
269 old_entry = old_entry->next;
270 while (old_entry) {
271 entry = get_one_free_irq_2_pin(cpu);
272 if (!entry) {
273 entry = head;
274 while (entry) {
275 head = entry->next;
276 kfree(entry);
277 entry = head;
278 }
279 /* still use the old one */
280 return;
281 }
282 entry->apic = old_entry->apic;
283 entry->pin = old_entry->pin;
284 tail->next = entry;
285 tail = entry;
286 old_entry = old_entry->next;
287 }
183 288
184 irq_2_pin_ptr = &pin[0]; 289 tail->next = NULL;
290 cfg->irq_2_pin = head;
185} 291}
186 292
187static struct irq_pin_list *get_one_free_irq_2_pin(void) 293static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
188{ 294{
189 struct irq_pin_list *pin = irq_2_pin_ptr; 295 struct irq_pin_list *entry, *next;
190 296
191 if (!pin) 297 if (old_cfg->irq_2_pin == cfg->irq_2_pin)
192 panic("can not get more irq_2_pin\n"); 298 return;
193 299
194 irq_2_pin_ptr = pin->next; 300 entry = old_cfg->irq_2_pin;
195 pin->next = NULL; 301
196 return pin; 302 while (entry) {
303 next = entry->next;
304 kfree(entry);
305 entry = next;
306 }
307 old_cfg->irq_2_pin = NULL;
308}
309
310void arch_init_copy_chip_data(struct irq_desc *old_desc,
311 struct irq_desc *desc, int cpu)
312{
313 struct irq_cfg *cfg;
314 struct irq_cfg *old_cfg;
315
316 cfg = get_one_free_irq_cfg(cpu);
317
318 if (!cfg)
319 return;
320
321 desc->chip_data = cfg;
322
323 old_cfg = old_desc->chip_data;
324
325 memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
326
327 init_copy_irq_2_pin(old_cfg, cfg, cpu);
197} 328}
198 329
330static void free_irq_cfg(struct irq_cfg *old_cfg)
331{
332 kfree(old_cfg);
333}
334
335void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
336{
337 struct irq_cfg *old_cfg, *cfg;
338
339 old_cfg = old_desc->chip_data;
340 cfg = desc->chip_data;
341
342 if (old_cfg == cfg)
343 return;
344
345 if (old_cfg) {
346 free_irq_2_pin(old_cfg, cfg);
347 free_irq_cfg(old_cfg);
348 old_desc->chip_data = NULL;
349 }
350}
351
352static void
353set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
354{
355 struct irq_cfg *cfg = desc->chip_data;
356
357 if (!cfg->move_in_progress) {
 358 /* the domain has not changed */
359 if (!cpumask_intersects(&desc->affinity, mask))
360 cfg->move_desc_pending = 1;
361 }
362}
363#endif
364
365#else
366static struct irq_cfg *irq_cfg(unsigned int irq)
367{
368 return irq < nr_irqs ? irq_cfgx + irq : NULL;
369}
370
371#endif
372
373#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
374static inline void
375set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
376{
377}
378#endif
379
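The rollback in init_copy_irq_2_pin() (new lines 270-281 above) is the interesting part: if any node of the copy cannot be allocated, the half-built copy is freed and the old list stays in use. A minimal standalone sketch of the same idiom, using malloc/free in place of the kernel allocators; struct node and copy_list() are illustrative names, not kernel API:

	#include <stdlib.h>

	struct node { int apic, pin; struct node *next; };

	static struct node *copy_list(const struct node *old)
	{
		struct node *head = NULL, *tail = NULL;

		for (; old; old = old->next) {
			struct node *n = malloc(sizeof(*n));
			if (!n) {		/* roll back the partial copy */
				while (head) {
					struct node *next = head->next;
					free(head);
					head = next;
				}
				return NULL;	/* caller keeps the old list */
			}
			n->apic = old->apic;
			n->pin  = old->pin;
			n->next = NULL;
			if (tail)
				tail->next = n;
			else
				head = n;
			tail = n;
		}
		return head;	/* NULL also for an empty input list */
	}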
199struct io_apic { 380struct io_apic {
200 unsigned int index; 381 unsigned int index;
201 unsigned int unused[3]; 382 unsigned int unused[3];
@@ -237,11 +418,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
237 writel(value, &io_apic->data); 418 writel(value, &io_apic->data);
238} 419}
239 420
240static bool io_apic_level_ack_pending(unsigned int irq) 421static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
241{ 422{
242 struct irq_pin_list *entry; 423 struct irq_pin_list *entry;
243 unsigned long flags; 424 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
245 425
246 spin_lock_irqsave(&ioapic_lock, flags); 426 spin_lock_irqsave(&ioapic_lock, flags);
247 entry = cfg->irq_2_pin; 427 entry = cfg->irq_2_pin;
@@ -323,13 +503,32 @@ static void ioapic_mask_entry(int apic, int pin)
323} 503}
324 504
325#ifdef CONFIG_SMP 505#ifdef CONFIG_SMP
326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 506static void send_cleanup_vector(struct irq_cfg *cfg)
507{
508 cpumask_var_t cleanup_mask;
509
510 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
511 unsigned int i;
512 cfg->move_cleanup_count = 0;
513 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
514 cfg->move_cleanup_count++;
515 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
516 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
517 } else {
518 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
519 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
520 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
521 free_cpumask_var(cleanup_mask);
522 }
523 cfg->move_in_progress = 0;
524}
525
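send_cleanup_vector() factors out a sequence that was previously open-coded at every vector-migration site (compare the removed lines 2135-2140 and 3100-3105 further down in this diff). The unlikely() branch is a GFP_ATOMIC fallback: when no temporary cpumask can be allocated, cfg->old_domain is walked twice, once to count the CPUs that must acknowledge the cleanup IPI and once to send it, so no scratch mask is needed. Call sites shrink to:

	if (cfg->move_in_progress)
		send_cleanup_vector(cfg);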
526static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
327{ 527{
328 int apic, pin; 528 int apic, pin;
329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry; 529 struct irq_pin_list *entry;
530 u8 vector = cfg->vector;
331 531
332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin; 532 entry = cfg->irq_2_pin;
334 for (;;) { 533 for (;;) {
335 unsigned int reg; 534 unsigned int reg;
@@ -359,36 +558,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
359 } 558 }
360} 559}
361 560
362static int assign_irq_vector(int irq, cpumask_t mask); 561static int
562assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
363 563
364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 564/*
565 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
566 * of that, or returns BAD_APICID and leaves desc->affinity untouched.
567 */
568static unsigned int
569set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
570{
571 struct irq_cfg *cfg;
572 unsigned int irq;
573
574 if (!cpumask_intersects(mask, cpu_online_mask))
575 return BAD_APICID;
576
577 irq = desc->irq;
578 cfg = desc->chip_data;
579 if (assign_irq_vector(irq, cfg, mask))
580 return BAD_APICID;
581
582 cpumask_and(&desc->affinity, cfg->domain, mask);
583 set_extra_move_desc(desc, mask);
584 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
585}
586
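A sketch of the contract set_desc_affinity() establishes: a ->set_affinity implementation built on it only reprograms hardware when a valid destination comes back. my_chip_set_affinity and its body are hypothetical; the real users follow immediately below and in the MSI/DMAR/HPET/HT paths later in this diff:

	static void my_chip_set_affinity(unsigned int irq,
					 const struct cpumask *mask)
	{
		struct irq_desc *desc = irq_to_desc(irq);
		unsigned int dest;

		dest = set_desc_affinity(desc, mask);
		if (dest == BAD_APICID)
			return;	/* offline mask or no vector: do nothing */

		/* reprogram the hardware with desc->chip_data->vector
		 * and dest here */
	}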
587static void
588set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
365{ 589{
366 struct irq_cfg *cfg; 590 struct irq_cfg *cfg;
367 unsigned long flags; 591 unsigned long flags;
368 unsigned int dest; 592 unsigned int dest;
369 cpumask_t tmp; 593 unsigned int irq;
370 struct irq_desc *desc;
371 594
372 cpus_and(tmp, mask, cpu_online_map); 595 irq = desc->irq;
373 if (cpus_empty(tmp)) 596 cfg = desc->chip_data;
374 return;
375 597
376 cfg = irq_cfg(irq); 598 spin_lock_irqsave(&ioapic_lock, flags);
377 if (assign_irq_vector(irq, mask)) 599 dest = set_desc_affinity(desc, mask);
378 return; 600 if (dest != BAD_APICID) {
601 /* Only the high 8 bits are valid. */
602 dest = SET_APIC_LOGICAL_ID(dest);
603 __target_IO_APIC_irq(irq, dest, cfg);
604 }
605 spin_unlock_irqrestore(&ioapic_lock, flags);
606}
379 607
380 cpus_and(tmp, cfg->domain, mask); 608static void
381 dest = cpu_mask_to_apicid(tmp); 609set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
382 /* 610{
383 * Only the high 8 bits are valid. 611 struct irq_desc *desc;
384 */
385 dest = SET_APIC_LOGICAL_ID(dest);
386 612
387 desc = irq_to_desc(irq); 613 desc = irq_to_desc(irq);
388 spin_lock_irqsave(&ioapic_lock, flags); 614
389 __target_IO_APIC_irq(irq, dest, cfg->vector); 615 set_ioapic_affinity_irq_desc(desc, mask);
390 desc->affinity = mask;
391 spin_unlock_irqrestore(&ioapic_lock, flags);
392} 616}
393#endif /* CONFIG_SMP */ 617#endif /* CONFIG_SMP */
394 618
@@ -397,16 +621,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
397 * shared ISA-space IRQs, so we have to support them. We are super 621 * shared ISA-space IRQs, so we have to support them. We are super
398 * fast in the common case, and fast for shared ISA-space IRQs. 622 * fast in the common case, and fast for shared ISA-space IRQs.
399 */ 623 */
400static void add_pin_to_irq(unsigned int irq, int apic, int pin) 624static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
401{ 625{
402 struct irq_cfg *cfg;
403 struct irq_pin_list *entry; 626 struct irq_pin_list *entry;
404 627
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin; 628 entry = cfg->irq_2_pin;
408 if (!entry) { 629 if (!entry) {
409 entry = get_one_free_irq_2_pin(); 630 entry = get_one_free_irq_2_pin(cpu);
631 if (!entry) {
632 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
633 apic, pin);
634 return;
635 }
410 cfg->irq_2_pin = entry; 636 cfg->irq_2_pin = entry;
411 entry->apic = apic; 637 entry->apic = apic;
412 entry->pin = pin; 638 entry->pin = pin;
@@ -421,7 +647,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
421 entry = entry->next; 647 entry = entry->next;
422 } 648 }
423 649
424 entry->next = get_one_free_irq_2_pin(); 650 entry->next = get_one_free_irq_2_pin(cpu);
425 entry = entry->next; 651 entry = entry->next;
426 entry->apic = apic; 652 entry->apic = apic;
427 entry->pin = pin; 653 entry->pin = pin;
@@ -430,11 +656,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
430/* 656/*
431 * Reroute an IRQ to a different pin. 657 * Reroute an IRQ to a different pin.
432 */ 658 */
433static void __init replace_pin_at_irq(unsigned int irq, 659static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
434 int oldapic, int oldpin, 660 int oldapic, int oldpin,
435 int newapic, int newpin) 661 int newapic, int newpin)
436{ 662{
437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin; 663 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0; 664 int replaced = 0;
440 665
@@ -451,18 +676,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
451 676
452 /* why? call replace before add? */ 677 /* why? call replace before add? */
453 if (!replaced) 678 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin); 679 add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
455} 680}
456 681
457static inline void io_apic_modify_irq(unsigned int irq, 682static inline void io_apic_modify_irq(struct irq_cfg *cfg,
458 int mask_and, int mask_or, 683 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry)) 684 void (*final)(struct irq_pin_list *entry))
460{ 685{
461 int pin; 686 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry; 687 struct irq_pin_list *entry;
464 688
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { 689 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg; 690 unsigned int reg;
468 pin = entry->pin; 691 pin = entry->pin;
@@ -475,13 +698,13 @@ static inline void io_apic_modify_irq(unsigned int irq,
475 } 698 }
476} 699}
477 700
478static void __unmask_IO_APIC_irq(unsigned int irq) 701static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
479{ 702{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); 703 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
481} 704}
482 705
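io_apic_modify_irq() reduces every mask/unmask variant to a choice of (and-mask, or-mask, final-hook); the hook, when non-NULL, runs once per redirection entry after the modified value is written (64-bit passes io_apic_sync() below to flush the write). A hypothetical debug hook, just to show the shape:

	static void dump_entry(struct irq_pin_list *entry)	/* hypothetical */
	{
		printk(KERN_DEBUG "io_apic %d pin %d updated\n",
		       entry->apic, entry->pin);
	}

	/* usage: io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &dump_entry); */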
483#ifdef CONFIG_X86_64 706#ifdef CONFIG_X86_64
484void io_apic_sync(struct irq_pin_list *entry) 707static void io_apic_sync(struct irq_pin_list *entry)
485{ 708{
486 /* 709 /*
487 * Synchronize the IO-APIC and the CPU by doing 710 * Synchronize the IO-APIC and the CPU by doing
@@ -492,47 +715,64 @@ void io_apic_sync(struct irq_pin_list *entry)
492 readl(&io_apic->data); 715 readl(&io_apic->data);
493} 716}
494 717
495static void __mask_IO_APIC_irq(unsigned int irq) 718static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
496{ 719{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 720 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498} 721}
499#else /* CONFIG_X86_32 */ 722#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq) 723static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
501{ 724{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); 725 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
503} 726}
504 727
505static void __mask_and_edge_IO_APIC_irq(unsigned int irq) 728static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
506{ 729{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, 730 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL); 731 IO_APIC_REDIR_MASKED, NULL);
509} 732}
510 733
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq) 734static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
512{ 735{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 736 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 737 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515} 738}
516#endif /* CONFIG_X86_32 */ 739#endif /* CONFIG_X86_32 */
517 740
518static void mask_IO_APIC_irq (unsigned int irq) 741static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
519{ 742{
743 struct irq_cfg *cfg = desc->chip_data;
520 unsigned long flags; 744 unsigned long flags;
521 745
746 BUG_ON(!cfg);
747
522 spin_lock_irqsave(&ioapic_lock, flags); 748 spin_lock_irqsave(&ioapic_lock, flags);
523 __mask_IO_APIC_irq(irq); 749 __mask_IO_APIC_irq(cfg);
524 spin_unlock_irqrestore(&ioapic_lock, flags); 750 spin_unlock_irqrestore(&ioapic_lock, flags);
525} 751}
526 752
527static void unmask_IO_APIC_irq (unsigned int irq) 753static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
528{ 754{
755 struct irq_cfg *cfg = desc->chip_data;
529 unsigned long flags; 756 unsigned long flags;
530 757
531 spin_lock_irqsave(&ioapic_lock, flags); 758 spin_lock_irqsave(&ioapic_lock, flags);
532 __unmask_IO_APIC_irq(irq); 759 __unmask_IO_APIC_irq(cfg);
533 spin_unlock_irqrestore(&ioapic_lock, flags); 760 spin_unlock_irqrestore(&ioapic_lock, flags);
534} 761}
535 762
763static void mask_IO_APIC_irq(unsigned int irq)
764{
765 struct irq_desc *desc = irq_to_desc(irq);
766
767 mask_IO_APIC_irq_desc(desc);
768}
769static void unmask_IO_APIC_irq(unsigned int irq)
770{
771 struct irq_desc *desc = irq_to_desc(irq);
772
773 unmask_IO_APIC_irq_desc(desc);
774}
775
536static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 776static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
537{ 777{
538 struct IO_APIC_route_entry entry; 778 struct IO_APIC_route_entry entry;
@@ -809,7 +1049,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
809 */ 1049 */
810static int EISA_ELCR(unsigned int irq) 1050static int EISA_ELCR(unsigned int irq)
811{ 1051{
812 if (irq < 16) { 1052 if (irq < NR_IRQS_LEGACY) {
813 unsigned int port = 0x4d0 + (irq >> 3); 1053 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1; 1054 return (inb(port) >> (irq & 7)) & 1;
815 } 1055 }
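A standalone illustration of the ELCR addressing used above: IRQs 0-7 live in the register at port 0x4d0, IRQs 8-15 in the one at 0x4d1, one trigger-mode bit per IRQ:

	#include <stdio.h>

	int main(void)
	{
		unsigned int irq;

		for (irq = 0; irq < 16; irq++)
			printf("IRQ %2u -> port 0x%x, bit %u\n",
			       irq, 0x4d0 + (irq >> 3), irq & 7);
		return 0;
	}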
@@ -1034,7 +1274,8 @@ void unlock_vector_lock(void)
1034 spin_unlock(&vector_lock); 1274 spin_unlock(&vector_lock);
1035} 1275}
1036 1276
1037static int __assign_irq_vector(int irq, cpumask_t mask) 1277static int
1278__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1038{ 1279{
1039 /* 1280 /*
1040 * NOTE! The local APIC isn't very good at handling 1281 * NOTE! The local APIC isn't very good at handling
@@ -1049,52 +1290,49 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
1049 */ 1290 */
1050 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1291 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1051 unsigned int old_vector; 1292 unsigned int old_vector;
1052 int cpu; 1293 int cpu, err;
1053 struct irq_cfg *cfg; 1294 cpumask_var_t tmp_mask;
1054
1055 cfg = irq_cfg(irq);
1056
1057 /* Only try and allocate irqs on cpus that are present */
1058 cpus_and(mask, mask, cpu_online_map);
1059 1295
1060 if ((cfg->move_in_progress) || cfg->move_cleanup_count) 1296 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1061 return -EBUSY; 1297 return -EBUSY;
1062 1298
1299 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1300 return -ENOMEM;
1301
1063 old_vector = cfg->vector; 1302 old_vector = cfg->vector;
1064 if (old_vector) { 1303 if (old_vector) {
1065 cpumask_t tmp; 1304 cpumask_and(tmp_mask, mask, cpu_online_mask);
1066 cpus_and(tmp, cfg->domain, mask); 1305 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1067 if (!cpus_empty(tmp)) 1306 if (!cpumask_empty(tmp_mask)) {
1307 free_cpumask_var(tmp_mask);
1068 return 0; 1308 return 0;
1309 }
1069 } 1310 }
1070 1311
1071 for_each_cpu_mask_nr(cpu, mask) { 1312 /* Only try and allocate irqs on cpus that are present */
1072 cpumask_t domain, new_mask; 1313 err = -ENOSPC;
1314 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1073 int new_cpu; 1315 int new_cpu;
1074 int vector, offset; 1316 int vector, offset;
1075 1317
1076 domain = vector_allocation_domain(cpu); 1318 vector_allocation_domain(cpu, tmp_mask);
1077 cpus_and(new_mask, domain, cpu_online_map);
1078 1319
1079 vector = current_vector; 1320 vector = current_vector;
1080 offset = current_offset; 1321 offset = current_offset;
1081next: 1322next:
1082 vector += 8; 1323 vector += 8;
1083 if (vector >= first_system_vector) { 1324 if (vector >= first_system_vector) {
1084 /* If we run out of vectors on large boxen, must share them. */ 1325 /* If out of vectors on large boxen, must share them. */
1085 offset = (offset + 1) % 8; 1326 offset = (offset + 1) % 8;
1086 vector = FIRST_DEVICE_VECTOR + offset; 1327 vector = FIRST_DEVICE_VECTOR + offset;
1087 } 1328 }
1088 if (unlikely(current_vector == vector)) 1329 if (unlikely(current_vector == vector))
1089 continue; 1330 continue;
1090#ifdef CONFIG_X86_64 1331
1091 if (vector == IA32_SYSCALL_VECTOR) 1332 if (test_bit(vector, used_vectors))
1092 goto next;
1093#else
1094 if (vector == SYSCALL_VECTOR)
1095 goto next; 1333 goto next;
1096#endif 1334
1097 for_each_cpu_mask_nr(new_cpu, new_mask) 1335 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1098 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1336 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1099 goto next; 1337 goto next;
1100 /* Found one! */ 1338 /* Found one! */
@@ -1102,49 +1340,47 @@ next:
1102 current_offset = offset; 1340 current_offset = offset;
1103 if (old_vector) { 1341 if (old_vector) {
1104 cfg->move_in_progress = 1; 1342 cfg->move_in_progress = 1;
1105 cfg->old_domain = cfg->domain; 1343 cpumask_copy(cfg->old_domain, cfg->domain);
1106 } 1344 }
1107 for_each_cpu_mask_nr(new_cpu, new_mask) 1345 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1108 per_cpu(vector_irq, new_cpu)[vector] = irq; 1346 per_cpu(vector_irq, new_cpu)[vector] = irq;
1109 cfg->vector = vector; 1347 cfg->vector = vector;
1110 cfg->domain = domain; 1348 cpumask_copy(cfg->domain, tmp_mask);
1111 return 0; 1349 err = 0;
1350 break;
1112 } 1351 }
1113 return -ENOSPC; 1352 free_cpumask_var(tmp_mask);
1353 return err;
1114} 1354}
1115 1355
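A standalone sketch of the search strategy in __assign_irq_vector(): candidates advance in strides of 8 (one per interrupt priority level), and when the top of the device-vector range is reached, the search restarts at the base one slot deeper within the stride. The constants are illustrative, chosen small so the wrap is visible; they are not the kernel's values:

	#include <stdio.h>

	#define FIRST_DEVICE_VECTOR	0x20	/* illustrative */
	#define FIRST_SYSTEM_VECTOR	0x40	/* illustrative */

	int main(void)
	{
		int vector = FIRST_DEVICE_VECTOR, offset = 0, step;

		for (step = 0; step < 10; step++) {
			vector += 8;
			if (vector >= FIRST_SYSTEM_VECTOR) {
				/* top reached: restart one slot deeper */
				offset = (offset + 1) % 8;
				vector = FIRST_DEVICE_VECTOR + offset;
			}
			printf("candidate 0x%02x (offset %d)\n", vector, offset);
		}
		return 0;
	}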
1116static int assign_irq_vector(int irq, cpumask_t mask) 1356static int
1357assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1117{ 1358{
1118 int err; 1359 int err;
1119 unsigned long flags; 1360 unsigned long flags;
1120 1361
1121 spin_lock_irqsave(&vector_lock, flags); 1362 spin_lock_irqsave(&vector_lock, flags);
1122 err = __assign_irq_vector(irq, mask); 1363 err = __assign_irq_vector(irq, cfg, mask);
1123 spin_unlock_irqrestore(&vector_lock, flags); 1364 spin_unlock_irqrestore(&vector_lock, flags);
1124 return err; 1365 return err;
1125} 1366}
1126 1367
1127static void __clear_irq_vector(int irq) 1368static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1128{ 1369{
1129 struct irq_cfg *cfg;
1130 cpumask_t mask;
1131 int cpu, vector; 1370 int cpu, vector;
1132 1371
1133 cfg = irq_cfg(irq);
1134 BUG_ON(!cfg->vector); 1372 BUG_ON(!cfg->vector);
1135 1373
1136 vector = cfg->vector; 1374 vector = cfg->vector;
1137 cpus_and(mask, cfg->domain, cpu_online_map); 1375 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1138 for_each_cpu_mask_nr(cpu, mask)
1139 per_cpu(vector_irq, cpu)[vector] = -1; 1376 per_cpu(vector_irq, cpu)[vector] = -1;
1140 1377
1141 cfg->vector = 0; 1378 cfg->vector = 0;
1142 cpus_clear(cfg->domain); 1379 cpumask_clear(cfg->domain);
1143 1380
1144 if (likely(!cfg->move_in_progress)) 1381 if (likely(!cfg->move_in_progress))
1145 return; 1382 return;
1146 cpus_and(mask, cfg->old_domain, cpu_online_map); 1383 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1147 for_each_cpu_mask_nr(cpu, mask) {
1148 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1384 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1149 vector++) { 1385 vector++) {
1150 if (per_cpu(vector_irq, cpu)[vector] != irq) 1386 if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1162,10 +1398,12 @@ void __setup_vector_irq(int cpu)
1162 /* This function must be called with vector_lock held */ 1398 /* This function must be called with vector_lock held */
1163 int irq, vector; 1399 int irq, vector;
1164 struct irq_cfg *cfg; 1400 struct irq_cfg *cfg;
1401 struct irq_desc *desc;
1165 1402
1166 /* Mark the inuse vectors */ 1403 /* Mark the inuse vectors */
1167 for_each_irq_cfg(irq, cfg) { 1404 for_each_irq_desc(irq, desc) {
1168 if (!cpu_isset(cpu, cfg->domain)) 1405 cfg = desc->chip_data;
1406 if (!cpumask_test_cpu(cpu, cfg->domain))
1169 continue; 1407 continue;
1170 vector = cfg->vector; 1408 vector = cfg->vector;
1171 per_cpu(vector_irq, cpu)[vector] = irq; 1409 per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1177,7 +1415,7 @@ void __setup_vector_irq(int cpu)
1177 continue; 1415 continue;
1178 1416
1179 cfg = irq_cfg(irq); 1417 cfg = irq_cfg(irq);
1180 if (!cpu_isset(cpu, cfg->domain)) 1418 if (!cpumask_test_cpu(cpu, cfg->domain))
1181 per_cpu(vector_irq, cpu)[vector] = -1; 1419 per_cpu(vector_irq, cpu)[vector] = -1;
1182 } 1420 }
1183} 1421}
@@ -1215,11 +1453,8 @@ static inline int IO_APIC_irq_trigger(int irq)
1215} 1453}
1216#endif 1454#endif
1217 1455
1218static void ioapic_register_intr(int irq, unsigned long trigger) 1456static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
1219{ 1457{
1220 struct irq_desc *desc;
1221
1222 desc = irq_to_desc(irq);
1223 1458
1224 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1459 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1225 trigger == IOAPIC_LEVEL) 1460 trigger == IOAPIC_LEVEL)
@@ -1311,23 +1546,22 @@ static int setup_ioapic_entry(int apic, int irq,
1311 return 0; 1546 return 0;
1312} 1547}
1313 1548
1314static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1549static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
1315 int trigger, int polarity) 1550 int trigger, int polarity)
1316{ 1551{
1317 struct irq_cfg *cfg; 1552 struct irq_cfg *cfg;
1318 struct IO_APIC_route_entry entry; 1553 struct IO_APIC_route_entry entry;
1319 cpumask_t mask; 1554 unsigned int dest;
1320 1555
1321 if (!IO_APIC_IRQ(irq)) 1556 if (!IO_APIC_IRQ(irq))
1322 return; 1557 return;
1323 1558
1324 cfg = irq_cfg(irq); 1559 cfg = desc->chip_data;
1325 1560
1326 mask = TARGET_CPUS; 1561 if (assign_irq_vector(irq, cfg, TARGET_CPUS))
1327 if (assign_irq_vector(irq, mask))
1328 return; 1562 return;
1329 1563
1330 cpus_and(mask, cfg->domain, mask); 1564 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
1331 1565
1332 apic_printk(APIC_VERBOSE,KERN_DEBUG 1566 apic_printk(APIC_VERBOSE,KERN_DEBUG
1333 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1337,16 +1571,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1337 1571
1338 1572
1339 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1340 cpu_mask_to_apicid(mask), trigger, polarity, 1574 dest, trigger, polarity, cfg->vector)) {
1341 cfg->vector)) {
1342 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1343 mp_ioapics[apic].mp_apicid, pin); 1576 mp_ioapics[apic].mp_apicid, pin);
1344 __clear_irq_vector(irq); 1577 __clear_irq_vector(irq, cfg);
1345 return; 1578 return;
1346 } 1579 }
1347 1580
1348 ioapic_register_intr(irq, trigger); 1581 ioapic_register_intr(irq, desc, trigger);
1349 if (irq < 16) 1582 if (irq < NR_IRQS_LEGACY)
1350 disable_8259A_irq(irq); 1583 disable_8259A_irq(irq);
1351 1584
1352 ioapic_write_entry(apic, pin, entry); 1585 ioapic_write_entry(apic, pin, entry);
@@ -1356,6 +1589,9 @@ static void __init setup_IO_APIC_irqs(void)
1356{ 1589{
1357 int apic, pin, idx, irq; 1590 int apic, pin, idx, irq;
1358 int notcon = 0; 1591 int notcon = 0;
1592 struct irq_desc *desc;
1593 struct irq_cfg *cfg;
1594 int cpu = boot_cpu_id;
1359 1595
1360 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1596 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1361 1597
@@ -1387,9 +1623,15 @@ static void __init setup_IO_APIC_irqs(void)
1387 if (multi_timer_check(apic, irq)) 1623 if (multi_timer_check(apic, irq))
1388 continue; 1624 continue;
1389#endif 1625#endif
1390 add_pin_to_irq(irq, apic, pin); 1626 desc = irq_to_desc_alloc_cpu(irq, cpu);
1627 if (!desc) {
1628 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1629 continue;
1630 }
1631 cfg = desc->chip_data;
1632 add_pin_to_irq_cpu(cfg, cpu, apic, pin);
1391 1633
1392 setup_IO_APIC_irq(apic, pin, irq, 1634 setup_IO_APIC_irq(apic, pin, irq, desc,
1393 irq_trigger(idx), irq_polarity(idx)); 1635 irq_trigger(idx), irq_polarity(idx));
1394 } 1636 }
1395 } 1637 }
@@ -1448,6 +1690,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1448 union IO_APIC_reg_03 reg_03; 1690 union IO_APIC_reg_03 reg_03;
1449 unsigned long flags; 1691 unsigned long flags;
1450 struct irq_cfg *cfg; 1692 struct irq_cfg *cfg;
1693 struct irq_desc *desc;
1451 unsigned int irq; 1694 unsigned int irq;
1452 1695
1453 if (apic_verbosity == APIC_QUIET) 1696 if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1780,11 @@ __apicdebuginit(void) print_IO_APIC(void)
1537 } 1780 }
1538 } 1781 }
1539 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1782 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1540 for_each_irq_cfg(irq, cfg) { 1783 for_each_irq_desc(irq, desc) {
1541 struct irq_pin_list *entry = cfg->irq_2_pin; 1784 struct irq_pin_list *entry;
1785
1786 cfg = desc->chip_data;
1787 entry = cfg->irq_2_pin;
1542 if (!entry) 1788 if (!entry)
1543 continue; 1789 continue;
1544 printk(KERN_DEBUG "IRQ%d ", irq); 1790 printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,14 +2268,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2022{ 2268{
2023 int was_pending = 0; 2269 int was_pending = 0;
2024 unsigned long flags; 2270 unsigned long flags;
2271 struct irq_cfg *cfg;
2025 2272
2026 spin_lock_irqsave(&ioapic_lock, flags); 2273 spin_lock_irqsave(&ioapic_lock, flags);
2027 if (irq < 16) { 2274 if (irq < NR_IRQS_LEGACY) {
2028 disable_8259A_irq(irq); 2275 disable_8259A_irq(irq);
2029 if (i8259A_irq_pending(irq)) 2276 if (i8259A_irq_pending(irq))
2030 was_pending = 1; 2277 was_pending = 1;
2031 } 2278 }
2032 __unmask_IO_APIC_irq(irq); 2279 cfg = irq_cfg(irq);
2280 __unmask_IO_APIC_irq(cfg);
2033 spin_unlock_irqrestore(&ioapic_lock, flags); 2281 spin_unlock_irqrestore(&ioapic_lock, flags);
2034 2282
2035 return was_pending; 2283 return was_pending;
@@ -2043,7 +2291,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2043 unsigned long flags; 2291 unsigned long flags;
2044 2292
2045 spin_lock_irqsave(&vector_lock, flags); 2293 spin_lock_irqsave(&vector_lock, flags);
2046 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); 2294 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2047 spin_unlock_irqrestore(&vector_lock, flags); 2295 spin_unlock_irqrestore(&vector_lock, flags);
2048 2296
2049 return 1; 2297 return 1;
@@ -2092,35 +2340,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2092 * as simple as edge triggered migration and we can do the irq migration 2340 * as simple as edge triggered migration and we can do the irq migration
2093 * with a simple atomic update to IO-APIC RTE. 2341 * with a simple atomic update to IO-APIC RTE.
2094 */ 2342 */
2095static void migrate_ioapic_irq(int irq, cpumask_t mask) 2343static void
2344migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2096{ 2345{
2097 struct irq_cfg *cfg; 2346 struct irq_cfg *cfg;
2098 struct irq_desc *desc;
2099 cpumask_t tmp, cleanup_mask;
2100 struct irte irte; 2347 struct irte irte;
2101 int modify_ioapic_rte; 2348 int modify_ioapic_rte;
2102 unsigned int dest; 2349 unsigned int dest;
2103 unsigned long flags; 2350 unsigned long flags;
2351 unsigned int irq;
2104 2352
2105 cpus_and(tmp, mask, cpu_online_map); 2353 if (!cpumask_intersects(mask, cpu_online_mask))
2106 if (cpus_empty(tmp))
2107 return; 2354 return;
2108 2355
2356 irq = desc->irq;
2109 if (get_irte(irq, &irte)) 2357 if (get_irte(irq, &irte))
2110 return; 2358 return;
2111 2359
2112 if (assign_irq_vector(irq, mask)) 2360 cfg = desc->chip_data;
2361 if (assign_irq_vector(irq, cfg, mask))
2113 return; 2362 return;
2114 2363
2115 cfg = irq_cfg(irq); 2364 set_extra_move_desc(desc, mask);
2116 cpus_and(tmp, cfg->domain, mask); 2365
2117 dest = cpu_mask_to_apicid(tmp); 2366 dest = cpu_mask_to_apicid_and(cfg->domain, mask);
2118 2367
2119 desc = irq_to_desc(irq);
2120 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2368 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2121 if (modify_ioapic_rte) { 2369 if (modify_ioapic_rte) {
2122 spin_lock_irqsave(&ioapic_lock, flags); 2370 spin_lock_irqsave(&ioapic_lock, flags);
2123 __target_IO_APIC_irq(irq, dest, cfg->vector); 2371 __target_IO_APIC_irq(irq, dest, cfg);
2124 spin_unlock_irqrestore(&ioapic_lock, flags); 2372 spin_unlock_irqrestore(&ioapic_lock, flags);
2125 } 2373 }
2126 2374
@@ -2132,24 +2380,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
2132 */ 2380 */
2133 modify_irte(irq, &irte); 2381 modify_irte(irq, &irte);
2134 2382
2135 if (cfg->move_in_progress) { 2383 if (cfg->move_in_progress)
2136 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2384 send_cleanup_vector(cfg);
2137 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2138 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2139 cfg->move_in_progress = 0;
2140 }
2141 2385
2142 desc->affinity = mask; 2386 cpumask_copy(&desc->affinity, mask);
2143} 2387}
2144 2388
2145static int migrate_irq_remapped_level(int irq) 2389static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2146{ 2390{
2147 int ret = -1; 2391 int ret = -1;
2148 struct irq_desc *desc = irq_to_desc(irq); 2392 struct irq_cfg *cfg = desc->chip_data;
2149 2393
2150 mask_IO_APIC_irq(irq); 2394 mask_IO_APIC_irq_desc(desc);
2151 2395
2152 if (io_apic_level_ack_pending(irq)) { 2396 if (io_apic_level_ack_pending(cfg)) {
2153 /* 2397 /*
2154 * Interrupt in progress. Migrating irq now will change the 2398 * Interrupt in progress. Migrating irq now will change the
2155 * vector information in the IO-APIC RTE and that will confuse 2399 * vector information in the IO-APIC RTE and that will confuse
@@ -2161,14 +2405,15 @@ static int migrate_irq_remapped_level(int irq)
2161 } 2405 }
2162 2406
2163 /* everything is clear. we have right of way */ 2407 /* everything is clear. we have right of way */
2164 migrate_ioapic_irq(irq, desc->pending_mask); 2408 migrate_ioapic_irq_desc(desc, &desc->pending_mask);
2165 2409
2166 ret = 0; 2410 ret = 0;
2167 desc->status &= ~IRQ_MOVE_PENDING; 2411 desc->status &= ~IRQ_MOVE_PENDING;
2168 cpus_clear(desc->pending_mask); 2412 cpumask_clear(&desc->pending_mask);
2169 2413
2170unmask: 2414unmask:
2171 unmask_IO_APIC_irq(irq); 2415 unmask_IO_APIC_irq_desc(desc);
2416
2172 return ret; 2417 return ret;
2173} 2418}
2174 2419
@@ -2189,7 +2434,7 @@ static void ir_irq_migration(struct work_struct *work)
2189 continue; 2434 continue;
2190 } 2435 }
2191 2436
2192 desc->chip->set_affinity(irq, desc->pending_mask); 2437 desc->chip->set_affinity(irq, &desc->pending_mask);
2193 spin_unlock_irqrestore(&desc->lock, flags); 2438 spin_unlock_irqrestore(&desc->lock, flags);
2194 } 2439 }
2195 } 2440 }
@@ -2198,18 +2443,24 @@ static void ir_irq_migration(struct work_struct *work)
2198/* 2443/*
2199 * Migrates the IRQ destination in the process context. 2444 * Migrates the IRQ destination in the process context.
2200 */ 2445 */
2201static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2446static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2447 const struct cpumask *mask)
2202{ 2448{
2203 struct irq_desc *desc = irq_to_desc(irq);
2204
2205 if (desc->status & IRQ_LEVEL) { 2449 if (desc->status & IRQ_LEVEL) {
2206 desc->status |= IRQ_MOVE_PENDING; 2450 desc->status |= IRQ_MOVE_PENDING;
2207 desc->pending_mask = mask; 2451 cpumask_copy(&desc->pending_mask, mask);
2208 migrate_irq_remapped_level(irq); 2452 migrate_irq_remapped_level_desc(desc);
2209 return; 2453 return;
2210 } 2454 }
2211 2455
2212 migrate_ioapic_irq(irq, mask); 2456 migrate_ioapic_irq_desc(desc, mask);
2457}
2458static void set_ir_ioapic_affinity_irq(unsigned int irq,
2459 const struct cpumask *mask)
2460{
2461 struct irq_desc *desc = irq_to_desc(irq);
2462
2463 set_ir_ioapic_affinity_irq_desc(desc, mask);
2213} 2464}
2214#endif 2465#endif
2215 2466
@@ -2228,6 +2479,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2228 struct irq_cfg *cfg; 2479 struct irq_cfg *cfg;
2229 irq = __get_cpu_var(vector_irq)[vector]; 2480 irq = __get_cpu_var(vector_irq)[vector];
2230 2481
2482 if (irq == -1)
2483 continue;
2484
2231 desc = irq_to_desc(irq); 2485 desc = irq_to_desc(irq);
2232 if (!desc) 2486 if (!desc)
2233 continue; 2487 continue;
@@ -2237,7 +2491,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2237 if (!cfg->move_cleanup_count) 2491 if (!cfg->move_cleanup_count)
2238 goto unlock; 2492 goto unlock;
2239 2493
2240 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) 2494 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2241 goto unlock; 2495 goto unlock;
2242 2496
2243 __get_cpu_var(vector_irq)[vector] = -1; 2497 __get_cpu_var(vector_irq)[vector] = -1;
@@ -2249,28 +2503,44 @@ unlock:
2249 irq_exit(); 2503 irq_exit();
2250} 2504}
2251 2505
2252static void irq_complete_move(unsigned int irq) 2506static void irq_complete_move(struct irq_desc **descp)
2253{ 2507{
2254 struct irq_cfg *cfg = irq_cfg(irq); 2508 struct irq_desc *desc = *descp;
2509 struct irq_cfg *cfg = desc->chip_data;
2255 unsigned vector, me; 2510 unsigned vector, me;
2256 2511
2257 if (likely(!cfg->move_in_progress)) 2512 if (likely(!cfg->move_in_progress)) {
2513#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2514 if (likely(!cfg->move_desc_pending))
2515 return;
2516
2517 /* domain has not changed, but affinity did */
2518 me = smp_processor_id();
2519 if (cpu_isset(me, desc->affinity)) {
2520 *descp = desc = move_irq_desc(desc, me);
2521 /* get the new one */
2522 cfg = desc->chip_data;
2523 cfg->move_desc_pending = 0;
2524 }
2525#endif
2258 return; 2526 return;
2527 }
2259 2528
2260 vector = ~get_irq_regs()->orig_ax; 2529 vector = ~get_irq_regs()->orig_ax;
2261 me = smp_processor_id(); 2530 me = smp_processor_id();
2262 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { 2531#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2263 cpumask_t cleanup_mask; 2532 *descp = desc = move_irq_desc(desc, me);
2533 /* get the new one */
2534 cfg = desc->chip_data;
2535#endif
2264 2536
2265 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2537 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2266 cfg->move_cleanup_count = cpus_weight(cleanup_mask); 2538 send_cleanup_vector(cfg);
2267 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2268 cfg->move_in_progress = 0;
2269 }
2270} 2539}
2271#else 2540#else
2272static inline void irq_complete_move(unsigned int irq) {} 2541static inline void irq_complete_move(struct irq_desc **descp) {}
2273#endif 2542#endif
2543
2274#ifdef CONFIG_INTR_REMAP 2544#ifdef CONFIG_INTR_REMAP
2275static void ack_x2apic_level(unsigned int irq) 2545static void ack_x2apic_level(unsigned int irq)
2276{ 2546{
@@ -2281,11 +2551,14 @@ static void ack_x2apic_edge(unsigned int irq)
2281{ 2551{
2282 ack_x2APIC_irq(); 2552 ack_x2APIC_irq();
2283} 2553}
2554
2284#endif 2555#endif
2285 2556
2286static void ack_apic_edge(unsigned int irq) 2557static void ack_apic_edge(unsigned int irq)
2287{ 2558{
2288 irq_complete_move(irq); 2559 struct irq_desc *desc = irq_to_desc(irq);
2560
2561 irq_complete_move(&desc);
2289 move_native_irq(irq); 2562 move_native_irq(irq);
2290 ack_APIC_irq(); 2563 ack_APIC_irq();
2291} 2564}
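Why irq_complete_move() now takes struct irq_desc **: under CONFIG_NUMA_MIGRATE_IRQ_DESC, move_irq_desc() may hand back a reallocated descriptor on another node, and the ack path must keep using the new one, so ack_apic_edge() above passes the address of its local pointer. The generic shape of that idiom, standalone:

	#include <stdio.h>

	struct obj { int id; };

	static struct obj pool[2] = { { 0 }, { 1 } };

	/* may replace the caller's object, so it takes obj ** */
	static void maybe_replace(struct obj **op)
	{
		*op = &pool[1];
	}

	int main(void)
	{
		struct obj *o = &pool[0];

		maybe_replace(&o);	/* o now tracks the replacement */
		printf("using obj %d\n", o->id);
		return 0;
	}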
@@ -2294,18 +2567,21 @@ atomic_t irq_mis_count;
2294 2567
2295static void ack_apic_level(unsigned int irq) 2568static void ack_apic_level(unsigned int irq)
2296{ 2569{
2570 struct irq_desc *desc = irq_to_desc(irq);
2571
2297#ifdef CONFIG_X86_32 2572#ifdef CONFIG_X86_32
2298 unsigned long v; 2573 unsigned long v;
2299 int i; 2574 int i;
2300#endif 2575#endif
2576 struct irq_cfg *cfg;
2301 int do_unmask_irq = 0; 2577 int do_unmask_irq = 0;
2302 2578
2303 irq_complete_move(irq); 2579 irq_complete_move(&desc);
2304#ifdef CONFIG_GENERIC_PENDING_IRQ 2580#ifdef CONFIG_GENERIC_PENDING_IRQ
2305 /* If we are moving the irq we need to mask it */ 2581 /* If we are moving the irq we need to mask it */
2306 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { 2582 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2307 do_unmask_irq = 1; 2583 do_unmask_irq = 1;
2308 mask_IO_APIC_irq(irq); 2584 mask_IO_APIC_irq_desc(desc);
2309 } 2585 }
2310#endif 2586#endif
2311 2587
@@ -2329,7 +2605,8 @@ static void ack_apic_level(unsigned int irq)
2329 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2605 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2330 * The idea is from Manfred Spraul. --macro 2606 * The idea is from Manfred Spraul. --macro
2331 */ 2607 */
2332 i = irq_cfg(irq)->vector; 2608 cfg = desc->chip_data;
2609 i = cfg->vector;
2333 2610
2334 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2611 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2335#endif 2612#endif
@@ -2368,17 +2645,18 @@ static void ack_apic_level(unsigned int irq)
2368 * accurate and is causing problems then it is a hardware bug 2645 * accurate and is causing problems then it is a hardware bug
2369 * and you can go talk to the chipset vendor about it. 2646 * and you can go talk to the chipset vendor about it.
2370 */ 2647 */
2371 if (!io_apic_level_ack_pending(irq)) 2648 cfg = desc->chip_data;
2649 if (!io_apic_level_ack_pending(cfg))
2372 move_masked_irq(irq); 2650 move_masked_irq(irq);
2373 unmask_IO_APIC_irq(irq); 2651 unmask_IO_APIC_irq_desc(desc);
2374 } 2652 }
2375 2653
2376#ifdef CONFIG_X86_32 2654#ifdef CONFIG_X86_32
2377 if (!(v & (1 << (i & 0x1f)))) { 2655 if (!(v & (1 << (i & 0x1f)))) {
2378 atomic_inc(&irq_mis_count); 2656 atomic_inc(&irq_mis_count);
2379 spin_lock(&ioapic_lock); 2657 spin_lock(&ioapic_lock);
2380 __mask_and_edge_IO_APIC_irq(irq); 2658 __mask_and_edge_IO_APIC_irq(cfg);
2381 __unmask_and_level_IO_APIC_irq(irq); 2659 __unmask_and_level_IO_APIC_irq(cfg);
2382 spin_unlock(&ioapic_lock); 2660 spin_unlock(&ioapic_lock);
2383 } 2661 }
2384#endif 2662#endif
@@ -2429,20 +2707,19 @@ static inline void init_IO_APIC_traps(void)
2429 * Also, we've got to be careful not to trash gate 2707 * Also, we've got to be careful not to trash gate
2430 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2708 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2431 */ 2709 */
2432 for_each_irq_cfg(irq, cfg) { 2710 for_each_irq_desc(irq, desc) {
2433 if (IO_APIC_IRQ(irq) && !cfg->vector) { 2711 cfg = desc->chip_data;
2712 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2434 /* 2713 /*
2435 * Hmm.. We don't have an entry for this, 2714 * Hmm.. We don't have an entry for this,
2436 * so default to an old-fashioned 8259 2715 * so default to an old-fashioned 8259
2437 * interrupt if we can.. 2716 * interrupt if we can..
2438 */ 2717 */
2439 if (irq < 16) 2718 if (irq < NR_IRQS_LEGACY)
2440 make_8259A_irq(irq); 2719 make_8259A_irq(irq);
2441 else { 2720 else
2442 desc = irq_to_desc(irq);
2443 /* Strange. Oh, well.. */ 2721 /* Strange. Oh, well.. */
2444 desc->chip = &no_irq_chip; 2722 desc->chip = &no_irq_chip;
2445 }
2446 } 2723 }
2447 } 2724 }
2448} 2725}
@@ -2467,7 +2744,7 @@ static void unmask_lapic_irq(unsigned int irq)
2467 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2744 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2468} 2745}
2469 2746
2470static void ack_lapic_irq (unsigned int irq) 2747static void ack_lapic_irq(unsigned int irq)
2471{ 2748{
2472 ack_APIC_irq(); 2749 ack_APIC_irq();
2473} 2750}
@@ -2479,11 +2756,8 @@ static struct irq_chip lapic_chip __read_mostly = {
2479 .ack = ack_lapic_irq, 2756 .ack = ack_lapic_irq,
2480}; 2757};
2481 2758
2482static void lapic_register_intr(int irq) 2759static void lapic_register_intr(int irq, struct irq_desc *desc)
2483{ 2760{
2484 struct irq_desc *desc;
2485
2486 desc = irq_to_desc(irq);
2487 desc->status &= ~IRQ_LEVEL; 2761 desc->status &= ~IRQ_LEVEL;
2488 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2762 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2489 "edge"); 2763 "edge");
@@ -2587,7 +2861,9 @@ int timer_through_8259 __initdata;
2587 */ 2861 */
2588static inline void __init check_timer(void) 2862static inline void __init check_timer(void)
2589{ 2863{
2590 struct irq_cfg *cfg = irq_cfg(0); 2864 struct irq_desc *desc = irq_to_desc(0);
2865 struct irq_cfg *cfg = desc->chip_data;
2866 int cpu = boot_cpu_id;
2591 int apic1, pin1, apic2, pin2; 2867 int apic1, pin1, apic2, pin2;
2592 unsigned long flags; 2868 unsigned long flags;
2593 unsigned int ver; 2869 unsigned int ver;
@@ -2602,7 +2878,7 @@ static inline void __init check_timer(void)
2602 * get/set the timer IRQ vector: 2878 * get/set the timer IRQ vector:
2603 */ 2879 */
2604 disable_8259A_irq(0); 2880 disable_8259A_irq(0);
2605 assign_irq_vector(0, TARGET_CPUS); 2881 assign_irq_vector(0, cfg, TARGET_CPUS);
2606 2882
2607 /* 2883 /*
2608 * As IRQ0 is to be enabled in the 8259A, the virtual 2884 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2653,10 +2929,10 @@ static inline void __init check_timer(void)
2653 * Ok, does IRQ0 through the IOAPIC work? 2929 * Ok, does IRQ0 through the IOAPIC work?
2654 */ 2930 */
2655 if (no_pin1) { 2931 if (no_pin1) {
2656 add_pin_to_irq(0, apic1, pin1); 2932 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2657 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2933 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2658 } 2934 }
2659 unmask_IO_APIC_irq(0); 2935 unmask_IO_APIC_irq_desc(desc);
2660 if (timer_irq_works()) { 2936 if (timer_irq_works()) {
2661 if (nmi_watchdog == NMI_IO_APIC) { 2937 if (nmi_watchdog == NMI_IO_APIC) {
2662 setup_nmi(); 2938 setup_nmi();
@@ -2682,9 +2958,9 @@ static inline void __init check_timer(void)
2682 /* 2958 /*
2683 * legacy devices should be connected to IO APIC #0 2959 * legacy devices should be connected to IO APIC #0
2684 */ 2960 */
2685 replace_pin_at_irq(0, apic1, pin1, apic2, pin2); 2961 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2686 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2962 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2687 unmask_IO_APIC_irq(0); 2963 unmask_IO_APIC_irq_desc(desc);
2688 enable_8259A_irq(0); 2964 enable_8259A_irq(0);
2689 if (timer_irq_works()) { 2965 if (timer_irq_works()) {
2690 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2966 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2716,7 +2992,7 @@ static inline void __init check_timer(void)
2716 apic_printk(APIC_QUIET, KERN_INFO 2992 apic_printk(APIC_QUIET, KERN_INFO
2717 "...trying to set up timer as Virtual Wire IRQ...\n"); 2993 "...trying to set up timer as Virtual Wire IRQ...\n");
2718 2994
2719 lapic_register_intr(0); 2995 lapic_register_intr(0, desc);
2720 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2996 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2721 enable_8259A_irq(0); 2997 enable_8259A_irq(0);
2722 2998
@@ -2901,22 +3177,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
2901 unsigned int irq; 3177 unsigned int irq;
2902 unsigned int new; 3178 unsigned int new;
2903 unsigned long flags; 3179 unsigned long flags;
2904 struct irq_cfg *cfg_new; 3180 struct irq_cfg *cfg_new = NULL;
2905 3181 int cpu = boot_cpu_id;
2906 irq_want = nr_irqs - 1; 3182 struct irq_desc *desc_new = NULL;
2907 3183
2908 irq = 0; 3184 irq = 0;
2909 spin_lock_irqsave(&vector_lock, flags); 3185 spin_lock_irqsave(&vector_lock, flags);
2910 for (new = irq_want; new > 0; new--) { 3186 for (new = irq_want; new < NR_IRQS; new++) {
2911 if (platform_legacy_irq(new)) 3187 if (platform_legacy_irq(new))
2912 continue; 3188 continue;
2913 cfg_new = irq_cfg(new); 3189
2914 if (cfg_new && cfg_new->vector != 0) 3190 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3191 if (!desc_new) {
3192 printk(KERN_INFO "can not get irq_desc for %d\n", new);
2915 continue; 3193 continue;
2916 /* check if need to create one */ 3194 }
2917 if (!cfg_new) 3195 cfg_new = desc_new->chip_data;
2918 cfg_new = irq_cfg_alloc(new); 3196
2919 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 3197 if (cfg_new->vector != 0)
3198 continue;
3199 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
2920 irq = new; 3200 irq = new;
2921 break; 3201 break;
2922 } 3202 }
@@ -2924,15 +3204,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
2924 3204
2925 if (irq > 0) { 3205 if (irq > 0) {
2926 dynamic_irq_init(irq); 3206 dynamic_irq_init(irq);
3207 /* restore it, in case dynamic_irq_init cleared it */
3208 if (desc_new)
3209 desc_new->chip_data = cfg_new;
2927 } 3210 }
2928 return irq; 3211 return irq;
2929} 3212}
2930 3213
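Note the search also reverses direction in this hunk: the removed code overwrote irq_want with nr_irqs - 1 and scanned downward, while the new code honors irq_want (nr_irqs_gsi in the MSI paths) and scans upward, so dynamically created IRQs cluster just above the GSI range:

	for (new = irq_want; new > 0; new--)		/* old: top-down */
	for (new = irq_want; new < NR_IRQS; new++)	/* new: bottom-up */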
3214static int nr_irqs_gsi = NR_IRQS_LEGACY;
2931int create_irq(void) 3215int create_irq(void)
2932{ 3216{
3217 unsigned int irq_want;
2933 int irq; 3218 int irq;
2934 3219
2935 irq = create_irq_nr(nr_irqs - 1); 3220 irq_want = nr_irqs_gsi;
3221 irq = create_irq_nr(irq_want);
2936 3222
2937 if (irq == 0) 3223 if (irq == 0)
2938 irq = -1; 3224 irq = -1;
@@ -2943,14 +3229,22 @@ int create_irq(void)
2943void destroy_irq(unsigned int irq) 3229void destroy_irq(unsigned int irq)
2944{ 3230{
2945 unsigned long flags; 3231 unsigned long flags;
3232 struct irq_cfg *cfg;
3233 struct irq_desc *desc;
2946 3234
3235 /* store it, in case dynamic_irq_cleanup cleared it */
3236 desc = irq_to_desc(irq);
3237 cfg = desc->chip_data;
2947 dynamic_irq_cleanup(irq); 3238 dynamic_irq_cleanup(irq);
3239 /* connect back irq_cfg */
3240 if (desc)
3241 desc->chip_data = cfg;
2948 3242
2949#ifdef CONFIG_INTR_REMAP 3243#ifdef CONFIG_INTR_REMAP
2950 free_irte(irq); 3244 free_irte(irq);
2951#endif 3245#endif
2952 spin_lock_irqsave(&vector_lock, flags); 3246 spin_lock_irqsave(&vector_lock, flags);
2953 __clear_irq_vector(irq); 3247 __clear_irq_vector(irq, cfg);
2954 spin_unlock_irqrestore(&vector_lock, flags); 3248 spin_unlock_irqrestore(&vector_lock, flags);
2955} 3249}
2956 3250
@@ -2963,16 +3257,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2963 struct irq_cfg *cfg; 3257 struct irq_cfg *cfg;
2964 int err; 3258 int err;
2965 unsigned dest; 3259 unsigned dest;
2966 cpumask_t tmp;
2967 3260
2968 tmp = TARGET_CPUS; 3261 cfg = irq_cfg(irq);
2969 err = assign_irq_vector(irq, tmp); 3262 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
2970 if (err) 3263 if (err)
2971 return err; 3264 return err;
2972 3265
2973 cfg = irq_cfg(irq); 3266 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
2974 cpus_and(tmp, cfg->domain, tmp);
2975 dest = cpu_mask_to_apicid(tmp);
2976 3267
2977#ifdef CONFIG_INTR_REMAP 3268#ifdef CONFIG_INTR_REMAP
2978 if (irq_remapped(irq)) { 3269 if (irq_remapped(irq)) {
@@ -3026,64 +3317,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3026} 3317}
3027 3318
3028#ifdef CONFIG_SMP 3319#ifdef CONFIG_SMP
3029static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3320static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3030{ 3321{
3322 struct irq_desc *desc = irq_to_desc(irq);
3031 struct irq_cfg *cfg; 3323 struct irq_cfg *cfg;
3032 struct msi_msg msg; 3324 struct msi_msg msg;
3033 unsigned int dest; 3325 unsigned int dest;
3034 cpumask_t tmp;
3035 struct irq_desc *desc;
3036 3326
3037 cpus_and(tmp, mask, cpu_online_map); 3327 dest = set_desc_affinity(desc, mask);
3038 if (cpus_empty(tmp)) 3328 if (dest == BAD_APICID)
3039 return; 3329 return;
3040 3330
3041 if (assign_irq_vector(irq, mask)) 3331 cfg = desc->chip_data;
3042 return;
3043 3332
3044 cfg = irq_cfg(irq); 3333 read_msi_msg_desc(desc, &msg);
3045 cpus_and(tmp, cfg->domain, mask);
3046 dest = cpu_mask_to_apicid(tmp);
3047
3048 read_msi_msg(irq, &msg);
3049 3334
3050 msg.data &= ~MSI_DATA_VECTOR_MASK; 3335 msg.data &= ~MSI_DATA_VECTOR_MASK;
3051 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3336 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3052 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3337 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3053 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3338 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3054 3339
3055 write_msi_msg(irq, &msg); 3340 write_msi_msg_desc(desc, &msg);
3056 desc = irq_to_desc(irq);
3057 desc->affinity = mask;
3058} 3341}
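read_msi_msg_desc()/write_msi_msg_desc() replace the irq-based accessors here because the descriptor is already in hand; the irq-based read_msi_msg()/write_msi_msg() are, in this kernel, essentially thin wrappers that would repeat the irq_to_desc() lookup. The retarget pattern itself, annotated (the same shape recurs in the DMAR and HPET paths below):

	read_msi_msg_desc(desc, &msg);			/* fetch current message */
	msg.data &= ~MSI_DATA_VECTOR_MASK;
	msg.data |= MSI_DATA_VECTOR(cfg->vector);	/* splice in new vector */
	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
	msg.address_lo |= MSI_ADDR_DEST_ID(dest);	/* and new destination */
	write_msi_msg_desc(desc, &msg);			/* write it back */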
3059
3060#ifdef CONFIG_INTR_REMAP 3342#ifdef CONFIG_INTR_REMAP
3061/* 3343/*
3062 * Migrate the MSI irq to another cpumask. This migration is 3344 * Migrate the MSI irq to another cpumask. This migration is
3063 * done in the process context using interrupt-remapping hardware. 3345 * done in the process context using interrupt-remapping hardware.
3064 */ 3346 */
3065static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3347static void
3348ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3066{ 3349{
3067 struct irq_cfg *cfg; 3350 struct irq_desc *desc = irq_to_desc(irq);
3351 struct irq_cfg *cfg = desc->chip_data;
3068 unsigned int dest; 3352 unsigned int dest;
3069 cpumask_t tmp, cleanup_mask;
3070 struct irte irte; 3353 struct irte irte;
3071 struct irq_desc *desc;
3072
3073 cpus_and(tmp, mask, cpu_online_map);
3074 if (cpus_empty(tmp))
3075 return;
3076 3354
3077 if (get_irte(irq, &irte)) 3355 if (get_irte(irq, &irte))
3078 return; 3356 return;
3079 3357
3080 if (assign_irq_vector(irq, mask)) 3358 dest = set_desc_affinity(desc, mask);
3359 if (dest == BAD_APICID)
3081 return; 3360 return;
3082 3361
3083 cfg = irq_cfg(irq);
3084 cpus_and(tmp, cfg->domain, mask);
3085 dest = cpu_mask_to_apicid(tmp);
3086
3087 irte.vector = cfg->vector; 3362 irte.vector = cfg->vector;
3088 irte.dest_id = IRTE_DEST(dest); 3363 irte.dest_id = IRTE_DEST(dest);
3089 3364
@@ -3097,16 +3372,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3097 * at the new destination. So, time to cleanup the previous 3372 * at the new destination. So, time to cleanup the previous
3098 * vector allocation. 3373 * vector allocation.
3099 */ 3374 */
3100 if (cfg->move_in_progress) { 3375 if (cfg->move_in_progress)
3101 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 3376 send_cleanup_vector(cfg);
3102 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3103 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3104 cfg->move_in_progress = 0;
3105 }
3106
3107 desc = irq_to_desc(irq);
3108 desc->affinity = mask;
3109} 3377}
3378
3110#endif 3379#endif
3111#endif /* CONFIG_SMP */ 3380#endif /* CONFIG_SMP */
3112 3381
@@ -3165,7 +3434,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3165} 3434}
3166#endif 3435#endif
3167 3436
3168static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) 3437static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3169{ 3438{
3170 int ret; 3439 int ret;
3171 struct msi_msg msg; 3440 struct msi_msg msg;
@@ -3174,7 +3443,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3174 if (ret < 0) 3443 if (ret < 0)
3175 return ret; 3444 return ret;
3176 3445
3177 set_irq_msi(irq, desc); 3446 set_irq_msi(irq, msidesc);
3178 write_msi_msg(irq, &msg); 3447 write_msi_msg(irq, &msg);
3179 3448
3180#ifdef CONFIG_INTR_REMAP 3449#ifdef CONFIG_INTR_REMAP
@@ -3194,26 +3463,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3194 return 0; 3463 return 0;
3195} 3464}
3196 3465
3197static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) 3466int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3198{
3199 unsigned int irq;
3200
3201 irq = dev->bus->number;
3202 irq <<= 8;
3203 irq |= dev->devfn;
3204 irq <<= 12;
3205
3206 return irq;
3207}
3208
3209int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3210{ 3467{
3211 unsigned int irq; 3468 unsigned int irq;
3212 int ret; 3469 int ret;
3213 unsigned int irq_want; 3470 unsigned int irq_want;
3214 3471
3215 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3472 irq_want = nr_irqs_gsi;
3216
3217 irq = create_irq_nr(irq_want); 3473 irq = create_irq_nr(irq_want);
3218 if (irq == 0) 3474 if (irq == 0)
3219 return -1; 3475 return -1;
@@ -3227,7 +3483,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3227 goto error; 3483 goto error;
3228no_ir: 3484no_ir:
3229#endif 3485#endif
3230 ret = setup_msi_irq(dev, desc, irq); 3486 ret = setup_msi_irq(dev, msidesc, irq);
3231 if (ret < 0) { 3487 if (ret < 0) {
3232 destroy_irq(irq); 3488 destroy_irq(irq);
3233 return ret; 3489 return ret;
@@ -3245,7 +3501,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3245{ 3501{
3246 unsigned int irq; 3502 unsigned int irq;
3247 int ret, sub_handle; 3503 int ret, sub_handle;
3248 struct msi_desc *desc; 3504 struct msi_desc *msidesc;
3249 unsigned int irq_want; 3505 unsigned int irq_want;
3250 3506
3251#ifdef CONFIG_INTR_REMAP 3507#ifdef CONFIG_INTR_REMAP
@@ -3253,10 +3509,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3253 int index = 0; 3509 int index = 0;
3254#endif 3510#endif
3255 3511
3256 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3512 irq_want = nr_irqs_gsi;
3257 sub_handle = 0; 3513 sub_handle = 0;
3258 list_for_each_entry(desc, &dev->msi_list, list) { 3514 list_for_each_entry(msidesc, &dev->msi_list, list) {
3259 irq = create_irq_nr(irq_want--); 3515 irq = create_irq_nr(irq_want);
3516 irq_want++;
3260 if (irq == 0) 3517 if (irq == 0)
3261 return -1; 3518 return -1;
3262#ifdef CONFIG_INTR_REMAP 3519#ifdef CONFIG_INTR_REMAP
@@ -3288,7 +3545,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3288 } 3545 }
3289no_ir: 3546no_ir:
3290#endif 3547#endif
3291 ret = setup_msi_irq(dev, desc, irq); 3548 ret = setup_msi_irq(dev, msidesc, irq);
3292 if (ret < 0) 3549 if (ret < 0)
3293 goto error; 3550 goto error;
3294 sub_handle++; 3551 sub_handle++;
@@ -3307,24 +3564,18 @@ void arch_teardown_msi_irq(unsigned int irq)
3307 3564
3308#ifdef CONFIG_DMAR 3565#ifdef CONFIG_DMAR
3309#ifdef CONFIG_SMP 3566#ifdef CONFIG_SMP
3310static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3567static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3311{ 3568{
3569 struct irq_desc *desc = irq_to_desc(irq);
3312 struct irq_cfg *cfg; 3570 struct irq_cfg *cfg;
3313 struct msi_msg msg; 3571 struct msi_msg msg;
3314 unsigned int dest; 3572 unsigned int dest;
3315 cpumask_t tmp;
3316 struct irq_desc *desc;
3317 3573
3318 cpus_and(tmp, mask, cpu_online_map); 3574 dest = set_desc_affinity(desc, mask);
3319 if (cpus_empty(tmp)) 3575 if (dest == BAD_APICID)
3320 return; 3576 return;
3321 3577
3322 if (assign_irq_vector(irq, mask)) 3578 cfg = desc->chip_data;
3323 return;
3324
3325 cfg = irq_cfg(irq);
3326 cpus_and(tmp, cfg->domain, mask);
3327 dest = cpu_mask_to_apicid(tmp);
3328 3579
3329 dmar_msi_read(irq, &msg); 3580 dmar_msi_read(irq, &msg);
3330 3581
@@ -3334,9 +3585,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3334 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3585 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3335 3586
3336 dmar_msi_write(irq, &msg); 3587 dmar_msi_write(irq, &msg);
3337 desc = irq_to_desc(irq);
3338 desc->affinity = mask;
3339} 3588}
3589
3340#endif /* CONFIG_SMP */ 3590#endif /* CONFIG_SMP */
3341 3591
3342struct irq_chip dmar_msi_type = { 3592struct irq_chip dmar_msi_type = {
@@ -3368,24 +3618,18 @@ int arch_setup_dmar_msi(unsigned int irq)
3368#ifdef CONFIG_HPET_TIMER 3618#ifdef CONFIG_HPET_TIMER
3369 3619
3370#ifdef CONFIG_SMP 3620#ifdef CONFIG_SMP
3371static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) 3621static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3372{ 3622{
3623 struct irq_desc *desc = irq_to_desc(irq);
3373 struct irq_cfg *cfg; 3624 struct irq_cfg *cfg;
3374 struct irq_desc *desc;
3375 struct msi_msg msg; 3625 struct msi_msg msg;
3376 unsigned int dest; 3626 unsigned int dest;
3377 cpumask_t tmp;
3378 3627
3379 cpus_and(tmp, mask, cpu_online_map); 3628 dest = set_desc_affinity(desc, mask);
3380 if (cpus_empty(tmp)) 3629 if (dest == BAD_APICID)
3381 return; 3630 return;
3382 3631
3383 if (assign_irq_vector(irq, mask)) 3632 cfg = desc->chip_data;
3384 return;
3385
3386 cfg = irq_cfg(irq);
3387 cpus_and(tmp, cfg->domain, mask);
3388 dest = cpu_mask_to_apicid(tmp);
3389 3633
3390 hpet_msi_read(irq, &msg); 3634 hpet_msi_read(irq, &msg);
3391 3635
@@ -3395,9 +3639,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3395 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3639 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3396 3640
3397 hpet_msi_write(irq, &msg); 3641 hpet_msi_write(irq, &msg);
3398 desc = irq_to_desc(irq);
3399 desc->affinity = mask;
3400} 3642}
3643
3401#endif /* CONFIG_SMP */ 3644#endif /* CONFIG_SMP */
3402 3645
3403struct irq_chip hpet_msi_type = { 3646struct irq_chip hpet_msi_type = {
@@ -3450,28 +3693,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3450 write_ht_irq_msg(irq, &msg); 3693 write_ht_irq_msg(irq, &msg);
3451} 3694}
3452 3695
3453static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3696static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3454{ 3697{
3698 struct irq_desc *desc = irq_to_desc(irq);
3455 struct irq_cfg *cfg; 3699 struct irq_cfg *cfg;
3456 unsigned int dest; 3700 unsigned int dest;
3457 cpumask_t tmp;
3458 struct irq_desc *desc;
3459 3701
3460 cpus_and(tmp, mask, cpu_online_map); 3702 dest = set_desc_affinity(desc, mask);
3461 if (cpus_empty(tmp)) 3703 if (dest == BAD_APICID)
3462 return; 3704 return;
3463 3705
3464 if (assign_irq_vector(irq, mask)) 3706 cfg = desc->chip_data;
3465 return;
3466
3467 cfg = irq_cfg(irq);
3468 cpus_and(tmp, cfg->domain, mask);
3469 dest = cpu_mask_to_apicid(tmp);
3470 3707
3471 target_ht_irq(irq, dest, cfg->vector); 3708 target_ht_irq(irq, dest, cfg->vector);
3472 desc = irq_to_desc(irq);
3473 desc->affinity = mask;
3474} 3709}
3710
3475#endif 3711#endif
3476 3712
3477static struct irq_chip ht_irq_chip = { 3713static struct irq_chip ht_irq_chip = {
@@ -3489,17 +3725,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3489{ 3725{
3490 struct irq_cfg *cfg; 3726 struct irq_cfg *cfg;
3491 int err; 3727 int err;
3492 cpumask_t tmp;
3493 3728
3494 tmp = TARGET_CPUS; 3729 cfg = irq_cfg(irq);
3495 err = assign_irq_vector(irq, tmp); 3730 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3496 if (!err) { 3731 if (!err) {
3497 struct ht_irq_msg msg; 3732 struct ht_irq_msg msg;
3498 unsigned dest; 3733 unsigned dest;
3499 3734
3500 cfg = irq_cfg(irq); 3735 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3501 cpus_and(tmp, cfg->domain, tmp);
3502 dest = cpu_mask_to_apicid(tmp);
3503 3736
3504 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3737 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3505 3738
@@ -3535,7 +3768,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3535int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, 3768int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3536 unsigned long mmr_offset) 3769 unsigned long mmr_offset)
3537{ 3770{
3538 const cpumask_t *eligible_cpu = get_cpu_mask(cpu); 3771 const struct cpumask *eligible_cpu = cpumask_of(cpu);
3539 struct irq_cfg *cfg; 3772 struct irq_cfg *cfg;
3540 int mmr_pnode; 3773 int mmr_pnode;
3541 unsigned long mmr_value; 3774 unsigned long mmr_value;
@@ -3543,7 +3776,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3543 unsigned long flags; 3776 unsigned long flags;
3544 int err; 3777 int err;
3545 3778
3546 err = assign_irq_vector(irq, *eligible_cpu); 3779 cfg = irq_cfg(irq);
3780
3781 err = assign_irq_vector(irq, cfg, eligible_cpu);
3547 if (err != 0) 3782 if (err != 0)
3548 return err; 3783 return err;
3549 3784
@@ -3552,8 +3787,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3552 irq_name); 3787 irq_name);
3553 spin_unlock_irqrestore(&vector_lock, flags); 3788 spin_unlock_irqrestore(&vector_lock, flags);
3554 3789
3555 cfg = irq_cfg(irq);
3556
3557 mmr_value = 0; 3790 mmr_value = 0;
3558 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3791 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3559 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3792 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3564,7 +3797,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3564 entry->polarity = 0; 3797 entry->polarity = 0;
3565 entry->trigger = 0; 3798 entry->trigger = 0;
3566 entry->mask = 0; 3799 entry->mask = 0;
3567 entry->dest = cpu_mask_to_apicid(*eligible_cpu); 3800 entry->dest = cpu_mask_to_apicid(eligible_cpu);
3568 3801
3569 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3802 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3570 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3803 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3605,9 +3838,16 @@ int __init io_apic_get_redir_entries (int ioapic)
3605 return reg_01.bits.entries; 3838 return reg_01.bits.entries;
3606} 3839}
3607 3840
3608int __init probe_nr_irqs(void) 3841void __init probe_nr_irqs_gsi(void)
3609{ 3842{
3610 return NR_IRQS; 3843 int idx;
3844 int nr = 0;
3845
3846 for (idx = 0; idx < nr_ioapics; idx++)
3847 nr += io_apic_get_redir_entries(idx) + 1;
3848
3849 if (nr > nr_irqs_gsi)
3850 nr_irqs_gsi = nr;
3611} 3851}
3612 3852
3613/* -------------------------------------------------------------------------- 3853/* --------------------------------------------------------------------------
@@ -3706,19 +3946,31 @@ int __init io_apic_get_version(int ioapic)
3706 3946
3707int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3947int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3708{ 3948{
3949 struct irq_desc *desc;
3950 struct irq_cfg *cfg;
3951 int cpu = boot_cpu_id;
3952
3709 if (!IO_APIC_IRQ(irq)) { 3953 if (!IO_APIC_IRQ(irq)) {
3710 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3954 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3711 ioapic); 3955 ioapic);
3712 return -EINVAL; 3956 return -EINVAL;
3713 } 3957 }
3714 3958
3959 desc = irq_to_desc_alloc_cpu(irq, cpu);
3960 if (!desc) {
3961 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3962 return 0;
3963 }
3964
3715 /* 3965 /*
3716 * IRQs < 16 are already in the irq_2_pin[] map 3966 * IRQs < 16 are already in the irq_2_pin[] map
3717 */ 3967 */
3718 if (irq >= 16) 3968 if (irq >= NR_IRQS_LEGACY) {
3719 add_pin_to_irq(irq, ioapic, pin); 3969 cfg = desc->chip_data;
3970 add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
3971 }
3720 3972
3721 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); 3973 setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
3722 3974
3723 return 0; 3975 return 0;
3724} 3976}
@@ -3756,7 +4008,7 @@ void __init setup_ioapic_dest(void)
3756 int pin, ioapic, irq, irq_entry; 4008 int pin, ioapic, irq, irq_entry;
3757 struct irq_desc *desc; 4009 struct irq_desc *desc;
3758 struct irq_cfg *cfg; 4010 struct irq_cfg *cfg;
3759 cpumask_t mask; 4011 const struct cpumask *mask;
3760 4012
3761 if (skip_ioapic_setup == 1) 4013 if (skip_ioapic_setup == 1)
3762 return; 4014 return;
@@ -3772,9 +4024,10 @@ void __init setup_ioapic_dest(void)
3772 * when you have too many devices, because at that time only boot 4024 * when you have too many devices, because at that time only boot
3773 * cpu is online. 4025 * cpu is online.
3774 */ 4026 */
3775 cfg = irq_cfg(irq); 4027 desc = irq_to_desc(irq);
4028 cfg = desc->chip_data;
3776 if (!cfg->vector) { 4029 if (!cfg->vector) {
3777 setup_IO_APIC_irq(ioapic, pin, irq, 4030 setup_IO_APIC_irq(ioapic, pin, irq, desc,
3778 irq_trigger(irq_entry), 4031 irq_trigger(irq_entry),
3779 irq_polarity(irq_entry)); 4032 irq_polarity(irq_entry));
3780 continue; 4033 continue;
@@ -3784,19 +4037,18 @@ void __init setup_ioapic_dest(void)
3784 /* 4037 /*
3785 * Honour affinities which have been set in early boot 4038 * Honour affinities which have been set in early boot
3786 */ 4039 */
3787 desc = irq_to_desc(irq);
3788 if (desc->status & 4040 if (desc->status &
3789 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4041 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
3790 mask = desc->affinity; 4042 mask = &desc->affinity;
3791 else 4043 else
3792 mask = TARGET_CPUS; 4044 mask = TARGET_CPUS;
3793 4045
3794#ifdef CONFIG_INTR_REMAP 4046#ifdef CONFIG_INTR_REMAP
3795 if (intr_remapping_enabled) 4047 if (intr_remapping_enabled)
3796 set_ir_ioapic_affinity_irq(irq, mask); 4048 set_ir_ioapic_affinity_irq_desc(desc, mask);
3797 else 4049 else
3798#endif 4050#endif
3799 set_ioapic_affinity_irq(irq, mask); 4051 set_ioapic_affinity_irq_desc(desc, mask);
3800 } 4052 }
3801 4053
3802 } 4054 }
@@ -3845,7 +4097,6 @@ void __init ioapic_init_mappings(void)
3845 struct resource *ioapic_res; 4097 struct resource *ioapic_res;
3846 int i; 4098 int i;
3847 4099
3848 irq_2_pin_init();
3849 ioapic_res = ioapic_setup_resources(); 4100 ioapic_res = ioapic_setup_resources();
3850 for (i = 0; i < nr_ioapics; i++) { 4101 for (i = 0; i < nr_ioapics; i++) {
3851 if (smp_found_config) { 4102 if (smp_found_config) {
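probe_nr_irqs() used to answer with the static NR_IRQS; its replacement probe_nr_irqs_gsi() derives the GSI count from the hardware instead. A self-contained model of that accounting, with invented redirection-entry counts (reg_01.bits.entries is the per-chip register read by io_apic_get_redir_entries()):

#include <stdio.h>

static int redir_entries[] = { 23, 23, 31 };	/* hypothetical per-IOAPIC reg_01.bits.entries */
static int nr_irqs_gsi = 16;			/* hypothetical starting value */

int main(void)
{
	int nr_ioapics = sizeof(redir_entries) / sizeof(redir_entries[0]);
	int idx, nr = 0;

	/* each I/O APIC serves entries + 1 pins, i.e. that many GSIs */
	for (idx = 0; idx < nr_ioapics; idx++)
		nr += redir_entries[idx] + 1;

	if (nr > nr_irqs_gsi)	/* only ever ratchet upward */
		nr_irqs_gsi = nr;

	printf("nr_irqs_gsi = %d\n", nr_irqs_gsi);	/* 24 + 24 + 32 = 80 */
	return 0;
}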
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 191914302744..b12208f4dfee 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -35,8 +35,8 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
35 */ 35 */
36asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) 36asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
37{ 37{
38 struct thread_struct * t = &current->thread; 38 struct thread_struct *t = &current->thread;
39 struct tss_struct * tss; 39 struct tss_struct *tss;
40 unsigned int i, max_long, bytes, bytes_updated; 40 unsigned int i, max_long, bytes, bytes_updated;
41 41
42 if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) 42 if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
116/* 116/*
117 * This is only used on smaller machines. 117 * This is only used on smaller machines.
118 */ 118 */
119void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{ 120{
121 unsigned long mask = cpus_addr(cpumask)[0]; 121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 122 unsigned long flags;
123 123
124 local_irq_save(flags); 124 local_irq_save(flags);
125 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); 125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128} 128}
129 129
130void send_IPI_mask_sequence(cpumask_t mask, int vector) 130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{ 131{
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int query_cpu; 133 unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
139 */ 139 */
140 140
141 local_irq_save(flags); 141 local_irq_save(flags);
142 for_each_possible_cpu(query_cpu) { 142 for_each_cpu(query_cpu, mask)
143 if (cpu_isset(query_cpu, mask)) { 143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
144 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), 158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
145 vector); 159 vector);
146 }
147 }
148 local_irq_restore(flags); 160 local_irq_restore(flags);
149} 161}
150 162
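The rewritten send_IPI_mask_sequence() walks only the bits actually set in the mask instead of testing membership for every possible CPU, and the new send_IPI_mask_allbutself() additionally skips the sender. A user-space sketch of that walk over a 64-bit mask (the unicast send is mocked with printf):

#include <stdio.h>
#include <stdint.h>

static void send_to(unsigned int cpu, int vector)
{
	printf("IPI vector %#x -> cpu %u\n", vector, cpu);
}

static void send_mask_allbutself(uint64_t mask, unsigned int this_cpu, int vector)
{
	for (unsigned int cpu = 0; cpu < 64; cpu++)
		if ((mask & (1ULL << cpu)) && cpu != this_cpu)
			send_to(cpu, vector);
}

int main(void)
{
	/* cpus {0,1,2,3,5}; cpu 0 is the sender and must not IPI itself */
	send_mask_allbutself(0x2f, 0, 0xfd);
	return 0;
}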
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d92bc71e41a7..22f650db917f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -5,10 +5,11 @@
5#include <linux/interrupt.h> 5#include <linux/interrupt.h>
6#include <linux/kernel_stat.h> 6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/smp.h>
8 9
9#include <asm/apic.h> 10#include <asm/apic.h>
10#include <asm/io_apic.h> 11#include <asm/io_apic.h>
11#include <asm/smp.h> 12#include <asm/irq.h>
12 13
13atomic_t irq_err_count; 14atomic_t irq_err_count;
14 15
@@ -122,6 +123,9 @@ int show_interrupts(struct seq_file *p, void *v)
122 } 123 }
123 124
124 desc = irq_to_desc(i); 125 desc = irq_to_desc(i);
126 if (!desc)
127 return 0;
128
125 spin_lock_irqsave(&desc->lock, flags); 129 spin_lock_irqsave(&desc->lock, flags);
126#ifndef CONFIG_SMP 130#ifndef CONFIG_SMP
127 any_count = kstat_irqs(i); 131 any_count = kstat_irqs(i);
@@ -192,3 +196,5 @@ u64 arch_irq_stat(void)
192#endif 196#endif
193 return sum; 197 return sum;
194} 198}
199
200EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..74b9ff7341e9 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -15,9 +15,9 @@
15#include <linux/notifier.h> 15#include <linux/notifier.h>
16#include <linux/cpu.h> 16#include <linux/cpu.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/uaccess.h>
18 19
19#include <asm/apic.h> 20#include <asm/apic.h>
20#include <asm/uaccess.h>
21 21
22DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 22DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
23EXPORT_PER_CPU_SYMBOL(irq_stat); 23EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -93,7 +93,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
93 return 0; 93 return 0;
94 94
95 /* build the stack frame on the IRQ stack */ 95 /* build the stack frame on the IRQ stack */
96 isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); 96 isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
97 irqctx->tinfo.task = curctx->tinfo.task; 97 irqctx->tinfo.task = curctx->tinfo.task;
98 irqctx->tinfo.previous_esp = current_stack_pointer; 98 irqctx->tinfo.previous_esp = current_stack_pointer;
99 99
@@ -137,7 +137,7 @@ void __cpuinit irq_ctx_init(int cpu)
137 137
138 hardirq_ctx[cpu] = irqctx; 138 hardirq_ctx[cpu] = irqctx;
139 139
140 irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; 140 irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE];
141 irqctx->tinfo.task = NULL; 141 irqctx->tinfo.task = NULL;
142 irqctx->tinfo.exec_domain = NULL; 142 irqctx->tinfo.exec_domain = NULL;
143 irqctx->tinfo.cpu = cpu; 143 irqctx->tinfo.cpu = cpu;
@@ -147,7 +147,7 @@ void __cpuinit irq_ctx_init(int cpu)
147 softirq_ctx[cpu] = irqctx; 147 softirq_ctx[cpu] = irqctx;
148 148
149 printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", 149 printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
150 cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); 150 cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
151} 151}
152 152
153void irq_ctx_exit(int cpu) 153void irq_ctx_exit(int cpu)
@@ -174,7 +174,7 @@ asmlinkage void do_softirq(void)
174 irqctx->tinfo.previous_esp = current_stack_pointer; 174 irqctx->tinfo.previous_esp = current_stack_pointer;
175 175
176 /* build the stack frame on the softirq stack */ 176 /* build the stack frame on the softirq stack */
177 isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); 177 isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
178 178
179 call_on_stack(__do_softirq, isp); 179 call_on_stack(__do_softirq, isp);
180 /* 180 /*
@@ -233,25 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
233#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 234#include <mach_apic.h>
235 235
236void fixup_irqs(cpumask_t map) 236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void)
237{ 238{
238 unsigned int irq; 239 unsigned int irq;
239 static int warned; 240 static int warned;
240 struct irq_desc *desc; 241 struct irq_desc *desc;
241 242
242 for_each_irq_desc(irq, desc) { 243 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 244 const struct cpumask *affinity;
244 245
246 if (!desc)
247 continue;
245 if (irq == 2) 248 if (irq == 2)
246 continue; 249 continue;
247 250
248 cpus_and(mask, desc->affinity, map); 251 affinity = &desc->affinity;
249 if (any_online_cpu(mask) == NR_CPUS) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
250 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
251 mask = map; 254 affinity = cpu_all_mask;
252 } 255 }
253 if (desc->chip->set_affinity) 256 if (desc->chip->set_affinity)
254 desc->chip->set_affinity(irq, mask); 257 desc->chip->set_affinity(irq, affinity);
255 else if (desc->action && !(warned++)) 258 else if (desc->action && !(warned++))
256 printk("Cannot set affinity for irq %i\n", irq); 259 printk("Cannot set affinity for irq %i\n", irq);
257 } 260 }
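Both fixup_irqs() variants (here and in irq_64.c below) now make the break-affinity decision with cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids, i.e. "the two masks no longer intersect". A minimal model with 64-bit masks standing in for cpumasks:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t online   = 0x3;	/* cpus 0-1 remain online */
	uint64_t affinity = 0xc;	/* irq was bound to cpus 2-3, now gone */

	if ((affinity & online) == 0) {		/* no online cpu left in the mask */
		printf("Breaking affinity\n");
		affinity = ~0ULL;		/* fall back to cpu_all_mask */
	}
	printf("new affinity: %#llx\n", (unsigned long long)affinity);
	return 0;
}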
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1df869e5bd0b..63c88e6ec025 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -14,10 +14,10 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/ftrace.h> 16#include <linux/ftrace.h>
17#include <asm/uaccess.h> 17#include <linux/uaccess.h>
18#include <linux/smp.h>
18#include <asm/io_apic.h> 19#include <asm/io_apic.h>
19#include <asm/idle.h> 20#include <asm/idle.h>
20#include <asm/smp.h>
21 21
22/* 22/*
23 * Probabilistic stack overflow check: 23 * Probabilistic stack overflow check:
@@ -80,40 +80,43 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
80} 80}
81 81
82#ifdef CONFIG_HOTPLUG_CPU 82#ifdef CONFIG_HOTPLUG_CPU
83void fixup_irqs(cpumask_t map) 83/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
84void fixup_irqs(void)
84{ 85{
85 unsigned int irq; 86 unsigned int irq;
86 static int warned; 87 static int warned;
87 struct irq_desc *desc; 88 struct irq_desc *desc;
88 89
89 for_each_irq_desc(irq, desc) { 90 for_each_irq_desc(irq, desc) {
90 cpumask_t mask;
91 int break_affinity = 0; 91 int break_affinity = 0;
92 int set_affinity = 1; 92 int set_affinity = 1;
93 const struct cpumask *affinity;
93 94
95 if (!desc)
96 continue;
94 if (irq == 2) 97 if (irq == 2)
95 continue; 98 continue;
96 99
97 /* interrupt's are disabled at this point */ 100 /* interrupt's are disabled at this point */
98 spin_lock(&desc->lock); 101 spin_lock(&desc->lock);
99 102
103 affinity = &desc->affinity;
100 if (!irq_has_action(irq) || 104 if (!irq_has_action(irq) ||
101 cpus_equal(desc->affinity, map)) { 105 cpumask_equal(affinity, cpu_online_mask)) {
102 spin_unlock(&desc->lock); 106 spin_unlock(&desc->lock);
103 continue; 107 continue;
104 } 108 }
105 109
106 cpus_and(mask, desc->affinity, map); 110 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
107 if (cpus_empty(mask)) {
108 break_affinity = 1; 111 break_affinity = 1;
109 mask = map; 112 affinity = cpu_all_mask;
110 } 113 }
111 114
112 if (desc->chip->mask) 115 if (desc->chip->mask)
113 desc->chip->mask(irq); 116 desc->chip->mask(irq);
114 117
115 if (desc->chip->set_affinity) 118 if (desc->chip->set_affinity)
116 desc->chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, affinity);
117 else if (!(warned++)) 120 else if (!(warned++))
118 set_affinity = 0; 121 set_affinity = 0;
119 122
@@ -139,18 +142,18 @@ extern void call_softirq(void);
139 142
140asmlinkage void do_softirq(void) 143asmlinkage void do_softirq(void)
141{ 144{
141{ 144{
142 __u32 pending; 145 __u32 pending;
143 unsigned long flags; 146 unsigned long flags;
144 147
145 if (in_interrupt()) 148 if (in_interrupt())
146 return; 149 return;
147 150
148 local_irq_save(flags); 151 local_irq_save(flags);
149 pending = local_softirq_pending(); 152 pending = local_softirq_pending();
150 /* Switch to interrupt stack */ 153 /* Switch to interrupt stack */
151 if (pending) { 154 if (pending) {
152 call_softirq(); 155 call_softirq();
153 WARN_ON_ONCE(softirq_count()); 156 WARN_ON_ONCE(softirq_count());
154 } 157 }
155 local_irq_restore(flags); 158 local_irq_restore(flags);
156} 159}
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 6a33b5e30161..0bef6280f30c 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -9,18 +9,18 @@
9#include <linux/kernel_stat.h> 9#include <linux/kernel_stat.h>
10#include <linux/sysdev.h> 10#include <linux/sysdev.h>
11#include <linux/bitops.h> 11#include <linux/bitops.h>
12#include <linux/io.h>
13#include <linux/delay.h>
12 14
13#include <asm/atomic.h> 15#include <asm/atomic.h>
14#include <asm/system.h> 16#include <asm/system.h>
15#include <asm/io.h>
16#include <asm/timer.h> 17#include <asm/timer.h>
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <asm/delay.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/arch_hooks.h> 21#include <asm/arch_hooks.h>
22#include <asm/i8259.h> 22#include <asm/i8259.h>
23 23#include <asm/traps.h>
24 24
25 25
26/* 26/*
@@ -34,12 +34,10 @@
34 * leads to races. IBM designers who came up with it should 34 * leads to races. IBM designers who came up with it should
35 * be shot. 35 * be shot.
36 */ 36 */
37
38 37
39static irqreturn_t math_error_irq(int cpl, void *dev_id) 38static irqreturn_t math_error_irq(int cpl, void *dev_id)
40{ 39{
41 extern void math_error(void __user *); 40 outb(0, 0xF0);
42 outb(0,0xF0);
43 if (ignore_fpu_irq || !boot_cpu_data.hard_math) 41 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
44 return IRQ_NONE; 42 return IRQ_NONE;
45 math_error((void __user *)get_irq_regs()->ip); 43 math_error((void __user *)get_irq_regs()->ip);
@@ -56,7 +54,7 @@ static struct irqaction fpu_irq = {
56 .name = "fpu", 54 .name = "fpu",
57}; 55};
58 56
59void __init init_ISA_irqs (void) 57void __init init_ISA_irqs(void)
60{ 58{
61 int i; 59 int i;
62 60
@@ -68,8 +66,7 @@ void __init init_ISA_irqs (void)
68 /* 66 /*
69 * 16 old-style INTA-cycle interrupts: 67 * 16 old-style INTA-cycle interrupts:
70 */ 68 */
71 for (i = 0; i < 16; i++) { 69 for (i = 0; i < NR_IRQS_LEGACY; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i); 70 struct irq_desc *desc = irq_to_desc(i);
74 71
75 desc->status = IRQ_DISABLED; 72 desc->status = IRQ_DISABLED;
@@ -111,6 +108,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
111 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 108 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
112}; 109};
113 110
111int vector_used_by_percpu_irq(unsigned int vector)
112{
113 int cpu;
114
115 for_each_online_cpu(cpu) {
116 if (per_cpu(vector_irq, cpu)[vector] != -1)
117 return 1;
118 }
119
120 return 0;
121}
122
114/* Overridden in paravirt.c */ 123/* Overridden in paravirt.c */
115void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 124void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
116 125
@@ -147,10 +156,12 @@ void __init native_init_IRQ(void)
147 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 156 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
148 157
149 /* IPI for single call function */ 158 /* IPI for single call function */
150 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 159 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
160 call_function_single_interrupt);
151 161
152 /* Low priority IPI to cleanup after moving an irq */ 162 /* Low priority IPI to cleanup after moving an irq */
153 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 163 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
164 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
154#endif 165#endif
155 166
156#ifdef CONFIG_X86_LOCAL_APIC 167#ifdef CONFIG_X86_LOCAL_APIC
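vector_used_by_percpu_irq(), added here and mirrored in irqinit_64.c below, reports a vector as busy if any online CPU still maps it in its per-cpu vector_irq[] table. A self-contained model (table sizes invented; -1 marks a free slot, as in the initializers above):

#include <stdio.h>

#define NR_CPUS		4
#define NR_VECTORS	256

static int vector_irq[NR_CPUS][NR_VECTORS];

static int vector_used_by_percpu_irq(unsigned int vector)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (vector_irq[cpu][vector] != -1)
			return 1;
	return 0;
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		for (int v = 0; v < NR_VECTORS; v++)
			vector_irq[cpu][v] = -1;

	vector_irq[2][0xfb] = 9;	/* pretend cpu 2 routed irq 9 to vector 0xfb */

	printf("vector 0xfb used: %d\n", vector_used_by_percpu_irq(0xfb));
	printf("vector 0xfc used: %d\n", vector_used_by_percpu_irq(0xfc));
	return 0;
}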
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 91d785c25ad9..6a71bfc51e51 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -11,14 +11,14 @@
11#include <linux/kernel_stat.h> 11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h> 12#include <linux/sysdev.h>
13#include <linux/bitops.h> 13#include <linux/bitops.h>
14#include <linux/acpi.h>
15#include <linux/io.h>
16#include <linux/delay.h>
14 17
15#include <asm/acpi.h>
16#include <asm/atomic.h> 18#include <asm/atomic.h>
17#include <asm/system.h> 19#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/hw_irq.h> 20#include <asm/hw_irq.h>
20#include <asm/pgtable.h> 21#include <asm/pgtable.h>
21#include <asm/delay.h>
22#include <asm/desc.h> 22#include <asm/desc.h>
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <asm/i8259.h> 24#include <asm/i8259.h>
@@ -69,15 +69,26 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
70}; 70};
71 71
72void __init init_ISA_irqs(void) 72int vector_used_by_percpu_irq(unsigned int vector)
73{
74 int cpu;
75
76 for_each_online_cpu(cpu) {
77 if (per_cpu(vector_irq, cpu)[vector] != -1)
78 return 1;
79 }
80
81 return 0;
82}
83
84static void __init init_ISA_irqs(void)
73{ 85{
74 int i; 86 int i;
75 87
76 init_bsp_APIC(); 88 init_bsp_APIC();
77 init_8259A(0); 89 init_8259A(0);
78 90
79 for (i = 0; i < 16; i++) { 91 for (i = 0; i < NR_IRQS_LEGACY; i++) {
80 /* first time call this irq_desc */
81 struct irq_desc *desc = irq_to_desc(i); 92 struct irq_desc *desc = irq_to_desc(i);
82 93
83 desc->status = IRQ_DISABLED; 94 desc->status = IRQ_DISABLED;
@@ -122,6 +133,7 @@ static void __init smp_intr_init(void)
122 133
123 /* Low priority IPI to cleanup after moving an irq */ 134 /* Low priority IPI to cleanup after moving an irq */
124 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 135 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
136 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
125#endif 137#endif
126} 138}
127 139
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679ec6aa..884d985b8b82 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
376 376
377void __kprobes arch_remove_kprobe(struct kprobe *p) 377void __kprobes arch_remove_kprobe(struct kprobe *p)
378{ 378{
379 mutex_lock(&kprobe_mutex); 379 if (p->ainsn.insn) {
380 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); 380 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
381 mutex_unlock(&kprobe_mutex); 381 p->ainsn.insn = NULL;
382 }
382} 383}
383 384
384static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) 385static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
@@ -694,7 +695,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
694 /* 695 /*
695 * It is possible to have multiple instances associated with a given 696 * It is possible to have multiple instances associated with a given
696 * task either because multiple functions in the call path have 697 * task either because multiple functions in the call path have
697 * return probes installed on them, and/or more then one 698 * return probes installed on them, and/or more than one
698 * return probe was registered for a target function. 699 * return probe was registered for a target function.
699 * 700 *
700 * We can handle this because: 701 * We can handle this because:
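The arch_remove_kprobe() rewrite trades the kprobe_mutex critical section for the guarded free-then-poison idiom, which also makes a repeated call harmless. The shape of that idiom, out of kernel context (plain free() standing in for free_insn_slot()):

#include <stdio.h>
#include <stdlib.h>

struct probe {
	void *insn;	/* stand-in for p->ainsn.insn */
};

static void remove_probe(struct probe *p)
{
	if (p->insn) {
		free(p->insn);
		p->insn = NULL;	/* poison so a second call is a no-op */
	}
}

int main(void)
{
	struct probe p = { .insn = malloc(16) };

	remove_probe(&p);
	remove_probe(&p);	/* safe: pointer already NULL, no double free */
	printf("insn = %p\n", p.insn);
	return 0;
}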
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
89 */ 89 */
90static unsigned long kvm_get_tsc_khz(void) 90static unsigned long kvm_get_tsc_khz(void)
91{ 91{
92 return preset_lpj; 92 struct pvclock_vcpu_time_info *src;
93 src = &per_cpu(hv_clock, 0);
94 return pvclock_tsc_khz(src);
93} 95}
94 96
95static void kvm_get_preset_lpj(void) 97static void kvm_get_preset_lpj(void)
96{ 98{
97 struct pvclock_vcpu_time_info *src;
98 unsigned long khz; 99 unsigned long khz;
99 u64 lpj; 100 u64 lpj;
100 101
101 src = &per_cpu(hv_clock, 0); 102 khz = kvm_get_tsc_khz();
102 khz = pvclock_tsc_khz(src);
103 103
104 lpj = ((u64)khz * 1000); 104 lpj = ((u64)khz * 1000);
105 do_div(lpj, HZ); 105 do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
194#endif 194#endif
195 kvm_get_preset_lpj(); 195 kvm_get_preset_lpj();
196 clocksource_register(&kvm_clock); 196 clocksource_register(&kvm_clock);
197 pv_info.paravirt_enabled = 1;
198 pv_info.name = "KVM";
197 } 199 }
198} 200}
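With kvm_get_tsc_khz() now reading the rate from the per-cpu pvclock page, kvm_get_preset_lpj() reduces to lpj = khz * 1000 / HZ. The arithmetic in isolation, with invented numbers (HZ is config dependent; 250 is an assumption):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned long khz = 2400000;	/* hypothetical 2.4 GHz TSC */
	unsigned int hz = 250;		/* ticks per second */
	uint64_t lpj = (uint64_t)khz * 1000 / hz;	/* cycles per jiffy */

	printf("preset lpj = %llu\n", (unsigned long long)lpj);	/* 9600000 */
	return 0;
}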
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee3..71f1d99a635d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/uaccess.h>
15 16
16#include <asm/uaccess.h>
17#include <asm/system.h> 17#include <asm/system.h>
18#include <asm/ldt.h> 18#include <asm/ldt.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
93 if (err < 0) 93 if (err < 0)
94 return err; 94 return err;
95 95
96 for(i = 0; i < old->size; i++) 96 for (i = 0; i < old->size; i++)
97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); 97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
98 return 0; 98 return 0;
99} 99}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..8815f3c7fec7 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
252/* 252/*
253 * The MFPGT timers on the CS5536 provide us with suitable timers to use 253 * The MFPGT timers on the CS5536 provide us with suitable timers to use
254 * as clock event sources - not as good as a HPET or APIC, but certainly 254 * as clock event sources - not as good as a HPET or APIC, but certainly
255 * better then the PIT. This isn't a general purpose MFGPT driver, but 255 * better than the PIT. This isn't a general purpose MFGPT driver, but
256 * a simplified one designed specifically to act as a clock event source. 256 * a simplified one designed specifically to act as a clock event source.
257 * For full details about the MFGPT, please consult the CS5536 data sheet. 257 * For full details about the MFGPT, please consult the CS5536 data sheet.
258 */ 258 */
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
287 .set_mode = mfgpt_set_mode, 287 .set_mode = mfgpt_set_mode,
288 .set_next_event = mfgpt_next_event, 288 .set_next_event = mfgpt_next_event,
289 .rating = 250, 289 .rating = 250,
290 .cpumask = CPU_MASK_ALL, 290 .cpumask = cpu_all_mask,
291 .shift = 32 291 .shift = 32
292}; 292};
293 293
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe85..666e43df51f9 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/acpi.h> 14#include <asm/acpi.h>
15#include <asm/mmconfig.h> 15#include <asm/mmconfig.h>
16 16#include <asm/pci_x86.h>
17#include "../pci/pci.h"
18 17
19struct pci_hostbridge_probe { 18struct pci_hostbridge_probe {
20 u32 bus; 19 u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808ba..c0601c2848a1 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -2,7 +2,7 @@
2 * Intel Multiprocessor Specification 1.1 and 1.4 2 * Intel Multiprocessor Specification 1.1 and 1.4
3 * compliant MP-table parsing routines. 3 * compliant MP-table parsing routines.
4 * 4 *
5 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> 5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
6 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 6 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> 7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
8 */ 8 */
@@ -16,14 +16,13 @@
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/acpi.h> 17#include <linux/acpi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp.h>
19 20
20#include <asm/smp.h>
21#include <asm/mtrr.h> 21#include <asm/mtrr.h>
22#include <asm/mpspec.h> 22#include <asm/mpspec.h>
23#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
24#include <asm/io_apic.h> 24#include <asm/io_apic.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/acpi.h>
27#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
28#include <asm/e820.h> 27#include <asm/e820.h>
29#include <asm/trampoline.h> 28#include <asm/trampoline.h>
@@ -49,12 +48,12 @@ static int __init mpf_checksum(unsigned char *mp, int len)
49 return sum & 0xFF; 48 return sum & 0xFF;
50} 49}
51 50
52static void __init MP_processor_info(struct mpc_config_processor *m) 51static void __init MP_processor_info(struct mpc_cpu *m)
53{ 52{
54 int apicid; 53 int apicid;
55 char *bootup_cpu = ""; 54 char *bootup_cpu = "";
56 55
57 if (!(m->mpc_cpuflag & CPU_ENABLED)) { 56 if (!(m->cpuflag & CPU_ENABLED)) {
58 disabled_cpus++; 57 disabled_cpus++;
59 return; 58 return;
60 } 59 }
@@ -62,54 +61,54 @@ static void __init MP_processor_info(struct mpc_config_processor *m)
62 if (x86_quirks->mpc_apic_id) 61 if (x86_quirks->mpc_apic_id)
63 apicid = x86_quirks->mpc_apic_id(m); 62 apicid = x86_quirks->mpc_apic_id(m);
64 else 63 else
65 apicid = m->mpc_apicid; 64 apicid = m->apicid;
66 65
67 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { 66 if (m->cpuflag & CPU_BOOTPROCESSOR) {
68 bootup_cpu = " (Bootup-CPU)"; 67 bootup_cpu = " (Bootup-CPU)";
69 boot_cpu_physical_apicid = m->mpc_apicid; 68 boot_cpu_physical_apicid = m->apicid;
70 } 69 }
71 70
72 printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); 71 printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu);
73 generic_processor_info(apicid, m->mpc_apicver); 72 generic_processor_info(apicid, m->apicver);
74} 73}
75 74
76#ifdef CONFIG_X86_IO_APIC 75#ifdef CONFIG_X86_IO_APIC
77static void __init MP_bus_info(struct mpc_config_bus *m) 76static void __init MP_bus_info(struct mpc_bus *m)
78{ 77{
79 char str[7]; 78 char str[7];
80 memcpy(str, m->mpc_bustype, 6); 79 memcpy(str, m->bustype, 6);
81 str[6] = 0; 80 str[6] = 0;
82 81
83 if (x86_quirks->mpc_oem_bus_info) 82 if (x86_quirks->mpc_oem_bus_info)
84 x86_quirks->mpc_oem_bus_info(m, str); 83 x86_quirks->mpc_oem_bus_info(m, str);
85 else 84 else
86 apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); 85 apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
87 86
88#if MAX_MP_BUSSES < 256 87#if MAX_MP_BUSSES < 256
89 if (m->mpc_busid >= MAX_MP_BUSSES) { 88 if (m->busid >= MAX_MP_BUSSES) {
90 printk(KERN_WARNING "MP table busid value (%d) for bustype %s " 89 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
91 " is too large, max. supported is %d\n", 90 " is too large, max. supported is %d\n",
92 m->mpc_busid, str, MAX_MP_BUSSES - 1); 91 m->busid, str, MAX_MP_BUSSES - 1);
93 return; 92 return;
94 } 93 }
95#endif 94#endif
96 95
97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { 96 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
98 set_bit(m->mpc_busid, mp_bus_not_pci); 97 set_bit(m->busid, mp_bus_not_pci);
99#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 98#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 99 mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
101#endif 100#endif
102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 101 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
103 if (x86_quirks->mpc_oem_pci_bus) 102 if (x86_quirks->mpc_oem_pci_bus)
104 x86_quirks->mpc_oem_pci_bus(m); 103 x86_quirks->mpc_oem_pci_bus(m);
105 104
106 clear_bit(m->mpc_busid, mp_bus_not_pci); 105 clear_bit(m->busid, mp_bus_not_pci);
107#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 106#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 107 mp_bus_id_to_type[m->busid] = MP_BUS_PCI;
109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 108 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; 109 mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
111 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { 110 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
112 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; 111 mp_bus_id_to_type[m->busid] = MP_BUS_MCA;
113#endif 112#endif
114 } else 113 } else
115 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 114 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
@@ -133,32 +132,31 @@ static int bad_ioapic(unsigned long address)
133 return 0; 132 return 0;
134} 133}
135 134
136static void __init MP_ioapic_info(struct mpc_config_ioapic *m) 135static void __init MP_ioapic_info(struct mpc_ioapic *m)
137{ 136{
138 if (!(m->mpc_flags & MPC_APIC_USABLE)) 137 if (!(m->flags & MPC_APIC_USABLE))
139 return; 138 return;
140 139
141 printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", 140 printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
142 m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); 141 m->apicid, m->apicver, m->apicaddr);
143 142
144 if (bad_ioapic(m->mpc_apicaddr)) 143 if (bad_ioapic(m->apicaddr))
145 return; 144 return;
146 145
147 mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; 146 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
148 mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; 147 mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
149 mp_ioapics[nr_ioapics].mp_type = m->mpc_type; 148 mp_ioapics[nr_ioapics].mp_type = m->type;
150 mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; 149 mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
151 mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; 150 mp_ioapics[nr_ioapics].mp_flags = m->flags;
152 nr_ioapics++; 151 nr_ioapics++;
153} 152}
154 153
155static void print_MP_intsrc_info(struct mpc_config_intsrc *m) 154static void print_MP_intsrc_info(struct mpc_intsrc *m)
156{ 155{
157 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 156 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
158 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 157 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
159 m->mpc_irqtype, m->mpc_irqflag & 3, 158 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 159 m->srcbusirq, m->dstapic, m->dstirq);
161 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
162} 160}
163 161
164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 162static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
@@ -170,52 +168,52 @@ static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
170 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 168 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
171} 169}
172 170
173static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, 171static void __init assign_to_mp_irq(struct mpc_intsrc *m,
174 struct mp_config_intsrc *mp_irq) 172 struct mp_config_intsrc *mp_irq)
175{ 173{
176 mp_irq->mp_dstapic = m->mpc_dstapic; 174 mp_irq->mp_dstapic = m->dstapic;
177 mp_irq->mp_type = m->mpc_type; 175 mp_irq->mp_type = m->type;
178 mp_irq->mp_irqtype = m->mpc_irqtype; 176 mp_irq->mp_irqtype = m->irqtype;
179 mp_irq->mp_irqflag = m->mpc_irqflag; 177 mp_irq->mp_irqflag = m->irqflag;
180 mp_irq->mp_srcbus = m->mpc_srcbus; 178 mp_irq->mp_srcbus = m->srcbus;
181 mp_irq->mp_srcbusirq = m->mpc_srcbusirq; 179 mp_irq->mp_srcbusirq = m->srcbusirq;
182 mp_irq->mp_dstirq = m->mpc_dstirq; 180 mp_irq->mp_dstirq = m->dstirq;
183} 181}
184 182
185static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 183static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
186 struct mpc_config_intsrc *m) 184 struct mpc_intsrc *m)
187{ 185{
188 m->mpc_dstapic = mp_irq->mp_dstapic; 186 m->dstapic = mp_irq->mp_dstapic;
189 m->mpc_type = mp_irq->mp_type; 187 m->type = mp_irq->mp_type;
190 m->mpc_irqtype = mp_irq->mp_irqtype; 188 m->irqtype = mp_irq->mp_irqtype;
191 m->mpc_irqflag = mp_irq->mp_irqflag; 189 m->irqflag = mp_irq->mp_irqflag;
192 m->mpc_srcbus = mp_irq->mp_srcbus; 190 m->srcbus = mp_irq->mp_srcbus;
193 m->mpc_srcbusirq = mp_irq->mp_srcbusirq; 191 m->srcbusirq = mp_irq->mp_srcbusirq;
194 m->mpc_dstirq = mp_irq->mp_dstirq; 192 m->dstirq = mp_irq->mp_dstirq;
195} 193}
196 194
197static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 195static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
198 struct mpc_config_intsrc *m) 196 struct mpc_intsrc *m)
199{ 197{
200 if (mp_irq->mp_dstapic != m->mpc_dstapic) 198 if (mp_irq->mp_dstapic != m->dstapic)
201 return 1; 199 return 1;
202 if (mp_irq->mp_type != m->mpc_type) 200 if (mp_irq->mp_type != m->type)
203 return 2; 201 return 2;
204 if (mp_irq->mp_irqtype != m->mpc_irqtype) 202 if (mp_irq->mp_irqtype != m->irqtype)
205 return 3; 203 return 3;
206 if (mp_irq->mp_irqflag != m->mpc_irqflag) 204 if (mp_irq->mp_irqflag != m->irqflag)
207 return 4; 205 return 4;
208 if (mp_irq->mp_srcbus != m->mpc_srcbus) 206 if (mp_irq->mp_srcbus != m->srcbus)
209 return 5; 207 return 5;
210 if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) 208 if (mp_irq->mp_srcbusirq != m->srcbusirq)
211 return 6; 209 return 6;
212 if (mp_irq->mp_dstirq != m->mpc_dstirq) 210 if (mp_irq->mp_dstirq != m->dstirq)
213 return 7; 211 return 7;
214 212
215 return 0; 213 return 0;
216} 214}
217 215
218static void __init MP_intsrc_info(struct mpc_config_intsrc *m) 216static void __init MP_intsrc_info(struct mpc_intsrc *m)
219{ 217{
220 int i; 218 int i;
221 219
@@ -233,57 +231,55 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
233 231
234#endif 232#endif
235 233
236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) 234static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
237{ 235{
238 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," 236 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 237 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
240 m->mpc_irqtype, m->mpc_irqflag & 3, 238 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid,
241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, 239 m->srcbusirq, m->destapic, m->destapiclint);
242 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
243} 240}
244 241
245/* 242/*
246 * Read/parse the MPC 243 * Read/parse the MPC
247 */ 244 */
248 245
249static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, 246static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
250 char *str)
251{ 247{
252 248
253 if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { 249 if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) {
254 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", 250 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
255 mpc->mpc_signature[0], mpc->mpc_signature[1], 251 mpc->signature[0], mpc->signature[1],
256 mpc->mpc_signature[2], mpc->mpc_signature[3]); 252 mpc->signature[2], mpc->signature[3]);
257 return 0; 253 return 0;
258 } 254 }
259 if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { 255 if (mpf_checksum((unsigned char *)mpc, mpc->length)) {
260 printk(KERN_ERR "MPTABLE: checksum error!\n"); 256 printk(KERN_ERR "MPTABLE: checksum error!\n");
261 return 0; 257 return 0;
262 } 258 }
263 if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { 259 if (mpc->spec != 0x01 && mpc->spec != 0x04) {
264 printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", 260 printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
265 mpc->mpc_spec); 261 mpc->spec);
266 return 0; 262 return 0;
267 } 263 }
268 if (!mpc->mpc_lapic) { 264 if (!mpc->lapic) {
269 printk(KERN_ERR "MPTABLE: null local APIC address!\n"); 265 printk(KERN_ERR "MPTABLE: null local APIC address!\n");
270 return 0; 266 return 0;
271 } 267 }
272 memcpy(oem, mpc->mpc_oem, 8); 268 memcpy(oem, mpc->oem, 8);
273 oem[8] = 0; 269 oem[8] = 0;
274 printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); 270 printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
275 271
276 memcpy(str, mpc->mpc_productid, 12); 272 memcpy(str, mpc->productid, 12);
277 str[12] = 0; 273 str[12] = 0;
278 274
279 printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); 275 printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
280 276
281 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); 277 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic);
282 278
283 return 1; 279 return 1;
284} 280}
285 281
286static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) 282static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
287{ 283{
288 char str[16]; 284 char str[16];
289 char oem[10]; 285 char oem[10];
@@ -308,14 +304,14 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
308#endif 304#endif
309 /* save the local APIC address, it might be non-default */ 305 /* save the local APIC address, it might be non-default */
310 if (!acpi_lapic) 306 if (!acpi_lapic)
311 mp_lapic_addr = mpc->mpc_lapic; 307 mp_lapic_addr = mpc->lapic;
312 308
313 if (early) 309 if (early)
314 return 1; 310 return 1;
315 311
316 if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { 312 if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
317 struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; 313 struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
318 x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); 314 x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
319 } 315 }
320 316
321 /* 317 /*
@@ -324,12 +320,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
324 if (x86_quirks->mpc_record) 320 if (x86_quirks->mpc_record)
325 *x86_quirks->mpc_record = 0; 321 *x86_quirks->mpc_record = 0;
326 322
327 while (count < mpc->mpc_length) { 323 while (count < mpc->length) {
328 switch (*mpt) { 324 switch (*mpt) {
329 case MP_PROCESSOR: 325 case MP_PROCESSOR:
330 { 326 {
331 struct mpc_config_processor *m = 327 struct mpc_cpu *m = (struct mpc_cpu *)mpt;
332 (struct mpc_config_processor *)mpt;
333 /* ACPI may have already provided this data */ 328 /* ACPI may have already provided this data */
334 if (!acpi_lapic) 329 if (!acpi_lapic)
335 MP_processor_info(m); 330 MP_processor_info(m);
@@ -339,8 +334,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
339 } 334 }
340 case MP_BUS: 335 case MP_BUS:
341 { 336 {
342 struct mpc_config_bus *m = 337 struct mpc_bus *m = (struct mpc_bus *)mpt;
343 (struct mpc_config_bus *)mpt;
344#ifdef CONFIG_X86_IO_APIC 338#ifdef CONFIG_X86_IO_APIC
345 MP_bus_info(m); 339 MP_bus_info(m);
346#endif 340#endif
@@ -351,30 +345,28 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
351 case MP_IOAPIC: 345 case MP_IOAPIC:
352 { 346 {
353#ifdef CONFIG_X86_IO_APIC 347#ifdef CONFIG_X86_IO_APIC
354 struct mpc_config_ioapic *m = 348 struct mpc_ioapic *m = (struct mpc_ioapic *)mpt;
355 (struct mpc_config_ioapic *)mpt;
356 MP_ioapic_info(m); 349 MP_ioapic_info(m);
357#endif 350#endif
358 mpt += sizeof(struct mpc_config_ioapic); 351 mpt += sizeof(struct mpc_ioapic);
359 count += sizeof(struct mpc_config_ioapic); 352 count += sizeof(struct mpc_ioapic);
360 break; 353 break;
361 } 354 }
362 case MP_INTSRC: 355 case MP_INTSRC:
363 { 356 {
364#ifdef CONFIG_X86_IO_APIC 357#ifdef CONFIG_X86_IO_APIC
365 struct mpc_config_intsrc *m = 358 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
366 (struct mpc_config_intsrc *)mpt;
367 359
368 MP_intsrc_info(m); 360 MP_intsrc_info(m);
369#endif 361#endif
370 mpt += sizeof(struct mpc_config_intsrc); 362 mpt += sizeof(struct mpc_intsrc);
371 count += sizeof(struct mpc_config_intsrc); 363 count += sizeof(struct mpc_intsrc);
372 break; 364 break;
373 } 365 }
374 case MP_LINTSRC: 366 case MP_LINTSRC:
375 { 367 {
376 struct mpc_config_lintsrc *m = 368 struct mpc_lintsrc *m =
377 (struct mpc_config_lintsrc *)mpt; 369 (struct mpc_lintsrc *)mpt;
378 MP_lintsrc_info(m); 370 MP_lintsrc_info(m);
379 mpt += sizeof(*m); 371 mpt += sizeof(*m);
380 count += sizeof(*m); 372 count += sizeof(*m);
@@ -385,8 +377,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
385 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); 377 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
386 printk(KERN_ERR "type %x\n", *mpt); 378 printk(KERN_ERR "type %x\n", *mpt);
387 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, 379 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
388 1, mpc, mpc->mpc_length, 1); 380 1, mpc, mpc->length, 1);
389 count = mpc->mpc_length; 381 count = mpc->length;
390 break; 382 break;
391 } 383 }
392 if (x86_quirks->mpc_record) 384 if (x86_quirks->mpc_record)
@@ -417,16 +409,16 @@ static int __init ELCR_trigger(unsigned int irq)
417 409
418static void __init construct_default_ioirq_mptable(int mpc_default_type) 410static void __init construct_default_ioirq_mptable(int mpc_default_type)
419{ 411{
420 struct mpc_config_intsrc intsrc; 412 struct mpc_intsrc intsrc;
421 int i; 413 int i;
422 int ELCR_fallback = 0; 414 int ELCR_fallback = 0;
423 415
424 intsrc.mpc_type = MP_INTSRC; 416 intsrc.type = MP_INTSRC;
425 intsrc.mpc_irqflag = 0; /* conforming */ 417 intsrc.irqflag = 0; /* conforming */
426 intsrc.mpc_srcbus = 0; 418 intsrc.srcbus = 0;
427 intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; 419 intsrc.dstapic = mp_ioapics[0].mp_apicid;
428 420
429 intsrc.mpc_irqtype = mp_INT; 421 intsrc.irqtype = mp_INT;
430 422
431 /* 423 /*
432 * If true, we have an ISA/PCI system with no IRQ entries 424 * If true, we have an ISA/PCI system with no IRQ entries
@@ -469,30 +461,30 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
469 * irqflag field (level sensitive, active high polarity). 461 * irqflag field (level sensitive, active high polarity).
470 */ 462 */
471 if (ELCR_trigger(i)) 463 if (ELCR_trigger(i))
472 intsrc.mpc_irqflag = 13; 464 intsrc.irqflag = 13;
473 else 465 else
474 intsrc.mpc_irqflag = 0; 466 intsrc.irqflag = 0;
475 } 467 }
476 468
477 intsrc.mpc_srcbusirq = i; 469 intsrc.srcbusirq = i;
478 intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ 470 intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
479 MP_intsrc_info(&intsrc); 471 MP_intsrc_info(&intsrc);
480 } 472 }
481 473
482 intsrc.mpc_irqtype = mp_ExtINT; 474 intsrc.irqtype = mp_ExtINT;
483 intsrc.mpc_srcbusirq = 0; 475 intsrc.srcbusirq = 0;
484 intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ 476 intsrc.dstirq = 0; /* 8259A to INTIN0 */
485 MP_intsrc_info(&intsrc); 477 MP_intsrc_info(&intsrc);
486} 478}
487 479
488 480
489static void __init construct_ioapic_table(int mpc_default_type) 481static void __init construct_ioapic_table(int mpc_default_type)
490{ 482{
491 struct mpc_config_ioapic ioapic; 483 struct mpc_ioapic ioapic;
492 struct mpc_config_bus bus; 484 struct mpc_bus bus;
493 485
494 bus.mpc_type = MP_BUS; 486 bus.type = MP_BUS;
495 bus.mpc_busid = 0; 487 bus.busid = 0;
496 switch (mpc_default_type) { 488 switch (mpc_default_type) {
497 default: 489 default:
498 printk(KERN_ERR "???\nUnknown standard configuration %d\n", 490 printk(KERN_ERR "???\nUnknown standard configuration %d\n",
@@ -500,29 +492,29 @@ static void __init construct_ioapic_table(int mpc_default_type)
500 /* fall through */ 492 /* fall through */
501 case 1: 493 case 1:
502 case 5: 494 case 5:
503 memcpy(bus.mpc_bustype, "ISA ", 6); 495 memcpy(bus.bustype, "ISA ", 6);
504 break; 496 break;
505 case 2: 497 case 2:
506 case 6: 498 case 6:
507 case 3: 499 case 3:
508 memcpy(bus.mpc_bustype, "EISA ", 6); 500 memcpy(bus.bustype, "EISA ", 6);
509 break; 501 break;
510 case 4: 502 case 4:
511 case 7: 503 case 7:
512 memcpy(bus.mpc_bustype, "MCA ", 6); 504 memcpy(bus.bustype, "MCA ", 6);
513 } 505 }
514 MP_bus_info(&bus); 506 MP_bus_info(&bus);
515 if (mpc_default_type > 4) { 507 if (mpc_default_type > 4) {
516 bus.mpc_busid = 1; 508 bus.busid = 1;
517 memcpy(bus.mpc_bustype, "PCI ", 6); 509 memcpy(bus.bustype, "PCI ", 6);
518 MP_bus_info(&bus); 510 MP_bus_info(&bus);
519 } 511 }
520 512
521 ioapic.mpc_type = MP_IOAPIC; 513 ioapic.type = MP_IOAPIC;
522 ioapic.mpc_apicid = 2; 514 ioapic.apicid = 2;
523 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; 515 ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
524 ioapic.mpc_flags = MPC_APIC_USABLE; 516 ioapic.flags = MPC_APIC_USABLE;
525 ioapic.mpc_apicaddr = 0xFEC00000; 517 ioapic.apicaddr = 0xFEC00000;
526 MP_ioapic_info(&ioapic); 518 MP_ioapic_info(&ioapic);
527 519
528 /* 520 /*
@@ -536,8 +528,8 @@ static inline void __init construct_ioapic_table(int mpc_default_type) { }
536 528
537static inline void __init construct_default_ISA_mptable(int mpc_default_type) 529static inline void __init construct_default_ISA_mptable(int mpc_default_type)
538{ 530{
539 struct mpc_config_processor processor; 531 struct mpc_cpu processor;
540 struct mpc_config_lintsrc lintsrc; 532 struct mpc_lintsrc lintsrc;
541 int linttypes[2] = { mp_ExtINT, mp_NMI }; 533 int linttypes[2] = { mp_ExtINT, mp_NMI };
542 int i; 534 int i;
543 535
@@ -549,30 +541,30 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
549 /* 541 /*
550 * 2 CPUs, numbered 0 & 1. 542 * 2 CPUs, numbered 0 & 1.
551 */ 543 */
552 processor.mpc_type = MP_PROCESSOR; 544 processor.type = MP_PROCESSOR;
553 /* Either an integrated APIC or a discrete 82489DX. */ 545 /* Either an integrated APIC or a discrete 82489DX. */
554 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; 546 processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
555 processor.mpc_cpuflag = CPU_ENABLED; 547 processor.cpuflag = CPU_ENABLED;
556 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 548 processor.cpufeature = (boot_cpu_data.x86 << 8) |
557 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; 549 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
558 processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; 550 processor.featureflag = boot_cpu_data.x86_capability[0];
559 processor.mpc_reserved[0] = 0; 551 processor.reserved[0] = 0;
560 processor.mpc_reserved[1] = 0; 552 processor.reserved[1] = 0;
561 for (i = 0; i < 2; i++) { 553 for (i = 0; i < 2; i++) {
562 processor.mpc_apicid = i; 554 processor.apicid = i;
563 MP_processor_info(&processor); 555 MP_processor_info(&processor);
564 } 556 }
565 557
566 construct_ioapic_table(mpc_default_type); 558 construct_ioapic_table(mpc_default_type);
567 559
568 lintsrc.mpc_type = MP_LINTSRC; 560 lintsrc.type = MP_LINTSRC;
569 lintsrc.mpc_irqflag = 0; /* conforming */ 561 lintsrc.irqflag = 0; /* conforming */
570 lintsrc.mpc_srcbusid = 0; 562 lintsrc.srcbusid = 0;
571 lintsrc.mpc_srcbusirq = 0; 563 lintsrc.srcbusirq = 0;
572 lintsrc.mpc_destapic = MP_APIC_ALL; 564 lintsrc.destapic = MP_APIC_ALL;
573 for (i = 0; i < 2; i++) { 565 for (i = 0; i < 2; i++) {
-		lintsrc.mpc_irqtype = linttypes[i];
-		lintsrc.mpc_destapiclint = i;
+		lintsrc.irqtype = linttypes[i];
+		lintsrc.destapiclint = i;
 		MP_lintsrc_info(&lintsrc);
 	}
 }
@@ -657,15 +649,15 @@ static void __init __get_smp_config(unsigned int early)
 	 * ISA defaults and hope it will work.
 	 */
 	if (!mp_irq_entries) {
-		struct mpc_config_bus bus;
+		struct mpc_bus bus;
 
 		printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
 		       "using default mptable. "
 		       "(tell your hw vendor)\n");
 
-		bus.mpc_type = MP_BUS;
-		bus.mpc_busid = 0;
-		memcpy(bus.mpc_bustype, "ISA   ", 6);
+		bus.type = MP_BUS;
+		bus.busid = 0;
+		memcpy(bus.bustype, "ISA   ", 6);
 		MP_bus_info(&bus);
 
 		construct_default_ioirq_mptable(0);
@@ -803,14 +795,14 @@ void __init find_smp_config(void)
 #ifdef CONFIG_X86_IO_APIC
 static u8 __initdata irq_used[MAX_IRQ_SOURCES];
 
-static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
+static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
 {
 	int i;
 
-	if (m->mpc_irqtype != mp_INT)
+	if (m->irqtype != mp_INT)
 		return 0;
 
-	if (m->mpc_irqflag != 0x0f)
+	if (m->irqflag != 0x0f)
 		return 0;
 
 	/* not legacy */
@@ -822,9 +814,9 @@ static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
 		if (mp_irqs[i].mp_irqflag != 0x0f)
 			continue;
 
-		if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
+		if (mp_irqs[i].mp_srcbus != m->srcbus)
 			continue;
-		if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
+		if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
 			continue;
 		if (irq_used[i]) {
 			/* already claimed */
@@ -840,10 +832,10 @@ static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
 
 #define SPARE_SLOT_NUM 20
 
-static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
+static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
 #endif
 
-static int __init replace_intsrc_all(struct mp_config_table *mpc,
+static int __init replace_intsrc_all(struct mpc_table *mpc,
 					unsigned long mpc_new_phys,
 					unsigned long mpc_new_length)
 {
@@ -855,36 +847,33 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc,
 	int count = sizeof(*mpc);
 	unsigned char *mpt = ((unsigned char *)mpc) + count;
 
-	printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
-	while (count < mpc->mpc_length) {
+	printk(KERN_INFO "mpc_length %x\n", mpc->length);
+	while (count < mpc->length) {
 		switch (*mpt) {
 		case MP_PROCESSOR:
 			{
-				struct mpc_config_processor *m =
-				    (struct mpc_config_processor *)mpt;
+				struct mpc_cpu *m = (struct mpc_cpu *)mpt;
 				mpt += sizeof(*m);
 				count += sizeof(*m);
 				break;
 			}
 		case MP_BUS:
 			{
-				struct mpc_config_bus *m =
-				    (struct mpc_config_bus *)mpt;
+				struct mpc_bus *m = (struct mpc_bus *)mpt;
 				mpt += sizeof(*m);
 				count += sizeof(*m);
 				break;
 			}
 		case MP_IOAPIC:
 			{
-				mpt += sizeof(struct mpc_config_ioapic);
-				count += sizeof(struct mpc_config_ioapic);
+				mpt += sizeof(struct mpc_ioapic);
+				count += sizeof(struct mpc_ioapic);
 				break;
 			}
 		case MP_INTSRC:
 			{
 #ifdef CONFIG_X86_IO_APIC
-				struct mpc_config_intsrc *m =
-				    (struct mpc_config_intsrc *)mpt;
+				struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 
 				printk(KERN_INFO "OLD ");
 				print_MP_intsrc_info(m);
@@ -905,14 +894,14 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc,
 				nr_m_spare++;
 			}
 #endif
-			mpt += sizeof(struct mpc_config_intsrc);
-			count += sizeof(struct mpc_config_intsrc);
+			mpt += sizeof(struct mpc_intsrc);
+			count += sizeof(struct mpc_intsrc);
 			break;
 		}
 		case MP_LINTSRC:
 			{
-				struct mpc_config_lintsrc *m =
-				    (struct mpc_config_lintsrc *)mpt;
+				struct mpc_lintsrc *m =
+				    (struct mpc_lintsrc *)mpt;
 				mpt += sizeof(*m);
 				count += sizeof(*m);
 				break;
@@ -922,7 +911,7 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc,
 			printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
 			printk(KERN_ERR "type %x\n", *mpt);
 			print_hex_dump(KERN_ERR, "            ", DUMP_PREFIX_ADDRESS, 16,
-					1, mpc, mpc->mpc_length, 1);
+					1, mpc, mpc->length, 1);
 			goto out;
 		}
 	}
@@ -944,9 +933,8 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc,
 			assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
 			m_spare[nr_m_spare] = NULL;
 		} else {
-			struct mpc_config_intsrc *m =
-			    (struct mpc_config_intsrc *)mpt;
-			count += sizeof(struct mpc_config_intsrc);
+			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
+			count += sizeof(struct mpc_intsrc);
 			if (!mpc_new_phys) {
 				printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
 			} else {
@@ -958,17 +946,16 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc,
 			}
 		}
 		assign_to_mpc_intsrc(&mp_irqs[i], m);
-		mpc->mpc_length = count;
-		mpt += sizeof(struct mpc_config_intsrc);
+		mpc->length = count;
+		mpt += sizeof(struct mpc_intsrc);
 	}
 	print_mp_irq_info(&mp_irqs[i]);
 }
 #endif
 out:
 	/* update checksum */
-	mpc->mpc_checksum = 0;
-	mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
-					   mpc->mpc_length);
+	mpc->checksum = 0;
+	mpc->checksum -= mpf_checksum((unsigned char *)mpc, mpc->length);
 
 	return 0;
 }
@@ -1014,8 +1001,7 @@ static int __init update_mp_table(void)
 	char str[16];
 	char oem[10];
 	struct intel_mp_floating *mpf;
-	struct mp_config_table *mpc;
-	struct mp_config_table *mpc_new;
+	struct mpc_table *mpc, *mpc_new;
 
 	if (!enable_update_mptable)
 		return 0;
@@ -1041,7 +1027,7 @@ static int __init update_mp_table(void)
 	printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
 	printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
 
-	if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
+	if (mpc_new_phys && mpc->length > mpc_new_length) {
 		mpc_new_phys = 0;
 		printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
 			 mpc_new_length);
@@ -1050,10 +1036,10 @@ static int __init update_mp_table(void)
 	if (!mpc_new_phys) {
 		unsigned char old, new;
 		/* check if we can change the postion */
-		mpc->mpc_checksum = 0;
-		old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
-		mpc->mpc_checksum = 0xff;
-		new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
+		mpc->checksum = 0;
+		old = mpf_checksum((unsigned char *)mpc, mpc->length);
+		mpc->checksum = 0xff;
+		new = mpf_checksum((unsigned char *)mpc, mpc->length);
 		if (old == new) {
 			printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
 			return 0;
@@ -1062,7 +1048,7 @@ static int __init update_mp_table(void)
 	} else {
 		mpf->mpf_physptr = mpc_new_phys;
 		mpc_new = phys_to_virt(mpc_new_phys);
-		memcpy(mpc_new, mpc, mpc->mpc_length);
+		memcpy(mpc_new, mpc, mpc->length);
 		mpc = mpc_new;
 		/* check if we can modify that */
 		if (mpc_new_phys - mpf->mpf_physptr) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 82a7c7ed6d45..726266695b2c 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file)
 	lock_kernel();
 	cpu = iminor(file->f_path.dentry->d_inode);
 
-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a7..7228979f1e7f 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -29,8 +29,6 @@
 
 #include <asm/i8259.h>
 #include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/nmi.h>
 #include <asm/proto.h>
 #include <asm/timer.h>
 
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 0deea37a53cf..f2191d4f2717 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -117,16 +117,15 @@ static inline int generate_logical_apicid(int quad, int phys_apicid)
 }
 
 /* x86_quirks member */
-static int mpc_apic_id(struct mpc_config_processor *m)
+static int mpc_apic_id(struct mpc_cpu *m)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
-	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
+	int logical_apicid = generate_logical_apicid(quad, m->apicid);
 
 	printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver, quad, logical_apicid);
+	       m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->apicver, quad, logical_apicid);
 	return logical_apicid;
 }
 
@@ -135,26 +134,26 @@ int mp_bus_id_to_node[MAX_MP_BUSSES];
 int mp_bus_id_to_local[MAX_MP_BUSSES];
 
 /* x86_quirks member */
-static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
+static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
 	int local = translation_table[mpc_record]->trans_local;
 
-	mp_bus_id_to_node[m->mpc_busid] = quad;
-	mp_bus_id_to_local[m->mpc_busid] = local;
+	mp_bus_id_to_node[m->busid] = quad;
+	mp_bus_id_to_local[m->busid] = local;
 	printk(KERN_INFO "Bus #%d is %s (node %d)\n",
-		m->mpc_busid, name, quad);
+		m->busid, name, quad);
 }
 
 int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 
 /* x86_quirks member */
-static void mpc_oem_pci_bus(struct mpc_config_bus *m)
+static void mpc_oem_pci_bus(struct mpc_bus *m)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
 	int local = translation_table[mpc_record]->trans_local;
 
-	quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+	quad_local_to_mp_bus_id[quad][local] = m->busid;
 }
 
 static void __init MP_translation_info(struct mpc_config_translation *m)
@@ -186,7 +185,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
  * Read/parse the MPC oem tables
  */
 
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable,
 				    unsigned short oemsize)
 {
 	int count = sizeof(*oemtable);	/* the header size */
@@ -195,18 +194,18 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
 	mpc_record = 0;
 	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
 	       oemtable);
-	if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
+	if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
 		printk(KERN_WARNING
 		       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
-		       oemtable->oem_signature[0], oemtable->oem_signature[1],
-		       oemtable->oem_signature[2], oemtable->oem_signature[3]);
+		       oemtable->signature[0], oemtable->signature[1],
+		       oemtable->signature[2], oemtable->signature[3]);
 		return;
 	}
-	if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
+	if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
 		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
 		return;
 	}
-	while (count < oemtable->oem_length) {
+	while (count < oemtable->length) {
 		switch (*oemptr) {
 		case MP_TRANSLATION:
 			{
@@ -260,8 +259,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
 	.update_genapic		= numaq_update_genapic,
 };
 
-void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-			 char *productid)
+void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	if (strncmp(oem, "IBM NUMA", 8))
 		printk("Warning! Not a NUMA-Q system!\n");
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 7a3dfceb90e4..b25428533141 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -38,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address);
    be probably a smaller DMA mask, but this is bug-to-bug compatible
    to older i386. */
 struct device x86_dma_fallback_dev = {
-	.bus_id = "fallback device",
+	.init_name = "fallback device",
 	.coherent_dma_mask = DMA_32BIT_MASK,
 	.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
 };
@@ -101,11 +101,15 @@ static void __init dma32_free_bootmem(void)
 	dma32_bootmem_ptr = NULL;
 	dma32_bootmem_size = 0;
 }
+#endif
 
 void __init pci_iommu_alloc(void)
 {
+#ifdef CONFIG_X86_64
 	/* free the range so iommu could get some range less than 4G */
 	dma32_free_bootmem();
+#endif
+
 	/*
 	 * The order of these functions is important for
 	 * fall-back/fail-over reasons
@@ -121,15 +125,6 @@ void __init pci_iommu_alloc(void)
 	pci_swiotlb_init();
 }
 
-unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
-{
-	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-
-	return size >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(iommu_nr_pages);
-#endif
-
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag)
 {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff6..00c2bcd41463 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */
  * to trigger bugs with some popular PCI cards, in particular 3ware (but
  * has been also also seen with Qlogic at least).
  */
-int iommu_fullflush = 1;
+static int iommu_fullflush = 1;
 
 /* Allocation bitmap for the remapping area: */
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 3c539d111abb..d59c91747665 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -3,6 +3,8 @@
 #include <linux/pci.h>
 #include <linux/cache.h>
 #include <linux/module.h>
+#include <linux/swiotlb.h>
+#include <linux/bootmem.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/iommu.h>
@@ -11,6 +13,31 @@
 
 int swiotlb __read_mostly;
 
+void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+{
+	return alloc_bootmem_low_pages(size);
+}
+
+void *swiotlb_alloc(unsigned order, unsigned long nslabs)
+{
+	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
+}
+
+dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
+{
+	return paddr;
+}
+
+phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+{
+	return baddr;
+}
+
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+{
+	return 0;
+}
+
 static dma_addr_t
 swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
 			int direction)
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = {
 void __init pci_swiotlb_init(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
+#ifdef CONFIG_X86_64
 	if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
 		swiotlb = 1;
+#endif
 	if (swiotlb_force)
 		swiotlb = 1;
 	if (swiotlb) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3ba155d24884..a546f55c77b4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -39,11 +39,12 @@
 #include <linux/prctl.h>
 #include <linux/dmi.h>
 #include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/kdebug.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
@@ -56,10 +57,8 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
-#include <asm/kdebug.h>
 #include <asm/idle.h>
 #include <asm/syscalls.h>
-#include <asm/smp.h>
 #include <asm/ds.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -205,7 +204,7 @@ extern void kernel_thread_helper(void);
 /*
  * Create a kernel thread
  */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
 	struct pt_regs regs;
 
@@ -266,7 +265,7 @@ void flush_thread(void)
 	tsk->thread.debugreg3 = 0;
 	tsk->thread.debugreg6 = 0;
 	tsk->thread.debugreg7 = 0;
-	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
 	/*
 	 * Forget coprocessor state..
@@ -293,9 +292,9 @@ void prepare_to_copy(struct task_struct *tsk)
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	unsigned long unused,
-	struct task_struct * p, struct pt_regs * regs)
+	struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs * childregs;
+	struct pt_regs *childregs;
 	struct task_struct *tsk;
 	int err;
 
@@ -347,7 +346,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
-	__asm__("movl %0, %%gs" :: "r"(0));
+	__asm__("movl %0, %%gs" : : "r"(0));
 	regs->fs = 0;
 	set_fs(USER_DS);
 	regs->ds = __USER_DS;
@@ -638,7 +637,7 @@ asmlinkage int sys_vfork(struct pt_regs regs)
 asmlinkage int sys_execve(struct pt_regs regs)
 {
 	int error;
-	char * filename;
+	char *filename;
 
 	filename = getname((char __user *) regs.bx);
 	error = PTR_ERR(filename);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 67465ed89310..309949e9e1c1 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
 			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
 			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
 			 ich_force_enable_hpet);
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6eec..2b46eb41643b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,8 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/pci_x86.h>
+#include <asm/virtext.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
@@ -23,7 +25,6 @@
 
 #include <mach_ipi.h>
 
-
 /*
  * Power off function, if any
  */
@@ -39,6 +40,12 @@ int reboot_force;
 static int reboot_cpu = -1;
 #endif
 
+/* This is set if we need to go through the 'emergency' path.
+ * When machine_emergency_restart() is called, we may be on
+ * an inconsistent state and won't be able to do a clean cleanup
+ */
+static int reboot_emergency;
+
 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
 bool port_cf9_safe = false;
 
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
 	}
 }
 
+static void vmxoff_nmi(int cpu, struct die_args *args)
+{
+	cpu_emergency_vmxoff();
+}
+
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ */
+static void emergency_vmx_disable_all(void)
+{
+	/* Just make sure we won't change CPUs while doing this */
+	local_irq_disable();
+
+	/* We need to disable VMX on all CPUs before rebooting, otherwise
+	 * we risk hanging up the machine, because the CPU ignore INIT
+	 * signals when VMX is enabled.
+	 *
+	 * We can't take any locks and we may be on an inconsistent
+	 * state, so we use NMIs as IPIs to tell the other CPUs to disable
+	 * VMX and halt.
+	 *
+	 * For safety, we will avoid running the nmi_shootdown_cpus()
+	 * stuff unnecessarily, but we don't have a way to check
+	 * if other CPUs have VMX enabled. So we will call it only if the
+	 * CPU we are running on has VMX enabled.
+	 *
+	 * We will miss cases where VMX is not enabled on all CPUs. This
+	 * shouldn't do much harm because KVM always enable VMX on all
+	 * CPUs anyway. But we can miss it on the small window where KVM
+	 * is still enabling VMX.
+	 */
+	if (cpu_has_vmx() && cpu_vmx_enabled()) {
+		/* Disable VMX on this CPU.
+		 */
+		cpu_vmxoff();
+
+		/* Halt and disable VMX on the other CPUs */
+		nmi_shootdown_cpus(vmxoff_nmi);
+
+	}
+}
+
+
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
 {
 	int i;
 
+	if (reboot_emergency)
+		emergency_vmx_disable_all();
+
 	/* Tell the BIOS if we want cold or warm reboot */
 	*((unsigned short *)__va(0x472)) = reboot_mode;
 
@@ -449,7 +501,7 @@ void native_machine_shutdown(void)
 
 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
-	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
 		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
 #endif
@@ -459,7 +511,7 @@ void native_machine_shutdown(void)
 		reboot_cpu_id = smp_processor_id();
 
 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
 
 	/* O.K Now that I'm on the appropriate processor,
 	 * stop all of the others.
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
 #endif
 }
 
+static void __machine_emergency_restart(int emergency)
+{
+	reboot_emergency = emergency;
+	machine_ops.emergency_restart();
+}
+
 static void native_machine_restart(char *__unused)
 {
 	printk("machine restart\n");
 
 	if (!reboot_force)
 		machine_shutdown();
-	machine_emergency_restart();
+	__machine_emergency_restart(0);
 }
 
 static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
 
 void machine_emergency_restart(void)
 {
-	machine_ops.emergency_restart();
+	__machine_emergency_restart(1);
 }
 
 void machine_restart(char *cmd)
@@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 
 static void smp_send_nmi_allbutself(void)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(safe_smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, NMI_VECTOR);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 
 static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 08e02e8453c9..ae0d8042cf69 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -953,7 +953,7 @@ void __init setup_arch(char **cmdline_p)
 	ioapic_init_mappings();
 
 	/* need to wait for io_apic is mapped */
-	nr_irqs = probe_nr_irqs();
+	probe_nr_irqs_gsi();
 
 	kvm_guest_init();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..55c46074eba0 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -5,12 +5,11 @@
 #include <linux/percpu.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
-#include <asm/smp.h>
-#include <asm/percpu.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
 #include <asm/sections.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
-#include <asm/topology.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/highmem.h>
@@ -20,8 +19,8 @@ unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int max_physical_apicid;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
+unsigned int max_physical_apicid;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
@@ -131,7 +130,27 @@ static void __init setup_cpu_pda_map(void)
 	/* point to new pointer table */
 	_cpu_pda = new_cpu_pda;
 }
-#endif
+
+#endif /* CONFIG_SMP && CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_64
+
+/* correctly size the local cpu masks */
+static void setup_cpu_local_masks(void)
+{
+	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+	alloc_bootmem_cpumask_var(&cpu_callin_mask);
+	alloc_bootmem_cpumask_var(&cpu_callout_mask);
+	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
+#else /* CONFIG_X86_32 */
+
+static inline void setup_cpu_local_masks(void)
+{
+}
+
+#endif /* CONFIG_X86_32 */
 
 /*
  * Great future plan:
@@ -152,8 +171,11 @@ void __init setup_per_cpu_areas(void)
 	old_size = PERCPU_ENOUGH_ROOM;
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);
-	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
-			  size);
+
+	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+
+	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
 
 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -164,33 +186,29 @@ void __init setup_per_cpu_areas(void)
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = __alloc_bootmem(size, align,
 				 __pa(MAX_DMA_ADDRESS));
-			printk(KERN_INFO
-			       "cpu %d has no node %d or node-local memory\n",
-				cpu, node);
-			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
-					 cpu, __pa(ptr));
-		}
-		else {
+			pr_info("cpu %d has no node %d or node-local memory\n",
+				cpu, node);
+			pr_debug("per cpu data for cpu%d at %016lx\n",
+				 cpu, __pa(ptr));
+		} else {
 			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
 							__pa(MAX_DMA_ADDRESS));
-			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
-					cpu, node, __pa(ptr));
+			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
+				 cpu, node, __pa(ptr));
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
 
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
-		NR_CPUS, nr_cpu_ids, nr_node_ids);
-
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
+
+	/* Setup cpu initialized, callin, callout masks */
+	setup_cpu_local_masks();
 }
 
 #endif
@@ -282,10 +300,10 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 	else
 		cpu_clear(cpu, *mask);
 
-	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
- }
+		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
 
 void __cpuinit numa_add_cpu(int cpu)
 {
@@ -334,25 +352,25 @@ static const cpumask_t cpu_mask_none;
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
-const cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *cpumask_of_node(int node)
 {
 	if (node_to_cpumask_map == NULL) {
 		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
 			node);
 		dump_stack();
 		return (const cpumask_t *)&cpu_online_map;
 	}
 	if (node >= nr_node_ids) {
 		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
+			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
 			node, nr_node_ids);
 		dump_stack();
 		return &cpu_mask_none;
 	}
 	return &node_to_cpumask_map[node];
 }
-EXPORT_SYMBOL(_node_to_cpumask_ptr);
+EXPORT_SYMBOL(cpumask_of_node);
 
 /*
  * Returns a bitmask of CPUs on Node 'node'.
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db362c1..e6faa3316bd2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -1,7 +1,7 @@
 /*
  * Intel SMP support routines.
  *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
  * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
  * (c) 2002,2003 Andi Kleen, SuSE Labs.
  *
@@ -118,26 +118,33 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-void native_send_call_func_ipi(cpumask_t mask)
+void native_send_call_func_ipi(const struct cpumask *mask)
 {
-	cpumask_t allbutself;
+	cpumask_var_t allbutself;
 
-	allbutself = cpu_online_map;
-	cpu_clear(smp_processor_id(), allbutself);
+	if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+		return;
+	}
 
-	if (cpus_equal(mask, allbutself) &&
-	    cpus_equal(cpu_online_map, cpu_callout_map))
+	cpumask_copy(allbutself, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), allbutself);
+
+	if (cpumask_equal(mask, allbutself) &&
+	    cpumask_equal(cpu_online_mask, cpu_callout_mask))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
 		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+	free_cpumask_var(allbutself);
 }
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f8500c969442..bb1a3b1fc87f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,7 +1,7 @@
 /*
  * x86 SMP booting functions
  *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
  * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
  * Copyright 2001 Andi Kleen, SuSE Labs.
  *
@@ -102,15 +102,6 @@ EXPORT_SYMBOL(smp_num_siblings);
 /* Last level cache ID of each logical CPU */
 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -126,9 +117,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 static atomic_t init_deasserted;
 
 
-/* representing cpus for which sibling maps can be computed */
-static cpumask_t cpu_sibling_setup_map;
-
 /* Set if we find a B stepping CPU */
 static int __cpuinitdata smp_b_stepping;
 
@@ -146,7 +134,7 @@ EXPORT_SYMBOL(cpu_to_node_map);
 static void map_cpu_to_node(int cpu, int node)
 {
 	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
-	cpu_set(cpu, node_to_cpumask_map[node]);
+	cpumask_set_cpu(cpu, &node_to_cpumask_map[node]);
 	cpu_to_node_map[cpu] = node;
 }
 
@@ -157,7 +145,7 @@ static void unmap_cpu_to_node(int cpu)
 
 	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
 	for (node = 0; node < MAX_NUMNODES; node++)
-		cpu_clear(cpu, node_to_cpumask_map[node]);
+		cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]);
 	cpu_to_node_map[cpu] = 0;
 }
 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
@@ -215,7 +203,7 @@ static void __cpuinit smp_callin(void)
 	 */
 	phys_id = read_apic_id();
 	cpuid = smp_processor_id();
-	if (cpu_isset(cpuid, cpu_callin_map)) {
+	if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
 					phys_id, cpuid);
 	}
@@ -237,7 +225,7 @@ static void __cpuinit smp_callin(void)
 		/*
 		 * Has the boot CPU finished it's STARTUP sequence?
 		 */
-		if (cpu_isset(cpuid, cpu_callout_map))
+		if (cpumask_test_cpu(cpuid, cpu_callout_mask))
 			break;
 		cpu_relax();
 	}
@@ -280,7 +268,7 @@ static void __cpuinit smp_callin(void)
 	/*
 	 * Allow the master to continue.
 	 */
-	cpu_set(cpuid, cpu_callin_map);
+	cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
 static int __cpuinitdata unsafe_smp;
@@ -338,7 +326,7 @@ notrace static void __cpuinit start_secondary(void *unused)
 	ipi_call_lock();
 	lock_vector_lock();
 	__setup_vector_irq(smp_processor_id());
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
 	ipi_call_unlock();
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -444,50 +432,52 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 	int i;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	cpu_set(cpu, cpu_sibling_setup_map);
+	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
 	if (smp_num_siblings > 1) {
-		for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
-			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
-			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
-				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
-				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
-				cpu_set(i, per_cpu(cpu_core_map, cpu));
-				cpu_set(cpu, per_cpu(cpu_core_map, i));
-				cpu_set(i, c->llc_shared_map);
-				cpu_set(cpu, cpu_data(i).llc_shared_map);
+		for_each_cpu(i, cpu_sibling_setup_mask) {
+			struct cpuinfo_x86 *o = &cpu_data(i);
+
+			if (c->phys_proc_id == o->phys_proc_id &&
+			    c->cpu_core_id == o->cpu_core_id) {
+				cpumask_set_cpu(i, cpu_sibling_mask(cpu));
+				cpumask_set_cpu(cpu, cpu_sibling_mask(i));
+				cpumask_set_cpu(i, cpu_core_mask(cpu));
+				cpumask_set_cpu(cpu, cpu_core_mask(i));
+				cpumask_set_cpu(i, &c->llc_shared_map);
+				cpumask_set_cpu(cpu, &o->llc_shared_map);
 			}
 		}
 	} else {
-		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+		cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
 	}
 
-	cpu_set(cpu, c->llc_shared_map);
+	cpumask_set_cpu(cpu, &c->llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
-		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
+		cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
 		c->booted_cores = 1;
 		return;
 	}
 
-	for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
+	for_each_cpu(i, cpu_sibling_setup_mask) {
 		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
 		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-			cpu_set(i, c->llc_shared_map);
-			cpu_set(cpu, cpu_data(i).llc_shared_map);
+			cpumask_set_cpu(i, &c->llc_shared_map);
+			cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map);
 		}
 		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
-			cpu_set(i, per_cpu(cpu_core_map, cpu));
-			cpu_set(cpu, per_cpu(cpu_core_map, i));
+			cpumask_set_cpu(i, cpu_core_mask(cpu));
+			cpumask_set_cpu(cpu, cpu_core_mask(i));
 			/*
 			 * Does this new cpu bringup a new core?
 			 */
-			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
+			if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
 				/*
 				 * for each core in package, increment
 				 * the booted_cores for this new cpu
 				 */
-				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
+				if (cpumask_first(cpu_sibling_mask(i)) == i)
 					c->booted_cores++;
 				/*
 				 * increment the core count for all
@@ -502,7 +492,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 }
 
 /* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
+const struct cpumask *cpu_coregroup_mask(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
@@ -510,9 +500,14 @@ cpumask_t cpu_coregroup_map(int cpu)
 	 * And for power savings, we return cpu_core_map
 	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return per_cpu(cpu_core_map, cpu);
+		return cpu_core_mask(cpu);
 	else
-		return c->llc_shared_map;
+		return &c->llc_shared_map;
+}
+
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	return *cpu_coregroup_mask(cpu);
 }
 
 static void impress_friends(void)
@@ -524,7 +519,7 @@ static void impress_friends(void)
 	 */
 	pr_debug("Before bogomips.\n");
 	for_each_possible_cpu(cpu)
-		if (cpu_isset(cpu, cpu_callout_map))
+		if (cpumask_test_cpu(cpu, cpu_callout_mask))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
 	printk(KERN_INFO
 		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
@@ -905,19 +900,19 @@ do_rest:
 	 * allow APs to start initializing.
 	 */
 	pr_debug("Before Callout %d.\n", cpu);
-	cpu_set(cpu, cpu_callout_map);
+	cpumask_set_cpu(cpu, cpu_callout_mask);
 	pr_debug("After Callout %d.\n", cpu);
 
 	/*
 	 * Wait 5s total for a response
 	 */
 	for (timeout = 0; timeout < 50000; timeout++) {
-		if (cpu_isset(cpu, cpu_callin_map))
+		if (cpumask_test_cpu(cpu, cpu_callin_mask))
 			break;	/* It has booted */
 		udelay(100);
 	}
 
-	if (cpu_isset(cpu, cpu_callin_map)) {
+	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
 		/* number CPUs logically, starting from 1 (BSP is 0) */
 		pr_debug("OK.\n");
 		printk(KERN_INFO "CPU%d: ", cpu);
@@ -942,9 +937,14 @@ restore_state:
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
-		cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
-		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-		cpu_clear(cpu, cpu_present_map);
+
+		/* was set by do_boot_cpu() */
+		cpumask_clear_cpu(cpu, cpu_callout_mask);
+
+		/* was set by cpu_init() */
+		cpumask_clear_cpu(cpu, cpu_initialized_mask);
+
+		set_cpu_present(cpu, false);
 		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
 	}
 
@@ -978,7 +978,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	/*
 	 * Already booted CPU?
 	 */
-	if (cpu_isset(cpu, cpu_callin_map)) {
+	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
 		pr_debug("do_boot_cpu %d Already started\n", cpu);
 		return -ENOSYS;
 	}
@@ -1033,8 +1033,9 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 */
 static __init void disable_smp(void)
 {
-	cpu_present_map = cpumask_of_cpu(0);
-	cpu_possible_map = cpumask_of_cpu(0);
+	/* use the read/write pointers to the present and possible maps */
+	cpumask_copy(&cpu_present_map, cpumask_of(0));
+	cpumask_copy(&cpu_possible_map, cpumask_of(0));
 	smpboot_clear_io_apic_irqs();
 
 	if (smp_found_config)
@@ -1042,8 +1043,8 @@ static __init void disable_smp(void)
 	else
 		physid_set_mask_of_physid(0, &phys_cpu_present_map);
 	map_cpu_to_logical_apicid();
-	cpu_set(0, per_cpu(cpu_sibling_map, 0));
-	cpu_set(0, per_cpu(cpu_core_map, 0));
+	cpumask_set_cpu(0, cpu_sibling_mask(0));
+	cpumask_set_cpu(0, cpu_core_mask(0));
 }
 
 /*
@@ -1065,14 +1066,14 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		nr = 0;
 		for_each_present_cpu(cpu) {
 			if (nr >= 8)
-				cpu_clear(cpu, cpu_present_map);
+				set_cpu_present(cpu, false);
 			nr++;
 		}
 
 		nr = 0;
 		for_each_possible_cpu(cpu) {
 			if (nr >= 8)
-				cpu_clear(cpu, cpu_possible_map);
+				set_cpu_possible(cpu, false);
 			nr++;
 		}
 
@@ -1155,7 +1156,7 @@ static void __init smp_cpu_index_default(void)
 	for_each_possible_cpu(i) {
 		c = &cpu_data(i);
 		/* mark all to hotplug */
-		c->cpu_index = NR_CPUS;
+		c->cpu_index = nr_cpu_ids;
 	}
 }
 
@@ -1168,7 +1169,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	preempt_disable();
 	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
-	cpu_callin_map = cpumask_of_cpu(0);
+	cpumask_copy(cpu_callin_mask, cpumask_of(0));
 	mb();
 	/*
 	 * Setup boot CPU information
@@ -1243,8 +1244,8 @@ void __init native_smp_prepare_boot_cpu(void)
 	init_gdt(me);
 #endif
 	switch_to_new_gdt();
-	/* already set me in cpu_online_map in boot_cpu_init() */
-	cpu_set(me, cpu_callout_map);
+	/* already set me in cpu_online_mask in boot_cpu_init() */
+	cpumask_set_cpu(me, cpu_callout_mask);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
 }
 
@@ -1260,6 +1261,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	check_nmi_watchdog();
 }
 
+static int __initdata setup_possible_cpus = -1;
+static int __init _setup_possible_cpus(char *str)
+{
+	get_option(&str, &setup_possible_cpus);
+	return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+
 /*
  * cpu_possible_map should be static, it cannot change as cpu's
  * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1272,7 +1282,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 *
 * Three ways to find out the number of additional hotplug CPUs:
 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
+ * - The user can overwrite it with possible_cpus=NUM
 * - Otherwise don't reserve additional CPUs.
 * We do this because additional CPUs waste a lot of memory.
 * -AK
@@ -1285,15 +1295,25 @@ __init void prefill_possible_map(void)
 	if (!num_processors)
 		num_processors = 1;
 
-	possible = num_processors + disabled_cpus;
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
+	if (setup_possible_cpus == -1)
+		possible = num_processors + disabled_cpus;
+	else
+		possible = setup_possible_cpus;
+
+	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
+
+	if (possible > CONFIG_NR_CPUS) {
+		printk(KERN_WARNING
+			"%d Processors exceeds NR_CPUS limit of %d\n",
+			possible, CONFIG_NR_CPUS);
+		possible = CONFIG_NR_CPUS;
+	}
 
 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
 
 	for (i = 0; i < possible; i++)
-		cpu_set(i, cpu_possible_map);
+		set_cpu_possible(i, true);
 
 	nr_cpu_ids = possible;
 }
@@ -1305,31 +1325,31 @@ static void remove_siblinginfo(int cpu)
 	int sibling;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
-		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+	for_each_cpu(sibling, cpu_core_mask(cpu)) {
+		cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
 		/*/
 		 * last thread sibling in this cpu core going down
 		 */
-		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+		if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
 			cpu_data(sibling).booted_cores--;
 	}
 
-	for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
-		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
-	cpus_clear(per_cpu(cpu_sibling_map, cpu));
-	cpus_clear(per_cpu(cpu_core_map, cpu));
+	for_each_cpu(sibling, cpu_sibling_mask(cpu))
+		cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+	cpumask_clear(cpu_sibling_mask(cpu));
+	cpumask_clear(cpu_core_mask(cpu));
 	c->phys_proc_id = 0;
 	c->cpu_core_id = 0;
-	cpu_clear(cpu, cpu_sibling_setup_map);
+	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 }
 
 static void __ref remove_cpu_from_maps(int cpu)
 {
-	cpu_clear(cpu, cpu_online_map);
-	cpu_clear(cpu, cpu_callout_map);
-	cpu_clear(cpu, cpu_callin_map);
+	set_cpu_online(cpu, false);
+	cpumask_clear_cpu(cpu, cpu_callout_mask);
+	cpumask_clear_cpu(cpu, cpu_callin_mask);
 	/* was set by cpu_init() */
-	cpu_clear(cpu, cpu_initialized);
+	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	numa_remove_cpu(cpu);
 }
 
@@ -1352,7 +1372,7 @@ void cpu_disable_common(void)
 	lock_vector_lock();
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
-	fixup_irqs(cpu_online_map);
+	fixup_irqs();
 }
 
 int native_cpu_disable(void)
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 65309e4cb1c0..3985cac0ed47 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -105,8 +105,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 	   high bit of the PPI port B (0x61).  Note that some PS/2s,
 	   notably the 55SX, work fine if this is removed.  */
 
-		u8 irq_v = inb_p( 0x61 );	/* read the current state */
-		outb_p( irq_v|0x80, 0x61 );	/* reset the IRQ */
+		u8 irq_v = inb_p(0x61);		/* read the current state */
+		outb_p(irq_v | 0x80, 0x61);	/* reset the IRQ */
 	}
 #endif
 
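
The time_32.c hunk is purely a style cleanup of the MCA-bus timer acknowledge
described in the comment context: the timer IRQ latch is reset by pulsing the
high bit of PPI port B (I/O port 0x61). The same read-modify-write, sketched in
isolation (mca_ack_timer_irq is a hypothetical name, not part of the commit):

	#include <linux/types.h>
	#include <asm/io.h>

	static inline void mca_ack_timer_irq(void)
	{
		u8 v = inb_p(0x61);	/* read current PPI port B state */
		outb_p(v | 0x80, 0x61);	/* write bit 7 back to reset the IRQ */
	}
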
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 891e7a7c4334..e6e695acd725 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -17,10 +17,10 @@
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/mca.h>
+#include <linux/nmi.h>
 
 #include <asm/i8253.h>
 #include <asm/hpet.h>
-#include <asm/nmi.h>
 #include <asm/vgtod.h>
 #include <asm/time.h>
 #include <asm/timer.h>
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f949be..ce5054642247 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
 
 	while (!cpus_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7081a9..f8be6f1d2e48 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
 	while (!cpus_empty(f->flush_cpumask))
 		cpu_relax();
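
Both TLB-flush paths change the same call site: send_IPI_mask() now takes a
const struct cpumask * rather than a cpumask_t by value, so the callers pass
&cpumask and only a pointer crosses the call boundary instead of a potentially
large NR_CPUS-bit copy. A sketch of the convention change (flush_others and its
arguments are illustrative; send_IPI_mask comes from the genapic layer as in the
hunks above):

	#include <linux/cpumask.h>

	static void flush_others(cpumask_t mask, int vector)
	{
		/* old: send_IPI_mask(mask, vector) copied the whole mask */
		send_IPI_mask(&mask, vector);	/* new: pass by pointer */
	}
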
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa74..f885023167e0 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
 static struct bau_control * __init uv_table_bases_init(int blade, int node)
 {
 	int i;
-	int *ip;
 	struct bau_msg_status *msp;
 	struct bau_control *bau_tabp;
 
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
 		bau_cpubits_clear(&msp->seen_by, (int)
 				  uv_blade_nr_possible_cpus(blade));
 
-	bau_tabp->watching =
-		kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
-	BUG_ON(!bau_tabp->watching);
-
-	for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
-		*ip = 0;
-
 	uv_bau_table_bases[blade] = bau_tabp;
 
 	return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
 	bcp->bau_msg_head = bau_tablesp->va_queue_first;
 	bcp->va_queue_first = bau_tablesp->va_queue_first;
 	bcp->va_queue_last = bau_tablesp->va_queue_last;
-	bcp->watching = bau_tablesp->watching;
 	bcp->msg_statuses = bau_tablesp->msg_statuses;
 	bcp->descriptor_base = adp;
 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 141907ab6e22..98c2d055284b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
-#include <linux/unwind.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
@@ -51,7 +50,6 @@
 #include <asm/debugreg.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/unwind.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -65,16 +63,10 @@
 #else
 #include <asm/processor-flags.h>
 #include <asm/arch_hooks.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
 #include <asm/traps.h>
 
 #include "cpu/mcheck/mce.h"
 
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -89,6 +81,9 @@ gate_desc idt_table[256]
 	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 #endif
 
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
 static int ignore_nmis;
 
 static inline void conditional_sti(struct pt_regs *regs)
@@ -292,8 +287,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 8;
 
-	/* This is always a kernel trap and never fixable (and thus must
-	   never return). */
+	/*
+	 * This is always a kernel trap and never fixable (and thus must
+	 * never return).
+	 */
 	for (;;)
 		die(str, regs, error_code);
 }
@@ -520,9 +517,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 }
 
 #ifdef CONFIG_X86_64
-/* Help handler running on IST stack to switch back to user stack
-   for scheduling or signal handling. The actual stack switch is done in
-   entry.S */
+/*
+ * Help handler running on IST stack to switch back to user stack
+ * for scheduling or signal handling. The actual stack switch is done in
+ * entry.S
+ */
 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
@@ -532,8 +531,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 	/* Exception from user space */
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
-	/* Exception from kernel and interrupts are enabled. Move to
-	   kernel process stack. */
+	/*
+	 * Exception from kernel and interrupts are enabled. Move to
+	 * kernel process stack.
+	 */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -685,12 +686,7 @@ void math_error(void __user *ip)
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
 
-	err = swd & ~cwd & 0x3f;
-
-#ifdef CONFIG_X86_32
-	if (!err)
-		return;
-#endif
+	err = swd & ~cwd;
 
 	if (err & 0x001) {	/* Invalid op */
 		/*
@@ -708,7 +704,11 @@ void math_error(void __user *ip)
 	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
+		/*
+		 * If we're using IRQ 13, or supposedly even some trap 16
+		 * implementations, it's possible we get a spurious trap...
+		 */
+		return;		/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
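
In math_error(), the masked status is now computed as swd & ~cwd without the
& 0x3f truncation, and a status with none of the decoded exception bits set is
treated as a spurious IRQ 13 / trap 16 and silently ignored on both 32- and
64-bit, rather than raising SIGFPE with the old __SI_FAULT|SI_KERNEL code. A
sketch of the decode this function performs (x87 status-word bits; the si_code
mapping is condensed from the surrounding code, not a verbatim copy):

	unsigned short err = swd & ~cwd;	/* unmasked exceptions only */

	if (err & 0x001)		/* IE: invalid operation */
		si_code = FPE_FLTINV;
	else if (err & 0x004)		/* ZE: divide by zero */
		si_code = FPE_FLTDIV;
	else if (err & 0x008)		/* OE: overflow */
		si_code = FPE_FLTOVF;
	else if (err & 0x010)		/* UE: underflow */
		si_code = FPE_FLTUND;
	else if (err & 0x020)		/* PE: precision */
		si_code = FPE_FLTRES;
	else
		return;			/* nothing pending: spurious trap */
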
@@ -941,9 +941,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
-#ifdef CONFIG_X86_32
 	int i;
-#endif
 
 #ifdef CONFIG_EISA
 	void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1000,11 +998,15 @@ void __init trap_init(void)
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+#endif
 
 	/* Reserve all the builtin and the syscall vector: */
 	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 		set_bit(i, used_vectors);
 
+#ifdef CONFIG_X86_64
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
 	/*
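
trap_init() now populates the used_vectors bitmap on 64-bit as well, which is
why the DECLARE_BITMAP moved out of the 32-bit-only block in the earlier traps.c
hunks and why the loop counter i is declared unconditionally. The resulting
reservation logic, sketched as a standalone helper (reserve_builtin_vectors is a
hypothetical name; the constants are the kernel's own):

	#include <linux/bitops.h>

	DECLARE_BITMAP(used_vectors, NR_VECTORS);

	static void reserve_builtin_vectors(void)
	{
		int i;

		/* vectors 0..31 belong to CPU exceptions, never to IRQs */
		for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
			set_bit(i, used_vectors);

	#ifdef CONFIG_X86_64
		set_bit(IA32_SYSCALL_VECTOR, used_vectors);	/* int 0x80 compat */
	#else
		set_bit(SYSCALL_VECTOR, used_vectors);		/* int 0x80 */
	#endif
	}
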
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 0c9667f0752a..d801d06af068 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -176,33 +176,31 @@ static int __init visws_get_smp_config(unsigned int early)
  * No problem for Linux.
  */
 
-static void __init MP_processor_info(struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_cpu *m)
 {
 	int ver, logical_apicid;
 	physid_mask_t apic_cpus;
 
-	if (!(m->mpc_cpuflag & CPU_ENABLED))
+	if (!(m->cpuflag & CPU_ENABLED))
 		return;
 
-	logical_apicid = m->mpc_apicid;
+	logical_apicid = m->apicid;
 	printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
-	       m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver);
+	       m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+	       m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
 
-	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
-		boot_cpu_physical_apicid = m->mpc_apicid;
+	if (m->cpuflag & CPU_BOOTPROCESSOR)
+		boot_cpu_physical_apicid = m->apicid;
 
-	ver = m->mpc_apicver;
-	if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
+	ver = m->apicver;
+	if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
-			m->mpc_apicid, MAX_APICS);
+			m->apicid, MAX_APICS);
 		return;
 	}
 
-	apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
+	apic_cpus = apicid_to_cpu_present(m->apicid);
 	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
 	/*
 	 * Validate version
@@ -210,15 +208,15 @@ static void __init MP_processor_info(struct mpc_config_processor *m)
 	if (ver == 0x0) {
 		printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
 			"fixing up to 0x10. (tell your hw vendor)\n",
-			m->mpc_apicid);
+			m->apicid);
 		ver = 0x10;
 	}
-	apic_version[m->mpc_apicid] = ver;
+	apic_version[m->apicid] = ver;
 }
 
 static int __init visws_find_smp_config(unsigned int reserve)
 {
-	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+	struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
 	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
 
 	if (ncpus > CO_CPU_MAX) {
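
The VISWS quirk code is only catching up with the MP-table struct rename:
struct mpc_config_processor became struct mpc_cpu and the redundant mpc_ field
prefixes were dropped. The mapping, as far as these hunks exercise it (a hedged
sketch reconstructed from the call sites above, not a verbatim copy of
mpspec_def.h):

	struct mpc_cpu {
		unsigned char type;
		unsigned char apicid;		/* was mpc_apicid */
		unsigned char apicver;		/* was mpc_apicver */
		unsigned char cpuflag;		/* was mpc_cpuflag */
		unsigned int cpufeature;	/* was mpc_cpufeature */
		unsigned int featureflag;	/* was mpc_featureflag */
		unsigned int reserved[2];
	};
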
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
 	/* Upper bound is clockevent's use of ulong for cycle deltas. */
 	evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
 	evt->min_delta_ns = clockevent_delta2ns(1, evt);
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 
 	printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
 	       evt->name, evt->mult, evt->shift);
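
cpumask_of_cpu() built a full cpumask_t value; its replacement cpumask_of()
returns a const struct cpumask * into a constant table of single-bit masks,
matching the clockevent core's switch to a pointer-typed evt->cpumask. Sketch of
the new binding (bind_clockevent is a hypothetical helper):

	#include <linux/clockchips.h>
	#include <linux/cpumask.h>

	static void bind_clockevent(struct clock_event_device *evt, int cpu)
	{
		/* no copy: points at a shared constant single-CPU mask */
		evt->cpumask = cpumask_of(cpu);
	}
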
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e6999182..2b54fe002e94 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
  * Restore the extended state if present. Otherwise, restore the FP/SSE
  * state.
  */
-int restore_user_xstate(void __user *buf)
+static int restore_user_xstate(void __user *buf)
 {
 	struct _fpx_sw_bytes fx_sw_user;
 	u64 mask;