author     Ingo Molnar <mingo@elte.hu>  2008-08-14 06:19:59 -0400
committer  Ingo Molnar <mingo@elte.hu>  2008-08-14 06:19:59 -0400
commit     8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch)
tree       8129b5907161bc6ae26deb3645ce1e280c5e1f51 /arch/x86/kernel
parent     b2139aa0eec330c711c5a279db361e5ef1178e78 (diff)
parent     30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)

Merge commit 'v2.6.27-rc3' into x86/prototypes

Conflicts:
	include/asm-x86/dma-mapping.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 4
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 17
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 4
-rw-r--r--  arch/x86/kernel/apic_32.c | 14
-rw-r--r--  arch/x86/kernel/apm_32.c | 1
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/elanfreq.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 120
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 149
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c | 20
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 1
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 2
-rw-r--r--  arch/x86/kernel/cpuid.c | 4
-rw-r--r--  arch/x86/kernel/entry_32.S | 55
-rw-r--r--  arch/x86/kernel/entry_64.S | 55
-rw-r--r--  arch/x86/kernel/genapic_64.c | 1
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 4
-rw-r--r--  arch/x86/kernel/head_32.S | 8
-rw-r--r--  arch/x86/kernel/hpet.c | 10
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 6
-rw-r--r--  arch/x86/kernel/io_apic_64.c | 37
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 5
-rw-r--r--  arch/x86/kernel/kprobes.c | 6
-rw-r--r--  arch/x86/kernel/ldt.c | 6
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 39
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 2
-rw-r--r--  arch/x86/kernel/microcode.c | 14
-rw-r--r--  arch/x86/kernel/module_64.c | 1
-rw-r--r--  arch/x86/kernel/mpparse.c | 11
-rw-r--r--  arch/x86/kernel/msr.c | 4
-rw-r--r--  arch/x86/kernel/paravirt.c | 2
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 158
-rw-r--r--  arch/x86/kernel/pci-dma.c | 163
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 18
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 14
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 2
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 3
-rw-r--r--  arch/x86/kernel/relocate_kernel_32.S | 174
-rw-r--r--  arch/x86/kernel/setup.c | 42
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 21
-rw-r--r--  arch/x86/kernel/signal_32.c | 3
-rw-r--r--  arch/x86/kernel/signal_64.c | 56
-rw-r--r--  arch/x86/kernel/smpboot.c | 30
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 6
-rw-r--r--  arch/x86/kernel/vmi_32.c | 3
55 files changed, 825 insertions, 513 deletions
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a3ddad18aaa3..fa2161d5003b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str)
 		acpi_realmode_flags |= 2;
 	if (strncmp(str, "s3_beep", 7) == 0)
 		acpi_realmode_flags |= 4;
+#ifdef CONFIG_HIBERNATION
+	if (strncmp(str, "s4_nohwsig", 10) == 0)
+		acpi_no_s4_hw_signature();
+#endif
 	if (strncmp(str, "old_ordering", 12) == 0)
 		acpi_old_suspend_ordering();
 	str = strchr(str, ',');
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c25210e6ac88..22d7d050905d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define to_pages(addr, size) \
-	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
 #define EXIT_LOOP_COUNT 10000000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 			     u64 address, size_t size)
 {
 	int s = 0;
-	unsigned pages = to_pages(address, size);
+	unsigned pages = iommu_num_pages(address, size);
 
 	address &= PAGE_MASK;
 
@@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
 		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-		int pages = to_pages(iommu->exclusion_start,
+		int pages = iommu_num_pages(iommu->exclusion_start,
 					    iommu->exclusion_length);
 		dma_ops_reserve_addresses(dma_dom, startpage, pages);
 	}
 
@@ -667,7 +664,7 @@ static int get_device_resources(struct device *dev,
 	_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
 
 	/* device not translated by any IOMMU in the system? */
-	if (_bdf >= amd_iommu_last_bdf) {
+	if (_bdf > amd_iommu_last_bdf) {
 		*iommu = NULL;
 		*domain = NULL;
 		*bdf = 0xffff;
@@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev,
 	unsigned int pages;
 	int i;
 
-	pages = to_pages(paddr, size);
+	pages = iommu_num_pages(paddr, size);
 	paddr &= PAGE_MASK;
 
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu,
 	if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
 		return;
 
-	pages = to_pages(dma_addr, size);
+	pages = iommu_num_pages(dma_addr, size);
 	dma_addr &= PAGE_MASK;
 	start = dma_addr;
 
@@ -1085,7 +1082,7 @@ void prealloc_protection_domains(void)
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		devid = (dev->bus->number << 8) | dev->devfn;
-		if (devid >= amd_iommu_last_bdf)
+		if (devid > amd_iommu_last_bdf)
 			continue;
 		devid = amd_iommu_alias_table[devid];
 		if (domain_for_device(devid))
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c9d8ff2eb130..d9a9da597e79 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 		set_device_exclusion_range(m->devid, m);
 		break;
 	case ACPI_IVMD_TYPE_ALL:
-		for (i = 0; i < amd_iommu_last_bdf; ++i)
+		for (i = 0; i <= amd_iommu_last_bdf; ++i)
 			set_device_exclusion_range(i, m);
 		break;
 	case ACPI_IVMD_TYPE_RANGE:
@@ -934,7 +934,7 @@ int __init amd_iommu_init(void)
 	/*
 	 * let all alias entries point to itself
 	 */
-	for (i = 0; i < amd_iommu_last_bdf; ++i)
+	for (i = 0; i <= amd_iommu_last_bdf; ++i)
 		amd_iommu_alias_table[i] = i;
 
 	/*
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c898358371..039a8d4aaf62 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1720,15 +1720,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
 }
 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
 
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
 {
-	if (strcmp("debug", str) == 0)
+	if (!arg)
+		return -EINVAL;
+
+	if (strcmp(arg, "debug") == 0)
 		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", str) == 0)
+	else if (strcmp(arg, "verbose") == 0)
 		apic_verbosity = APIC_VERBOSE;
-	return 1;
+
+	return 0;
 }
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
 
 static int __init lapic_insert_resource(void)
 {
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9b441331e9..9ee24e6bc4b0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -219,7 +219,6 @@
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e85..c8e315f1aa83 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
  */
 static void __init check_fpu(void)
 {
+	s32 fdiv_bug;
+
 	if (!boot_cpu_data.hard_math) {
 #ifndef CONFIG_MATH_EMULATION
 		printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
74 "fistpl %0\n\t" 76 "fistpl %0\n\t"
75 "fwait\n\t" 77 "fwait\n\t"
76 "fninit" 78 "fninit"
77 : "=m" (*&boot_cpu_data.fdiv_bug) 79 : "=m" (*&fdiv_bug)
78 : "m" (*&x), "m" (*&y)); 80 : "m" (*&x), "m" (*&y));
81
82 boot_cpu_data.fdiv_bug = fdiv_bug;
79 if (boot_cpu_data.fdiv_bug) 83 if (boot_cpu_data.fdiv_bug)
80 printk("Hmm, FPU with FDIV bug.\n"); 84 printk("Hmm, FPU with FDIV bug.\n");
81} 85}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596d..efae3b22a0ff 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
 	  If in doubt, say N.
 
 config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+	tristate "VIA C7 Enhanced PowerSaver"
 	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
+	depends on X86_32
 	help
 	  This adds the CPUFreq driver for VIA C7 processors.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index b0c8208df9fa..dd097b835839 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd)
 	cpumask_t saved_mask = current->cpus_allowed;
 	unsigned int i;
 
-	for_each_cpu_mask(i, cmd->mask) {
+	for_each_cpu_mask_nr(i, cmd->mask) {
 		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 		do_drv_write(cmd);
 	}
@@ -451,7 +451,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 	freqs.old = perf->states[perf->state].core_frequency * 1000;
 	freqs.new = data->freq_table[next_state].frequency;
-	for_each_cpu_mask(i, cmd.mask) {
+	for_each_cpu_mask_nr(i, cmd.mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -466,7 +466,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
-	for_each_cpu_mask(i, cmd.mask) {
+	for_each_cpu_mask_nr(i, cmd.mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f563..e4a4bf870e94 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
  * It is important that the frequencies
  * are listed in ascending order here!
  */
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
 	{1000, 0x02, 0x18},
 	{2000, 0x02, 0x10},
 	{4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 199e4e05e5dc..f1685fb91fbd 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 		return 0;
 
 	/* notifiers */
-	for_each_cpu_mask(i, policy->cpus) {
+	for_each_cpu_mask_nr(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
 	 * Developer's Manual, Volume 3
 	 */
-	for_each_cpu_mask(i, policy->cpus)
+	for_each_cpu_mask_nr(i, policy->cpus)
 		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
 
 	/* notifiers */
-	for_each_cpu_mask(i, policy->cpus) {
+	for_each_cpu_mask_nr(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 206791eb46e3..4e7271999a74 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
 	return 800 + (fid * 100);
 }
 
-
 /* Return a frequency in KHz, given an input fid */
 static u32 find_khz_freq_from_fid(u32 fid)
 {
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
 	return data[pstate].frequency;
 }
 
-
 /* Return the vco fid for an input fid
  *
  * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
 	wrmsr(MSR_FIDVID_CTL, lo, hi);
 }
 
-
 /* write the new fid value along with the other control fields to the msr */
 static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 {
@@ -740,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
 {
-	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+	if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
 
-	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
-	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
-	data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
-	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
-	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+	data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+
+static struct acpi_processor_performance *acpi_perf_data;
+static int preregister_valid;
+
+static int powernow_k8_cpu_preinit_acpi(void)
+{
+	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+	if (!acpi_perf_data)
+		return -ENODEV;
+
+	if (acpi_processor_preregister_performance(acpi_perf_data))
+		return -ENODEV;
+	else
+		preregister_valid = 1;
+	return 0;
 }
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val;
+	int cpu = 0;
 
-	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
 		return -EIO;
 	}
 
 	/* verify the data contained in the ACPI structures */
-	if (data->acpi_data.state_count <= 1) {
+	if (data->acpi_data->state_count <= 1) {
 		dprintk("No ACPI P-States\n");
 		goto err_out;
 	}
 
-	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-		(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+		(data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
-			data->acpi_data.control_register.space_id,
-			data->acpi_data.status_register.space_id);
+			data->acpi_data->control_register.space_id,
+			data->acpi_data->status_register.space_id);
 		goto err_out;
 	}
 
 	/* fill in data->powernow_table */
 	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
+		* (data->acpi_data->state_count + 1)), GFP_KERNEL);
 	if (!powernow_table) {
 		dprintk("powernow_table memory alloc failure\n");
 		goto err_out;
@@ -790,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	if (ret_val)
 		goto err_out_mem;
 
-	powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
-	powernow_table[data->acpi_data.state_count].index = 0;
+	powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data->state_count].index = 0;
 	data->powernow_table = powernow_table;
 
 	/* fill in data */
-	data->numps = data->acpi_data.state_count;
+	data->numps = data->acpi_data->state_count;
 	if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 		print_basics(data);
 	powernow_k8_acpi_pst_values(data, 0);
@@ -803,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
+	/* determine affinity, from ACPI if available */
+	if (preregister_valid) {
+		if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+		    (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+			data->starting_core_affinity = data->acpi_data->shared_cpu_map;
+		else
+			data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+	} else {
+		/* best guess from family if not */
+		if (cpu_family == CPU_HW_PSTATE)
+			data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+		else
+			data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
+	}
+
 	return 0;
 
 err_out_mem:
 	kfree(powernow_table);
 
 err_out:
-	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	acpi_processor_unregister_performance(data->acpi_data, data->cpu);
 
 	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
-	data->acpi_data.state_count = 0;
+	data->acpi_data->state_count = 0;
 
 	return -ENODEV;
 }
@@ -824,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 	rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
 	data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
 
-	for (i = 0; i < data->acpi_data.state_count; i++) {
+	for (i = 0; i < data->acpi_data->state_count; i++) {
 		u32 index;
 
-		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+		index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
 		if (index > data->max_hw_pstate) {
 			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
 			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -843,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 
 		powernow_table[i].index = index;
 
-		powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+		powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
 	}
 	return 0;
 }
@@ -852,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 {
 	int i;
 	int cntlofreq = 0;
-	for (i = 0; i < data->acpi_data.state_count; i++) {
+	for (i = 0; i < data->acpi_data->state_count; i++) {
 		u32 fid;
 		u32 vid;
 
 		if (data->exttype) {
-			fid = data->acpi_data.states[i].status & EXT_FID_MASK;
-			vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+			fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+			vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
 		} else {
-			fid = data->acpi_data.states[i].control & FID_MASK;
-			vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+			fid = data->acpi_data->states[i].control & FID_MASK;
+			vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
 		}
 
 		dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -902,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 				cntlofreq = i;
 		}
 
-		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+		if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
 			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
 				powernow_table[i].frequency,
-				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+				(unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
 			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 			continue;
 		}
@@ -915,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
-	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	if (data->acpi_data->state_count)
+		acpi_processor_unregister_performance(data->acpi_data, data->cpu);
 }
 
 #else
+static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -966,7 +998,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -974,7 +1006,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
 	res = transition_fid_vid(data, fid, vid);
 	freqs.new = find_khz_freq_from_fid(data->currfid);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -997,7 +1029,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 	freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1005,7 +1037,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 	res = transition_pstate(data, pstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -1104,7 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
-	cpumask_t oldmask;
+	cpumask_t oldmask = CPU_MASK_ALL;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1177,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	/* run on any CPU again */
 	set_cpus_allowed_ptr(current, &oldmask);
 
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = cpumask_of_cpu(pol->cpu);
-	else
-		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
+	pol->cpus = data->starting_core_affinity;
 	data->available_cores = &(pol->cpus);
 
 	/* Take a crude guess here.
@@ -1303,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
 	}
 
 	if (supported_cpus == num_online_cpus()) {
+		powernow_k8_cpu_preinit_acpi();
 		printk(KERN_INFO PFX "Found %d %s "
 			"processors (%d cpu cores) (" VERSION ")\n",
 			num_online_nodes(),
@@ -1319,6 +1349,10 @@ static void __exit powernowk8_exit(void)
 	dprintk("exit\n");
 
 	cpufreq_unregister_driver(&cpufreq_amd64_driver);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+	free_percpu(acpi_perf_data);
+#endif
 }
 
 MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..a62612cd4be8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 	/* the acpi table needs to be kept. it's only available if ACPI was
 	 * used to determine valid frequency/vid/fid states */
-	struct acpi_processor_performance acpi_data;
+	struct acpi_processor_performance *acpi_data;
 #endif
 	/* we need to keep track of associated cores, but let cpufreq
 	 * handle hotplug events - so just point at cpufreq pol->cpus
 	 * structure */
 	cpumask_t *available_cores;
+	cpumask_t starting_core_affinity;
 };
 
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 908dd347c67e..15e13c01cc36 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -28,7 +28,8 @@
 #define PFX "speedstep-centrino: "
 #define MAINTAINER "cpufreq@lists.linux.org.uk"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
+#define dprintk(msg...) \
+	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
 
 #define INTEL_MSR_RANGE (0xffff)
 
@@ -66,11 +67,12 @@ struct cpu_model
 
 	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
 };
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+				  const struct cpu_id *x);
 
 /* Operating points for current CPU */
-static struct cpu_model *centrino_model[NR_CPUS];
-static const struct cpu_id *centrino_cpu[NR_CPUS];
+static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
+static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
 
 static struct cpufreq_driver centrino_driver;
 
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 		return -ENOENT;
 	}
 
-	centrino_model[policy->cpu] = model;
+	per_cpu(centrino_model, policy->cpu) = model;
 
 	dprintk("found \"%s\": max frequency: %dkHz\n",
 		model->model_name, model->max_freq);
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 }
 
 #else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
 
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+				  const struct cpu_id *x)
 {
 	if ((c->x86 == x->x86) &&
 	    (c->x86_model == x->x86_model) &&
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
 	 * for centrino, as some DSDTs are buggy.
 	 * Ideally, this can be done using the acpi_data structure.
 	 */
-	if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
-	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
-	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
+	if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
+	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
+	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
 		msr = (msr >> 8) & 0xff;
 		return msr * 100000;
 	}
 
-	if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
+	if ((!per_cpu(centrino_model, cpu)) ||
+	    (!per_cpu(centrino_model, cpu)->op_points))
 		return 0;
 
 	msr &= 0xffff;
-	for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
-		if (msr == centrino_model[cpu]->op_points[i].index)
-			return centrino_model[cpu]->op_points[i].frequency;
+	for (i = 0;
+	     per_cpu(centrino_model, cpu)->op_points[i].frequency
+							!= CPUFREQ_TABLE_END;
+	     i++) {
+		if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
+			return per_cpu(centrino_model, cpu)->
+							op_points[i].frequency;
 	}
 	if (failsafe)
-		return centrino_model[cpu]->op_points[i-1].frequency;
+		return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
 	else
 		return 0;
 }
@@ -347,7 +358,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 	int i;
 
 	/* Only Intel makes Enhanced Speedstep-capable CPUs */
-	if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
+	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
+	    !cpu_has(cpu, X86_FEATURE_EST))
 		return -ENODEV;
 
 	if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
@@ -361,9 +373,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 			break;
 
 	if (i != N_IDS)
-		centrino_cpu[policy->cpu] = &cpu_ids[i];
+		per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
 
-	if (!centrino_cpu[policy->cpu]) {
+	if (!per_cpu(centrino_cpu, policy->cpu)) {
 		dprintk("found unsupported CPU with "
 		"Enhanced SpeedStep: send /proc/cpuinfo to "
 		MAINTAINER "\n");
@@ -386,23 +398,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 		/* check to see if it stuck */
 		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 		if (!(l & (1<<16))) {
-			printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
+			printk(KERN_INFO PFX
+				"couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
 		}
 	}
 
 	freq = get_cur_freq(policy->cpu);
-
-	policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
+	policy->cpuinfo.transition_latency = 10000;
+						/* 10uS transition latency */
 	policy->cur = freq;
 
 	dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
 
-	ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
+	ret = cpufreq_frequency_table_cpuinfo(policy,
+		per_cpu(centrino_model, policy->cpu)->op_points);
 	if (ret)
 		return (ret);
 
-	cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
+	cpufreq_frequency_table_get_attr(
+		per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
 
 	return 0;
 }
@@ -411,12 +426,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
 {
 	unsigned int cpu = policy->cpu;
 
-	if (!centrino_model[cpu])
+	if (!per_cpu(centrino_model, cpu))
 		return -ENODEV;
 
 	cpufreq_frequency_table_put_attr(cpu);
 
-	centrino_model[cpu] = NULL;
+	per_cpu(centrino_model, cpu) = NULL;
 
 	return 0;
 }
@@ -430,17 +445,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
  */
 static int centrino_verify (struct cpufreq_policy *policy)
 {
-	return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
+	return cpufreq_frequency_table_verify(policy,
+			per_cpu(centrino_model, policy->cpu)->op_points);
 }
 
 /**
  * centrino_setpolicy - set a new CPUFreq policy
  * @policy: new policy
  * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
  *
  * Sets a new CPUFreq policy.
  */
+struct allmasks {
+	cpumask_t online_policy_cpus;
+	cpumask_t saved_mask;
+	cpumask_t set_mask;
+	cpumask_t covered_cpus;
+};
+
 static int centrino_target (struct cpufreq_policy *policy,
 			    unsigned int target_freq,
 			    unsigned int relation)
@@ -448,48 +472,55 @@ static int centrino_target (struct cpufreq_policy *policy,
 	unsigned int newstate = 0;
 	unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
 	struct cpufreq_freqs freqs;
-	cpumask_t online_policy_cpus;
-	cpumask_t saved_mask;
-	cpumask_t set_mask;
-	cpumask_t covered_cpus;
 	int retval = 0;
 	unsigned int j, k, first_cpu, tmp;
-
-	if (unlikely(centrino_model[cpu] == NULL))
-		return -ENODEV;
+	CPUMASK_ALLOC(allmasks);
+	CPUMASK_PTR(online_policy_cpus, allmasks);
+	CPUMASK_PTR(saved_mask, allmasks);
+	CPUMASK_PTR(set_mask, allmasks);
+	CPUMASK_PTR(covered_cpus, allmasks);
+
+	if (unlikely(allmasks == NULL))
+		return -ENOMEM;
+
+	if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
+		retval = -ENODEV;
+		goto out;
+	}
 
 	if (unlikely(cpufreq_frequency_table_target(policy,
-			centrino_model[cpu]->op_points,
+			per_cpu(centrino_model, cpu)->op_points,
 			target_freq,
 			relation,
 			&newstate))) {
-		return -EINVAL;
+		retval = -EINVAL;
+		goto out;
 	}
 
 #ifdef CONFIG_HOTPLUG_CPU
 	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+	cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
 #else
-	online_policy_cpus = policy->cpus;
+	*online_policy_cpus = policy->cpus;
 #endif
 
-	saved_mask = current->cpus_allowed;
+	*saved_mask = current->cpus_allowed;
 	first_cpu = 1;
-	cpus_clear(covered_cpus);
-	for_each_cpu_mask(j, online_policy_cpus) {
+	cpus_clear(*covered_cpus);
+	for_each_cpu_mask_nr(j, *online_policy_cpus) {
 		/*
 		 * Support for SMP systems.
 		 * Make sure we are running on CPU that wants to change freq
 		 */
-		cpus_clear(set_mask);
+		cpus_clear(*set_mask);
 		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			cpus_or(set_mask, set_mask, online_policy_cpus);
+			cpus_or(*set_mask, *set_mask, *online_policy_cpus);
 		else
-			cpu_set(j, set_mask);
+			cpu_set(j, *set_mask);
 
-		set_cpus_allowed_ptr(current, &set_mask);
+		set_cpus_allowed_ptr(current, set_mask);
 		preempt_disable();
-		if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+		if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
 			dprintk("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
@@ -500,7 +531,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			break;
 		}
 
-		msr = centrino_model[cpu]->op_points[newstate].index;
+		msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
 
 		if (first_cpu) {
 			rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
@@ -517,7 +548,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
 				target_freq, freqs.old, freqs.new, msr);
 
-			for_each_cpu_mask(k, online_policy_cpus) {
+			for_each_cpu_mask_nr(k, *online_policy_cpus) {
 				freqs.cpu = k;
 				cpufreq_notify_transition(&freqs,
 					CPUFREQ_PRECHANGE);
@@ -536,11 +567,11 @@ static int centrino_target (struct cpufreq_policy *policy,
 			break;
 		}
 
-		cpu_set(j, covered_cpus);
+		cpu_set(j, *covered_cpus);
 		preempt_enable();
 	}
 
-	for_each_cpu_mask(k, online_policy_cpus) {
+	for_each_cpu_mask_nr(k, *online_policy_cpus) {
 		freqs.cpu = k;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -553,30 +584,32 @@ static int centrino_target (struct cpufreq_policy *policy,
 	 * Best effort undo..
 	 */
 
-	if (!cpus_empty(covered_cpus)) {
-		for_each_cpu_mask(j, covered_cpus) {
+	if (!cpus_empty(*covered_cpus))
+		for_each_cpu_mask_nr(j, *covered_cpus) {
 			set_cpus_allowed_ptr(current,
 					     &cpumask_of_cpu(j));
 			wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
 		}
-	}
 
 	tmp = freqs.new;
 	freqs.new = freqs.old;
 	freqs.old = tmp;
-	for_each_cpu_mask(j, online_policy_cpus) {
+	for_each_cpu_mask_nr(j, *online_policy_cpus) {
 		freqs.cpu = j;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
 	}
-	set_cpus_allowed_ptr(current, &saved_mask);
-	return 0;
+	set_cpus_allowed_ptr(current, saved_mask);
+	retval = 0;
+	goto out;
 
 migrate_end:
 	preempt_enable();
-	set_cpus_allowed_ptr(current, &saved_mask);
-	return 0;
+	set_cpus_allowed_ptr(current, saved_mask);
+out:
+	CPUMASK_FREE(allmasks);
+	return retval;
 }
 
 static struct freq_attr* centrino_attr[] = {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 1b50244b1fdf..191f7263c61d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
 
 	cpus_allowed = current->cpus_allowed;
 
-	for_each_cpu_mask(i, policy->cpus) {
+	for_each_cpu_mask_nr(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
 	/* allow to be run on all CPUs */
 	set_cpus_allowed_ptr(current, &cpus_allowed);
 
-	for_each_cpu_mask(i, policy->cpus) {
+	for_each_cpu_mask_nr(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ff517f0b8cc4..6b0a10b002f1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -489,7 +489,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 	int sibling;
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+	for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
 		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index c4a7ec31394c..65a339678ece 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	char __user *buf = ubuf;
 	int i, err;
 
-	cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
+	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
 	if (!cpu_tsc)
 		return -ENOMEM;
 
@@ -762,10 +762,14 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
 
 /* Why are there no generic functions for this? */
 #define ACCESSOR(name, var, start) \
-	static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
+	static ssize_t show_ ## name(struct sys_device *s, \
+				     struct sysdev_attribute *attr, \
+				     char *buf) { \
 		return sprintf(buf, "%lx\n", (unsigned long)var); \
 	} \
-	static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
+	static ssize_t set_ ## name(struct sys_device *s, \
+				    struct sysdev_attribute *attr, \
+				    const char *buf, size_t siz) { \
 		char *end; \
 		unsigned long new = simple_strtoul(buf, &end, 0); \
 		if (end == buf) return -EINVAL; \
@@ -786,14 +790,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart())
 ACCESSOR(bank4ctl,bank[4],mce_restart())
 ACCESSOR(bank5ctl,bank[5],mce_restart())
 
-static ssize_t show_trigger(struct sys_device *s, char *buf)
+static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+				char *buf)
 {
 	strcpy(buf, trigger);
 	strcat(buf, "\n");
 	return strlen(trigger) + 1;
 }
 
-static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+				const char *buf,size_t siz)
 {
 	char *p;
 	int len;
@@ -806,12 +812,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
 }
 
 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-ACCESSOR(tolerant,tolerant,)
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 ACCESSOR(check_interval,check_interval,mce_restart())
 static struct sysdev_attribute *mce_attributes[] = {
 	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
 	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
-	&attr_tolerant, &attr_check_interval, &attr_trigger,
+	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
 	NULL
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 7c9a813e1193..88736cadbaa6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 	if (err)
 		goto out_free;
 
-	for_each_cpu_mask(i, b->cpus) {
+	for_each_cpu_mask_nr(i, b->cpus) {
 		if (i == cpu)
 			continue;
 
@@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #endif
 
 	/* remove all sibling symlinks before unregistering */
-	for_each_cpu_mask(i, b->cpus) {
+	for_each_cpu_mask_nr(i, b->cpus) {
 		if (i == cpu)
 			continue;
 
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1f4cc48c14c6..d5ae2243f0b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0);
 
 #define define_therm_throt_sysdev_show_func(name) \
 static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
+					struct sysdev_attribute *attr, \
 					char *buf) \
 { \
 	unsigned int cpu = dev->id; \
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 0d0d9057e7c0..a26c480b9491 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
 {
 	if (*pos == 0)	/* just in case, cpu 0 is not the first */
 		*pos = first_cpu(cpu_online_map);
-	if ((*pos) < NR_CPUS && cpu_online(*pos))
+	if ((*pos) < nr_cpu_ids && cpu_online(*pos))
 		return &cpu_data(*pos);
 	return NULL;
 }
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2de5fa2bbf77..14b11b3be31c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -141,8 +141,8 @@ static __cpuinit int cpuid_device_create(int cpu)
141{ 141{
142 struct device *dev; 142 struct device *dev;
143 143
144 dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), 144 dev = device_create_drvdata(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu),
145 "cpu%d", cpu); 145 NULL, "cpu%d", cpu);
146 return IS_ERR(dev) ? PTR_ERR(dev) : 0; 146 return IS_ERR(dev) ? PTR_ERR(dev) : 0;
147} 147}
148 148
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cdfd94cc6b14..109792bc7cfa 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,16 @@
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/irq_vectors.h> 55#include <asm/irq_vectors.h>
56 56
57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
58#include <linux/elf-em.h>
59#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
60#define __AUDIT_ARCH_LE 0x40000000
61
62#ifndef CONFIG_AUDITSYSCALL
63#define sysenter_audit syscall_trace_entry
64#define sysexit_audit syscall_exit_work
65#endif
66
57/* 67/*
58 * We use macros for low-level operations which need to be overridden 68 * We use macros for low-level operations which need to be overridden
59 * for paravirtualization. The following will never clobber any registers: 69 * for paravirtualization. The following will never clobber any registers:
@@ -333,7 +343,8 @@ sysenter_past_esp:
333 343
334 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 344 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
335 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) 345 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
336 jnz syscall_trace_entry 346 jnz sysenter_audit
347sysenter_do_call:
337 cmpl $(nr_syscalls), %eax 348 cmpl $(nr_syscalls), %eax
338 jae syscall_badsys 349 jae syscall_badsys
339 call *sys_call_table(,%eax,4) 350 call *sys_call_table(,%eax,4)
@@ -343,7 +354,8 @@ sysenter_past_esp:
343 TRACE_IRQS_OFF 354 TRACE_IRQS_OFF
344 movl TI_flags(%ebp), %ecx 355 movl TI_flags(%ebp), %ecx
345 testw $_TIF_ALLWORK_MASK, %cx 356 testw $_TIF_ALLWORK_MASK, %cx
346 jne syscall_exit_work 357 jne sysexit_audit
358sysenter_exit:
347/* if something modifies registers it must also disable sysexit */ 359/* if something modifies registers it must also disable sysexit */
348 movl PT_EIP(%esp), %edx 360 movl PT_EIP(%esp), %edx
349 movl PT_OLDESP(%esp), %ecx 361 movl PT_OLDESP(%esp), %ecx
@@ -351,6 +363,45 @@ sysenter_past_esp:
351 TRACE_IRQS_ON 363 TRACE_IRQS_ON
3521: mov PT_FS(%esp), %fs 3641: mov PT_FS(%esp), %fs
353 ENABLE_INTERRUPTS_SYSEXIT 365 ENABLE_INTERRUPTS_SYSEXIT
366
367#ifdef CONFIG_AUDITSYSCALL
368sysenter_audit:
369 testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
370 jnz syscall_trace_entry
371 addl $4,%esp
372 CFI_ADJUST_CFA_OFFSET -4
373 /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
374 /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
375 /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
376 movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
377 movl %eax,%edx /* 2nd arg: syscall number */
378 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
379 call audit_syscall_entry
380 pushl %ebx
381 CFI_ADJUST_CFA_OFFSET 4
382 movl PT_EAX(%esp),%eax /* reload syscall number */
383 jmp sysenter_do_call
384
385sysexit_audit:
386 testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
387 jne syscall_exit_work
388 TRACE_IRQS_ON
389 ENABLE_INTERRUPTS(CLBR_ANY)
390 movl %eax,%edx /* second arg, syscall return value */
391 cmpl $0,%eax /* is it < 0? */
392 setl %al /* 1 if so, 0 if not */
393 movzbl %al,%eax /* zero-extend that */
394 inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
395 call audit_syscall_exit
396 DISABLE_INTERRUPTS(CLBR_ANY)
397 TRACE_IRQS_OFF
398 movl TI_flags(%ebp), %ecx
399 testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
400 jne syscall_exit_work
401 movl PT_EAX(%esp),%eax /* reload syscall return value */
402 jmp sysenter_exit
403#endif
404
354 CFI_ENDPROC 405 CFI_ENDPROC
355.pushsection .fixup,"ax" 406.pushsection .fixup,"ax"
3562: movl $0,PT_FS(%esp) 4072: movl $0,PT_FS(%esp)
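The sysenter/sysexit audit blocks above assemble the audit_syscall_entry()/audit_syscall_exit() arguments directly in registers. As a reading aid only, the exit-side computation (cmpl/setl/movzbl/inc) corresponds roughly to this C; the helper name is made up:

/* Reading aid, not kernel code: a negative syscall return value maps to
 * AUDITSC_FAILURE (2), anything else to AUDITSC_SUCCESS (1), and the raw
 * return value is passed as the second argument. */
static inline void sysexit_audit_sketch(long retval)
{
        int result = (retval < 0) ? 2 /* AUDITSC_FAILURE */
                                  : 1 /* AUDITSC_SUCCESS */;

        audit_syscall_exit(result, retval);
}
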
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8410e26f4183..89434d439605 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -53,6 +53,12 @@
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55 55
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h>
58#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
59#define __AUDIT_ARCH_64BIT 0x80000000
60#define __AUDIT_ARCH_LE 0x40000000
61
56 .code64 62 .code64
57 63
58#ifdef CONFIG_FTRACE 64#ifdef CONFIG_FTRACE
@@ -351,6 +357,7 @@ ENTRY(system_call_after_swapgs)
351 GET_THREAD_INFO(%rcx) 357 GET_THREAD_INFO(%rcx)
352 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) 358 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
353 jnz tracesys 359 jnz tracesys
360system_call_fastpath:
354 cmpq $__NR_syscall_max,%rax 361 cmpq $__NR_syscall_max,%rax
355 ja badsys 362 ja badsys
356 movq %r10,%rcx 363 movq %r10,%rcx
@@ -402,16 +409,16 @@ sysret_careful:
402sysret_signal: 409sysret_signal:
403 TRACE_IRQS_ON 410 TRACE_IRQS_ON
404 ENABLE_INTERRUPTS(CLBR_NONE) 411 ENABLE_INTERRUPTS(CLBR_NONE)
405 testl $_TIF_DO_NOTIFY_MASK,%edx 412#ifdef CONFIG_AUDITSYSCALL
406 jz 1f 413 bt $TIF_SYSCALL_AUDIT,%edx
407 414 jc sysret_audit
408 /* Really a signal */ 415#endif
409 /* edx: work flags (arg3) */ 416 /* edx: work flags (arg3) */
410 leaq do_notify_resume(%rip),%rax 417 leaq do_notify_resume(%rip),%rax
411 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 418 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
412 xorl %esi,%esi # oldset -> arg2 419 xorl %esi,%esi # oldset -> arg2
413 call ptregscall_common 420 call ptregscall_common
4141: movl $_TIF_WORK_MASK,%edi 421 movl $_TIF_WORK_MASK,%edi
415 /* Use IRET because user could have changed frame. This 422 /* Use IRET because user could have changed frame. This
416 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 423 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
417 DISABLE_INTERRUPTS(CLBR_NONE) 424 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -422,8 +429,45 @@ badsys:
422 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 429 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
423 jmp ret_from_sys_call 430 jmp ret_from_sys_call
424 431
432#ifdef CONFIG_AUDITSYSCALL
433 /*
434 * Fast path for syscall audit without full syscall trace.
435 * We just call audit_syscall_entry() directly, and then
436 * jump back to the normal fast path.
437 */
438auditsys:
439 movq %r10,%r9 /* 6th arg: 4th syscall arg */
440 movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
441 movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
442 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
443 movq %rax,%rsi /* 2nd arg: syscall number */
444 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
445 call audit_syscall_entry
446 LOAD_ARGS 0 /* reload call-clobbered registers */
447 jmp system_call_fastpath
448
449 /*
450 * Return fast path for syscall audit. Call audit_syscall_exit()
451 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
452 * masked off.
453 */
454sysret_audit:
455 movq %rax,%rsi /* second arg, syscall return value */
456 cmpq $0,%rax /* is it < 0? */
457 setl %al /* 1 if so, 0 if not */
458 movzbl %al,%edi /* zero-extend that into %edi */
459 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
460 call audit_syscall_exit
461 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
462 jmp sysret_check
463#endif /* CONFIG_AUDITSYSCALL */
464
425 /* Do syscall tracing */ 465 /* Do syscall tracing */
426tracesys: 466tracesys:
467#ifdef CONFIG_AUDITSYSCALL
468 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
469 jz auditsys
470#endif
427 SAVE_REST 471 SAVE_REST
428 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 472 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
429 FIXUP_TOP_OF_STACK %rdi 473 FIXUP_TOP_OF_STACK %rdi
@@ -448,6 +492,7 @@ tracesys:
448 * Has correct top of stack, but partial stack frame. 492 * Has correct top of stack, but partial stack frame.
449 */ 493 */
450 .globl int_ret_from_sys_call 494 .globl int_ret_from_sys_call
495 .globl int_with_check
451int_ret_from_sys_call: 496int_ret_from_sys_call:
452 DISABLE_INTERRUPTS(CLBR_NONE) 497 DISABLE_INTERRUPTS(CLBR_NONE)
453 TRACE_IRQS_OFF 498 TRACE_IRQS_OFF
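The 64-bit auditsys block above is the same idea on the entry side: shuffle the syscall registers into the audit_syscall_entry() argument order and jump back to the fast path. Written out as a sketch (the regs-> field names are illustrative of pt_regs, not taken from this patch):

/* Sketch of the register-to-argument mapping performed by auditsys. */
audit_syscall_entry(AUDIT_ARCH_X86_64,  /* 1st arg (%rdi): audit arch        */
                    regs->orig_ax,      /* 2nd arg (%rsi): syscall number    */
                    regs->di,           /* 3rd arg (%rdx): 1st syscall arg   */
                    regs->si,           /* 4th arg (%rcx): 2nd syscall arg   */
                    regs->dx,           /* 5th arg (%r8):  3rd syscall arg   */
                    regs->r10);         /* 6th arg (%r9):  4th syscall arg   */
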
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..eaff0bbb1444 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -99,3 +99,4 @@ int is_uv_system(void)
99{ 99{
100 return uv_system_type != UV_NONE; 100 return uv_system_type != UV_NONE;
101} 101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 1a9c68845ee8..786548a62d38 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -168,7 +168,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
168 * May as well be the first. 168 * May as well be the first.
169 */ 169 */
170 cpu = first_cpu(cpumask); 170 cpu = first_cpu(cpumask);
171 if ((unsigned)cpu < NR_CPUS) 171 if ((unsigned)cpu < nr_cpu_ids)
172 return per_cpu(x86_cpu_to_apicid, cpu); 172 return per_cpu(x86_cpu_to_apicid, cpu);
173 else 173 else
174 return BAD_APICID; 174 return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 3c3929340692..2cfcbded888a 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -98,7 +98,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector)
98{ 98{
99 unsigned int cpu; 99 unsigned int cpu;
100 100
101 for (cpu = 0; cpu < NR_CPUS; ++cpu) 101 for_each_possible_cpu(cpu)
102 if (cpu_isset(cpu, mask)) 102 if (cpu_isset(cpu, mask))
103 uv_send_IPI_one(cpu, vector); 103 uv_send_IPI_one(cpu, vector);
104} 104}
@@ -132,7 +132,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
132 * May as well be the first. 132 * May as well be the first.
133 */ 133 */
134 cpu = first_cpu(cpumask); 134 cpu = first_cpu(cpumask);
135 if ((unsigned)cpu < NR_CPUS) 135 if ((unsigned)cpu < nr_cpu_ids)
136 return per_cpu(x86_cpu_to_apicid, cpu); 136 return per_cpu(x86_cpu_to_apicid, cpu);
137 else 137 else
138 return BAD_APICID; 138 return BAD_APICID;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f67e93441caf..a7010c3a377a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP
4561: 4561:
457#endif /* CONFIG_SMP */ 457#endif /* CONFIG_SMP */
458 jmp *(initial_code) 458 jmp *(initial_code)
459.align 4
460ENTRY(initial_code)
461 .long i386_start_kernel
462 459
463/* 460/*
464 * We depend on ET to be correct. This checks for 287/387. 461 * We depend on ET to be correct. This checks for 287/387.
@@ -601,6 +598,11 @@ ignore_int:
601#endif 598#endif
602 iret 599 iret
603 600
601.section .cpuinit.data,"wa"
602.align 4
603ENTRY(initial_code)
604 .long i386_start_kernel
605
604.section .text 606.section .text
605/* 607/*
606 * Real beginning of normal "text" segment 608 * Real beginning of normal "text" segment
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 0ea6a19bfdfe..ad2b15a1334d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -468,7 +468,7 @@ void hpet_disable(void)
468#define RTC_NUM_INTS 1 468#define RTC_NUM_INTS 1
469 469
470static unsigned long hpet_rtc_flags; 470static unsigned long hpet_rtc_flags;
471static unsigned long hpet_prev_update_sec; 471static int hpet_prev_update_sec;
472static struct rtc_time hpet_alarm_time; 472static struct rtc_time hpet_alarm_time;
473static unsigned long hpet_pie_count; 473static unsigned long hpet_pie_count;
474static unsigned long hpet_t1_cmp; 474static unsigned long hpet_t1_cmp;
@@ -575,6 +575,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
575 575
576 hpet_rtc_flags |= bit_mask; 576 hpet_rtc_flags |= bit_mask;
577 577
578 if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE))
579 hpet_prev_update_sec = -1;
580
578 if (!oldbits) 581 if (!oldbits)
579 hpet_rtc_timer_init(); 582 hpet_rtc_timer_init();
580 583
@@ -652,7 +655,7 @@ static void hpet_rtc_timer_reinit(void)
652 if (hpet_rtc_flags & RTC_PIE) 655 if (hpet_rtc_flags & RTC_PIE)
653 hpet_pie_count += lost_ints; 656 hpet_pie_count += lost_ints;
654 if (printk_ratelimit()) 657 if (printk_ratelimit())
655 printk(KERN_WARNING "rtc: lost %d interrupts\n", 658 printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n",
656 lost_ints); 659 lost_ints);
657 } 660 }
658} 661}
@@ -670,7 +673,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
670 673
671 if (hpet_rtc_flags & RTC_UIE && 674 if (hpet_rtc_flags & RTC_UIE &&
672 curr_time.tm_sec != hpet_prev_update_sec) { 675 curr_time.tm_sec != hpet_prev_update_sec) {
673 rtc_int_flag = RTC_UF; 676 if (hpet_prev_update_sec >= 0)
677 rtc_int_flag = RTC_UF;
674 hpet_prev_update_sec = curr_time.tm_sec; 678 hpet_prev_update_sec = curr_time.tm_sec;
675 } 679 }
676 680
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c5..09cddb57bec4 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -57,7 +57,7 @@ atomic_t irq_mis_count;
57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
58 58
59static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock); 60DEFINE_SPINLOCK(vector_lock);
61 61
62int timer_through_8259 __initdata; 62int timer_through_8259 __initdata;
63 63
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq)
1209 return vector; 1209 return vector;
1210} 1210}
1211 1211
1212void setup_vector_irq(int cpu)
1213{
1214}
1215
1216static struct irq_chip ioapic_chip; 1212static struct irq_chip ioapic_chip;
1217 1213
1218#define IOAPIC_AUTO -1 1214#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 64a46affd858..61a83b70c18f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -101,7 +101,7 @@ int timer_through_8259 __initdata;
101static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 101static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
102 102
103static DEFINE_SPINLOCK(ioapic_lock); 103static DEFINE_SPINLOCK(ioapic_lock);
104DEFINE_SPINLOCK(vector_lock); 104static DEFINE_SPINLOCK(vector_lock);
105 105
106/* 106/*
107 * # of IRQ routing registers 107 * # of IRQ routing registers
@@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin)
697 return irq; 697 return irq;
698} 698}
699 699
700void lock_vector_lock(void)
701{
 702	/* Used so that the online set of cpus does not change
703 * during assign_irq_vector.
704 */
705 spin_lock(&vector_lock);
706}
707
708void unlock_vector_lock(void)
709{
710 spin_unlock(&vector_lock);
711}
712
700static int __assign_irq_vector(int irq, cpumask_t mask) 713static int __assign_irq_vector(int irq, cpumask_t mask)
701{ 714{
702 /* 715 /*
@@ -732,7 +745,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
732 return 0; 745 return 0;
733 } 746 }
734 747
735 for_each_cpu_mask(cpu, mask) { 748 for_each_cpu_mask_nr(cpu, mask) {
736 cpumask_t domain, new_mask; 749 cpumask_t domain, new_mask;
737 int new_cpu; 750 int new_cpu;
738 int vector, offset; 751 int vector, offset;
@@ -753,7 +766,7 @@ next:
753 continue; 766 continue;
754 if (vector == IA32_SYSCALL_VECTOR) 767 if (vector == IA32_SYSCALL_VECTOR)
755 goto next; 768 goto next;
756 for_each_cpu_mask(new_cpu, new_mask) 769 for_each_cpu_mask_nr(new_cpu, new_mask)
757 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 770 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
758 goto next; 771 goto next;
759 /* Found one! */ 772 /* Found one! */
@@ -763,7 +776,7 @@ next:
763 cfg->move_in_progress = 1; 776 cfg->move_in_progress = 1;
764 cfg->old_domain = cfg->domain; 777 cfg->old_domain = cfg->domain;
765 } 778 }
766 for_each_cpu_mask(new_cpu, new_mask) 779 for_each_cpu_mask_nr(new_cpu, new_mask)
767 per_cpu(vector_irq, new_cpu)[vector] = irq; 780 per_cpu(vector_irq, new_cpu)[vector] = irq;
768 cfg->vector = vector; 781 cfg->vector = vector;
769 cfg->domain = domain; 782 cfg->domain = domain;
@@ -795,14 +808,14 @@ static void __clear_irq_vector(int irq)
795 808
796 vector = cfg->vector; 809 vector = cfg->vector;
797 cpus_and(mask, cfg->domain, cpu_online_map); 810 cpus_and(mask, cfg->domain, cpu_online_map);
798 for_each_cpu_mask(cpu, mask) 811 for_each_cpu_mask_nr(cpu, mask)
799 per_cpu(vector_irq, cpu)[vector] = -1; 812 per_cpu(vector_irq, cpu)[vector] = -1;
800 813
801 cfg->vector = 0; 814 cfg->vector = 0;
802 cpus_clear(cfg->domain); 815 cpus_clear(cfg->domain);
803} 816}
804 817
805static void __setup_vector_irq(int cpu) 818void __setup_vector_irq(int cpu)
806{ 819{
807 /* Initialize vector_irq on a new cpu */ 820 /* Initialize vector_irq on a new cpu */
808 /* This function must be called with vector_lock held */ 821 /* This function must be called with vector_lock held */
@@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu)
825 } 838 }
826} 839}
827 840
828void setup_vector_irq(int cpu)
829{
830 spin_lock(&vector_lock);
831 __setup_vector_irq(smp_processor_id());
832 spin_unlock(&vector_lock);
833}
834
835
836static struct irq_chip ioapic_chip; 841static struct irq_chip ioapic_chip;
837 842
838static void ioapic_register_intr(int irq, unsigned long trigger) 843static void ioapic_register_intr(int irq, unsigned long trigger)
@@ -1373,12 +1378,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
1373static int ioapic_retrigger_irq(unsigned int irq) 1378static int ioapic_retrigger_irq(unsigned int irq)
1374{ 1379{
1375 struct irq_cfg *cfg = &irq_cfg[irq]; 1380 struct irq_cfg *cfg = &irq_cfg[irq];
1376 cpumask_t mask;
1377 unsigned long flags; 1381 unsigned long flags;
1378 1382
1379 spin_lock_irqsave(&vector_lock, flags); 1383 spin_lock_irqsave(&vector_lock, flags);
1380 mask = cpumask_of_cpu(first_cpu(cfg->domain)); 1384 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
1381 send_IPI_mask(mask, cfg->vector);
1382 spin_unlock_irqrestore(&vector_lock, flags); 1385 spin_unlock_irqrestore(&vector_lock, flags);
1383 1386
1384 return 1; 1387 return 1;
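lock_vector_lock(), unlock_vector_lock() and the now-global __setup_vector_irq() are meant to be paired by the CPU bring-up code so that the online mask and the per-cpu vector_irq[] table change under one lock. A sketch of the assumed caller (only the three helpers come from the hunks above; the rest is illustrative):

static void bringup_cpu_vectors_sketch(int cpu)
{
        lock_vector_lock();             /* keep cpu_online_map stable for
                                         * concurrent assign_irq_vector() */
        cpu_set(cpu, cpu_online_map);   /* illustrative online marker     */
        __setup_vector_irq(cpu);        /* populate per-cpu vector_irq[]  */
        unlock_vector_lock();
}
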
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 0373e88de95a..1f26fd9ec4f4 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -43,10 +43,11 @@
43 43
44#define BUILD_IRQ(nr) \ 44#define BUILD_IRQ(nr) \
45 asmlinkage void IRQ_NAME(nr); \ 45 asmlinkage void IRQ_NAME(nr); \
46 asm("\n.p2align\n" \ 46 asm("\n.text\n.p2align\n" \
47 "IRQ" #nr "_interrupt:\n\t" \ 47 "IRQ" #nr "_interrupt:\n\t" \
48 "push $~(" #nr ") ; " \ 48 "push $~(" #nr ") ; " \
49 "jmp common_interrupt"); 49 "jmp common_interrupt\n" \
50 ".previous");
50 51
51#define BI(x,y) \ 52#define BI(x,y) \
52 BUILD_IRQ(x##y) 53 BUILD_IRQ(x##y)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0d..6c27679ec6aa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
431 regs->ip = (unsigned long)p->ainsn.insn; 431 regs->ip = (unsigned long)p->ainsn.insn;
432} 432}
433 433
434/* Called with kretprobe_lock held */
435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 434void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
436 struct pt_regs *regs) 435 struct pt_regs *regs)
437{ 436{
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
682 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 681 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
683 682
684 INIT_HLIST_HEAD(&empty_rp); 683 INIT_HLIST_HEAD(&empty_rp);
685 spin_lock_irqsave(&kretprobe_lock, flags); 684 kretprobe_hash_lock(current, &head, &flags);
686 head = kretprobe_inst_table_head(current);
687 /* fixup registers */ 685 /* fixup registers */
688#ifdef CONFIG_X86_64 686#ifdef CONFIG_X86_64
689 regs->cs = __KERNEL_CS; 687 regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
732 730
733 kretprobe_assert(ri, orig_ret_address, trampoline_address); 731 kretprobe_assert(ri, orig_ret_address, trampoline_address);
734 732
735 spin_unlock_irqrestore(&kretprobe_lock, flags); 733 kretprobe_hash_unlock(current, &flags);
736 734
737 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 735 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
738 hlist_del(&ri->hlist); 736 hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c49ff2dc8f83..0ed5f939b905 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -63,12 +63,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
63 63
64 if (reload) { 64 if (reload) {
65#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
66 cpumask_t mask;
67
68 preempt_disable(); 66 preempt_disable();
69 load_LDT(pc); 67 load_LDT(pc);
70 mask = cpumask_of_cpu(smp_processor_id()); 68 if (!cpus_equal(current->mm->cpu_vm_mask,
71 if (!cpus_equal(current->mm->cpu_vm_mask, mask)) 69 cpumask_of_cpu(smp_processor_id())))
72 smp_call_function(flush_ldt, current->mm, 1); 70 smp_call_function(flush_ldt, current->mm, 1);
73 preempt_enable(); 71 preempt_enable();
74#else 72#else
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8864230d55af..9fe478d98406 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -22,6 +22,7 @@
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/system.h> 24#include <asm/system.h>
25#include <asm/cacheflush.h>
25 26
26#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 27#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
27static u32 kexec_pgd[1024] PAGE_ALIGNED; 28static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -85,10 +86,12 @@ static void load_segments(void)
85 * reboot code buffer to allow us to avoid allocations 86 * reboot code buffer to allow us to avoid allocations
86 * later. 87 * later.
87 * 88 *
88 * Currently nothing. 89 * Make control page executable.
89 */ 90 */
90int machine_kexec_prepare(struct kimage *image) 91int machine_kexec_prepare(struct kimage *image)
91{ 92{
93 if (nx_enabled)
94 set_pages_x(image->control_code_page, 1);
92 return 0; 95 return 0;
93} 96}
94 97
@@ -98,27 +101,48 @@ int machine_kexec_prepare(struct kimage *image)
98 */ 101 */
99void machine_kexec_cleanup(struct kimage *image) 102void machine_kexec_cleanup(struct kimage *image)
100{ 103{
104 if (nx_enabled)
105 set_pages_nx(image->control_code_page, 1);
101} 106}
102 107
103/* 108/*
104 * Do not allocate memory (or fail in any way) in machine_kexec(). 109 * Do not allocate memory (or fail in any way) in machine_kexec().
105 * We are past the point of no return, committed to rebooting now. 110 * We are past the point of no return, committed to rebooting now.
106 */ 111 */
107NORET_TYPE void machine_kexec(struct kimage *image) 112void machine_kexec(struct kimage *image)
108{ 113{
109 unsigned long page_list[PAGES_NR]; 114 unsigned long page_list[PAGES_NR];
110 void *control_page; 115 void *control_page;
116 asmlinkage unsigned long
117 (*relocate_kernel_ptr)(unsigned long indirection_page,
118 unsigned long control_page,
119 unsigned long start_address,
120 unsigned int has_pae,
121 unsigned int preserve_context);
111 122
112 tracer_disable(); 123 tracer_disable();
113 124
114 /* Interrupts aren't acceptable while we reboot */ 125 /* Interrupts aren't acceptable while we reboot */
115 local_irq_disable(); 126 local_irq_disable();
116 127
128 if (image->preserve_context) {
129#ifdef CONFIG_X86_IO_APIC
130 /* We need to put APICs in legacy mode so that we can
 131	 * get timer interrupts in the second kernel. kexec/kdump
132 * paths already have calls to disable_IO_APIC() in
 133	 * one form or other. kexec jump path also needs
134 * one.
135 */
136 disable_IO_APIC();
137#endif
138 }
139
117 control_page = page_address(image->control_code_page); 140 control_page = page_address(image->control_code_page);
118 memcpy(control_page, relocate_kernel, PAGE_SIZE); 141 memcpy(control_page, relocate_kernel, PAGE_SIZE/2);
119 142
143 relocate_kernel_ptr = control_page;
120 page_list[PA_CONTROL_PAGE] = __pa(control_page); 144 page_list[PA_CONTROL_PAGE] = __pa(control_page);
121 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 145 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
122 page_list[PA_PGD] = __pa(kexec_pgd); 146 page_list[PA_PGD] = __pa(kexec_pgd);
123 page_list[VA_PGD] = (unsigned long)kexec_pgd; 147 page_list[VA_PGD] = (unsigned long)kexec_pgd;
124#ifdef CONFIG_X86_PAE 148#ifdef CONFIG_X86_PAE
@@ -131,6 +155,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
131 page_list[VA_PTE_0] = (unsigned long)kexec_pte0; 155 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
132 page_list[PA_PTE_1] = __pa(kexec_pte1); 156 page_list[PA_PTE_1] = __pa(kexec_pte1);
133 page_list[VA_PTE_1] = (unsigned long)kexec_pte1; 157 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
158 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
134 159
135 /* The segment registers are funny things, they have both a 160 /* The segment registers are funny things, they have both a
136 * visible and an invisible part. Whenever the visible part is 161 * visible and an invisible part. Whenever the visible part is
@@ -149,8 +174,10 @@ NORET_TYPE void machine_kexec(struct kimage *image)
149 set_idt(phys_to_virt(0),0); 174 set_idt(phys_to_virt(0),0);
150 175
151 /* now call it */ 176 /* now call it */
152 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 177 image->start = relocate_kernel_ptr((unsigned long)image->head,
153 image->start, cpu_has_pae); 178 (unsigned long)page_list,
179 image->start, cpu_has_pae,
180 image->preserve_context);
154} 181}
155 182
156void arch_crash_save_vmcoreinfo(void) 183void arch_crash_save_vmcoreinfo(void)
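Condensed, the machine_kexec() changes above amount to: copy only the first half of relocate_kernel into the control page, call it through that copy, and treat its return value as the entry point to resume at for the preserve-context (kexec jump) case. Sketch:

/* Condensed sketch of the new call sequence; types are from the hunk above. */
control_page = page_address(image->control_code_page);
memcpy(control_page, relocate_kernel, PAGE_SIZE/2); /* first half only;
                                                     * presumably the rest of
                                                     * the page is scratch for
                                                     * the relocation code */
relocate_kernel_ptr = control_page;

image->start = relocate_kernel_ptr((unsigned long)image->head,
                                   (unsigned long)page_list,
                                   image->start, cpu_has_pae,
                                   image->preserve_context);
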
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9dd9262693a3..c43caa3a91f3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image)
181 * Do not allocate memory (or fail in any way) in machine_kexec(). 181 * Do not allocate memory (or fail in any way) in machine_kexec().
182 * We are past the point of no return, committed to rebooting now. 182 * We are past the point of no return, committed to rebooting now.
183 */ 183 */
184NORET_TYPE void machine_kexec(struct kimage *image) 184void machine_kexec(struct kimage *image)
185{ 185{
186 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
187 void *control_page; 187 void *control_page;
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 56b933119a04..652fa5c38ebe 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -644,7 +644,9 @@ static void microcode_fini_cpu(int cpu)
644 mutex_unlock(&microcode_mutex); 644 mutex_unlock(&microcode_mutex);
645} 645}
646 646
647static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) 647static ssize_t reload_store(struct sys_device *dev,
648 struct sysdev_attribute *attr,
649 const char *buf, size_t sz)
648{ 650{
649 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 651 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
650 char *end; 652 char *end;
@@ -655,9 +657,7 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
655 if (end == buf) 657 if (end == buf)
656 return -EINVAL; 658 return -EINVAL;
657 if (val == 1) { 659 if (val == 1) {
658 cpumask_t old; 660 cpumask_t old = current->cpus_allowed;
659
660 old = current->cpus_allowed;
661 661
662 get_online_cpus(); 662 get_online_cpus();
663 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 663 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
@@ -674,14 +674,16 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
674 return sz; 674 return sz;
675} 675}
676 676
677static ssize_t version_show(struct sys_device *dev, char *buf) 677static ssize_t version_show(struct sys_device *dev,
678 struct sysdev_attribute *attr, char *buf)
678{ 679{
679 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 680 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
680 681
681 return sprintf(buf, "0x%x\n", uci->rev); 682 return sprintf(buf, "0x%x\n", uci->rev);
682} 683}
683 684
684static ssize_t pf_show(struct sys_device *dev, char *buf) 685static ssize_t pf_show(struct sys_device *dev,
686 struct sysdev_attribute *attr, char *buf)
685{ 687{
686 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 688 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
687 689
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 0e867676b5a5..6ba87830d4b1 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -22,6 +22,7 @@
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/mm.h>
25#include <linux/slab.h> 26#include <linux/slab.h>
26#include <linux/bug.h> 27#include <linux/bug.h>
27 28
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed8..678090508a62 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
83 if (x86_quirks->mpc_oem_bus_info) 83 if (x86_quirks->mpc_oem_bus_info)
84 x86_quirks->mpc_oem_bus_info(m, str); 84 x86_quirks->mpc_oem_bus_info(m, str);
85 else 85 else
86 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); 86 apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
87 87
88#if MAX_MP_BUSSES < 256 88#if MAX_MP_BUSSES < 256
89 if (m->mpc_busid >= MAX_MP_BUSSES) { 89 if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
154 154
155static void print_MP_intsrc_info(struct mpc_config_intsrc *m) 155static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
156{ 156{
157 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," 157 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
158 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 158 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
159 m->mpc_irqtype, m->mpc_irqflag & 3, 159 m->mpc_irqtype, m->mpc_irqflag & 3,
160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
163 163
164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
165{ 165{
166 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," 166 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
167 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 167 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
168 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 168 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
169 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 169 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
235 235
236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) 236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
237{ 237{
238 printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," 238 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
240 m->mpc_irqtype, m->mpc_irqflag & 3, 240 m->mpc_irqtype, m->mpc_irqflag & 3,
241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, 241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
695 unsigned int *bp = phys_to_virt(base); 695 unsigned int *bp = phys_to_virt(base);
696 struct intel_mp_floating *mpf; 696 struct intel_mp_floating *mpf;
697 697
698 printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); 698 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
699 bp, length);
699 BUILD_BUG_ON(sizeof(*mpf) != 16); 700 BUILD_BUG_ON(sizeof(*mpf) != 16);
700 701
701 while (length > 0) { 702 while (length > 0) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index a153b3905f60..9fd809552447 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -149,8 +149,8 @@ static int __cpuinit msr_device_create(int cpu)
149{ 149{
150 struct device *dev; 150 struct device *dev;
151 151
152 dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), 152 dev = device_create_drvdata(msr_class, NULL, MKDEV(MSR_MAJOR, cpu),
153 "msr%d", cpu); 153 NULL, "msr%d", cpu);
154 return IS_ERR(dev) ? PTR_ERR(dev) : 0; 154 return IS_ERR(dev) ? PTR_ERR(dev) : 0;
155} 155}
156 156
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 097d8a6797fa..94da4d52d798 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = {
443#endif /* PAGETABLE_LEVELS >= 3 */ 443#endif /* PAGETABLE_LEVELS >= 3 */
444 444
445 .pte_val = native_pte_val, 445 .pte_val = native_pte_val,
446 .pte_flags = native_pte_val, 446 .pte_flags = native_pte_flags,
447 .pgd_val = native_pgd_val, 447 .pgd_val = native_pgd_val,
448 448
449 .make_pte = native_make_pte, 449 .make_pte = native_make_pte,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7c..02d19328525d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/spinlock.h> 30#include <linux/spinlock.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/crash_dump.h>
32#include <linux/dma-mapping.h> 33#include <linux/dma-mapping.h>
33#include <linux/bitops.h> 34#include <linux/bitops.h>
34#include <linux/pci_ids.h> 35#include <linux/pci_ids.h>
@@ -36,6 +37,7 @@
36#include <linux/delay.h> 37#include <linux/delay.h>
37#include <linux/scatterlist.h> 38#include <linux/scatterlist.h>
38#include <linux/iommu-helper.h> 39#include <linux/iommu-helper.h>
40
39#include <asm/iommu.h> 41#include <asm/iommu.h>
40#include <asm/calgary.h> 42#include <asm/calgary.h>
41#include <asm/tce.h> 43#include <asm/tce.h>
@@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
167static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); 169static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
168static void calioc2_tce_cache_blast(struct iommu_table *tbl); 170static void calioc2_tce_cache_blast(struct iommu_table *tbl);
169static void calioc2_dump_error_regs(struct iommu_table *tbl); 171static void calioc2_dump_error_regs(struct iommu_table *tbl);
172static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
173static void get_tce_space_from_tar(void);
170 174
171static struct cal_chipset_ops calgary_chip_ops = { 175static struct cal_chipset_ops calgary_chip_ops = {
172 .handle_quirks = calgary_handle_quirks, 176 .handle_quirks = calgary_handle_quirks,
@@ -410,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev,
410 } 414 }
411} 415}
412 416
413static int calgary_nontranslate_map_sg(struct device* dev,
414 struct scatterlist *sg, int nelems, int direction)
415{
416 struct scatterlist *s;
417 int i;
418
419 for_each_sg(sg, s, nelems, i) {
420 struct page *p = sg_page(s);
421
422 BUG_ON(!p);
423 s->dma_address = virt_to_bus(sg_virt(s));
424 s->dma_length = s->length;
425 }
426 return nelems;
427}
428
429static int calgary_map_sg(struct device *dev, struct scatterlist *sg, 417static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
430 int nelems, int direction) 418 int nelems, int direction)
431{ 419{
@@ -436,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
436 unsigned long entry; 424 unsigned long entry;
437 int i; 425 int i;
438 426
439 if (!translation_enabled(tbl))
440 return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
441
442 for_each_sg(sg, s, nelems, i) { 427 for_each_sg(sg, s, nelems, i) {
443 BUG_ON(!sg_page(s)); 428 BUG_ON(!sg_page(s));
444 429
@@ -474,7 +459,6 @@ error:
474static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, 459static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
475 size_t size, int direction) 460 size_t size, int direction)
476{ 461{
477 dma_addr_t dma_handle = bad_dma_address;
478 void *vaddr = phys_to_virt(paddr); 462 void *vaddr = phys_to_virt(paddr);
479 unsigned long uaddr; 463 unsigned long uaddr;
480 unsigned int npages; 464 unsigned int npages;
@@ -483,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
483 uaddr = (unsigned long)vaddr; 467 uaddr = (unsigned long)vaddr;
484 npages = num_dma_pages(uaddr, size); 468 npages = num_dma_pages(uaddr, size);
485 469
486 if (translation_enabled(tbl)) 470 return iommu_alloc(dev, tbl, vaddr, npages, direction);
487 dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
488 else
489 dma_handle = virt_to_bus(vaddr);
490
491 return dma_handle;
492} 471}
493 472
494static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, 473static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
@@ -497,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
497 struct iommu_table *tbl = find_iommu_table(dev); 476 struct iommu_table *tbl = find_iommu_table(dev);
498 unsigned int npages; 477 unsigned int npages;
499 478
500 if (!translation_enabled(tbl))
501 return;
502
503 npages = num_dma_pages(dma_handle, size); 479 npages = num_dma_pages(dma_handle, size);
504 iommu_free(tbl, dma_handle, npages); 480 iommu_free(tbl, dma_handle, npages);
505} 481}
@@ -522,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
522 goto error; 498 goto error;
523 memset(ret, 0, size); 499 memset(ret, 0, size);
524 500
525 if (translation_enabled(tbl)) { 501 /* set up tces to cover the allocated range */
526 /* set up tces to cover the allocated range */ 502 mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
527 mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); 503 if (mapping == bad_dma_address)
528 if (mapping == bad_dma_address) 504 goto free;
529 goto free; 505 *dma_handle = mapping;
530
531 *dma_handle = mapping;
532 } else /* non translated slot */
533 *dma_handle = virt_to_bus(ret);
534
535 return ret; 506 return ret;
536
537free: 507free:
538 free_pages((unsigned long)ret, get_order(size)); 508 free_pages((unsigned long)ret, get_order(size));
539 ret = NULL; 509 ret = NULL;
@@ -541,7 +511,7 @@ error:
541 return ret; 511 return ret;
542} 512}
543 513
544static const struct dma_mapping_ops calgary_dma_ops = { 514static struct dma_mapping_ops calgary_dma_ops = {
545 .alloc_coherent = calgary_alloc_coherent, 515 .alloc_coherent = calgary_alloc_coherent,
546 .map_single = calgary_map_single, 516 .map_single = calgary_map_single,
547 .unmap_single = calgary_unmap_single, 517 .unmap_single = calgary_unmap_single,
@@ -830,7 +800,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
830 800
831 tbl = pci_iommu(dev->bus); 801 tbl = pci_iommu(dev->bus);
832 tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; 802 tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
833 tce_free(tbl, 0, tbl->it_size); 803
804 if (is_kdump_kernel())
805 calgary_init_bitmap_from_tce_table(tbl);
806 else
807 tce_free(tbl, 0, tbl->it_size);
834 808
835 if (is_calgary(dev->device)) 809 if (is_calgary(dev->device))
836 tbl->chip_ops = &calgary_chip_ops; 810 tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1183,10 @@ static int __init calgary_init(void)
1209 if (ret) 1183 if (ret)
1210 return ret; 1184 return ret;
1211 1185
1186 /* Purely for kdump kernel case */
1187 if (is_kdump_kernel())
1188 get_tce_space_from_tar();
1189
1212 do { 1190 do {
1213 dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); 1191 dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
1214 if (!dev) 1192 if (!dev)
@@ -1230,6 +1208,16 @@ static int __init calgary_init(void)
1230 goto error; 1208 goto error;
1231 } while (1); 1209 } while (1);
1232 1210
1211 dev = NULL;
1212 for_each_pci_dev(dev) {
1213 struct iommu_table *tbl;
1214
1215 tbl = find_iommu_table(&dev->dev);
1216
1217 if (translation_enabled(tbl))
1218 dev->dev.archdata.dma_ops = &calgary_dma_ops;
1219 }
1220
1233 return ret; 1221 return ret;
1234 1222
1235error: 1223error:
@@ -1251,6 +1239,7 @@ error:
1251 calgary_disable_translation(dev); 1239 calgary_disable_translation(dev);
1252 calgary_free_bus(dev); 1240 calgary_free_bus(dev);
1253 pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ 1241 pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
1242 dev->dev.archdata.dma_ops = NULL;
1254 } while (1); 1243 } while (1);
1255 1244
1256 return ret; 1245 return ret;
@@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
1339 return (val != 0xffffffff); 1328 return (val != 0xffffffff);
1340} 1329}
1341 1330
1331/*
1332 * calgary_init_bitmap_from_tce_table():
 1333 * Function for kdump case. In the second/kdump kernel initialize
 1334 * the bitmap based on the tce table entries obtained from the first kernel
1335 */
1336static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
1337{
1338 u64 *tp;
1339 unsigned int index;
1340 tp = ((u64 *)tbl->it_base);
1341 for (index = 0 ; index < tbl->it_size; index++) {
1342 if (*tp != 0x0)
1343 set_bit(index, tbl->it_map);
1344 tp++;
1345 }
1346}
1347
1348/*
1349 * get_tce_space_from_tar():
 1350 * Function for kdump case. Get the tce tables from the first kernel
 1351 * by reading the contents of the base address register of the calgary iommu
1352 */
1353static void get_tce_space_from_tar(void)
1354{
1355 int bus;
1356 void __iomem *target;
1357 unsigned long tce_space;
1358
1359 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
1360 struct calgary_bus_info *info = &bus_info[bus];
1361 unsigned short pci_device;
1362 u32 val;
1363
1364 val = read_pci_config(bus, 0, 0, 0);
1365 pci_device = (val & 0xFFFF0000) >> 16;
1366
1367 if (!is_cal_pci_dev(pci_device))
1368 continue;
1369 if (info->translation_disabled)
1370 continue;
1371
1372 if (calgary_bus_has_devices(bus, pci_device) ||
1373 translate_empty_slots) {
1374 target = calgary_reg(bus_info[bus].bbar,
1375 tar_offset(bus));
1376 tce_space = be64_to_cpu(readq(target));
1377 tce_space = tce_space & TAR_SW_BITS;
1378
1379 tce_space = tce_space & (~specified_table_size);
1380 info->tce_space = (u64 *)__va(tce_space);
1381 }
1382 }
1383 return;
1384}
1385
1342void __init detect_calgary(void) 1386void __init detect_calgary(void)
1343{ 1387{
1344 int bus; 1388 int bus;
@@ -1394,7 +1438,8 @@ void __init detect_calgary(void)
1394 return; 1438 return;
1395 } 1439 }
1396 1440
1397 specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); 1441 specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
1442 saved_max_pfn : max_pfn) * PAGE_SIZE);
1398 1443
1399 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { 1444 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
1400 struct calgary_bus_info *info = &bus_info[bus]; 1445 struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1457,16 @@ void __init detect_calgary(void)
1412 1457
1413 if (calgary_bus_has_devices(bus, pci_device) || 1458 if (calgary_bus_has_devices(bus, pci_device) ||
1414 translate_empty_slots) { 1459 translate_empty_slots) {
1415 tbl = alloc_tce_table(); 1460 /*
1416 if (!tbl) 1461 * If it is kdump kernel, find and use tce tables
1417 goto cleanup; 1462 * from first kernel, else allocate tce tables here
1418 info->tce_space = tbl; 1463 */
1464 if (!is_kdump_kernel()) {
1465 tbl = alloc_tce_table();
1466 if (!tbl)
1467 goto cleanup;
1468 info->tce_space = tbl;
1469 }
1419 calgary_found = 1; 1470 calgary_found = 1;
1420 } 1471 }
1421 } 1472 }
@@ -1430,6 +1481,10 @@ void __init detect_calgary(void)
1430 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " 1481 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
1431 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, 1482 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
1432 debugging ? "enabled" : "disabled"); 1483 debugging ? "enabled" : "disabled");
1484
1485 /* swiotlb for devices that aren't behind the Calgary. */
1486 if (max_pfn > MAX_DMA32_PFN)
1487 swiotlb = 1;
1433 } 1488 }
1434 return; 1489 return;
1435 1490
@@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void)
1446{ 1501{
1447 int ret; 1502 int ret;
1448 1503
1449 if (no_iommu || swiotlb) 1504 if (no_iommu || (swiotlb && !calgary_detected))
1450 return -ENODEV; 1505 return -ENODEV;
1451 1506
1452 if (!calgary_detected) 1507 if (!calgary_detected)
@@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void)
1459 if (ret) { 1514 if (ret) {
1460 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " 1515 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
1461 "falling back to no_iommu\n", ret); 1516 "falling back to no_iommu\n", ret);
1462 if (max_pfn > MAX_DMA32_PFN)
1463 printk(KERN_ERR "WARNING more than 4GB of memory, "
1464 "32bit PCI may malfunction.\n");
1465 return ret; 1517 return ret;
1466 } 1518 }
1467 1519
1468 force_iommu = 1; 1520 force_iommu = 1;
1469 bad_dma_address = 0x0; 1521 bad_dma_address = 0x0;
1470 dma_ops = &calgary_dma_ops; 1522 /* dma_ops is set to swiotlb or nommu */
1523 if (!dma_ops)
1524 dma_ops = &nommu_dma_ops;
1471 1525
1472 return 0; 1526 return 0;
1473} 1527}
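Taken together, the Calgary changes switch from one global dma_ops to per-device ops: the loop added at the end of calgary_init() is now the only place calgary_dma_ops is installed, and calgary_iommu_init() leaves the global dma_ops to swiotlb or nommu. A sketch of the resulting selection (it mirrors the loop above; variable names are illustrative):

struct pci_dev *pdev = NULL;

for_each_pci_dev(pdev) {
        struct iommu_table *tbl = find_iommu_table(&pdev->dev);

        if (translation_enabled(tbl))
                pdev->dev.archdata.dma_ops = &calgary_dma_ops;
        /* otherwise archdata.dma_ops stays NULL and the device falls
         * back to the global dma_ops (swiotlb or nommu_dma_ops) */
}
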
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a4213c00dffc..87d4d6964ec2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,7 +11,7 @@
11 11
12static int forbid_dac __read_mostly; 12static int forbid_dac __read_mostly;
13 13
14const struct dma_mapping_ops *dma_ops; 14struct dma_mapping_ops *dma_ops;
15EXPORT_SYMBOL(dma_ops); 15EXPORT_SYMBOL(dma_ops);
16 16
17static int iommu_sac_force __read_mostly; 17static int iommu_sac_force __read_mostly;
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
123 123
124 pci_swiotlb_init(); 124 pci_swiotlb_init();
125} 125}
126
127unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
128{
129 unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
130
131 return size >> PAGE_SHIFT;
132}
133EXPORT_SYMBOL(iommu_num_pages);
126#endif 134#endif
127 135
128/* 136/*
@@ -192,136 +200,19 @@ static __init int iommu_setup(char *p)
192} 200}
193early_param("iommu", iommu_setup); 201early_param("iommu", iommu_setup);
194 202
195#ifdef CONFIG_X86_32
196int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
197 dma_addr_t device_addr, size_t size, int flags)
198{
199 void __iomem *mem_base = NULL;
200 int pages = size >> PAGE_SHIFT;
201 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
202
203 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
204 goto out;
205 if (!size)
206 goto out;
207 if (dev->dma_mem)
208 goto out;
209
210 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
211
212 mem_base = ioremap(bus_addr, size);
213 if (!mem_base)
214 goto out;
215
216 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
217 if (!dev->dma_mem)
218 goto out;
219 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
220 if (!dev->dma_mem->bitmap)
221 goto free1_out;
222
223 dev->dma_mem->virt_base = mem_base;
224 dev->dma_mem->device_base = device_addr;
225 dev->dma_mem->size = pages;
226 dev->dma_mem->flags = flags;
227
228 if (flags & DMA_MEMORY_MAP)
229 return DMA_MEMORY_MAP;
230
231 return DMA_MEMORY_IO;
232
233 free1_out:
234 kfree(dev->dma_mem);
235 out:
236 if (mem_base)
237 iounmap(mem_base);
238 return 0;
239}
240EXPORT_SYMBOL(dma_declare_coherent_memory);
241
242void dma_release_declared_memory(struct device *dev)
243{
244 struct dma_coherent_mem *mem = dev->dma_mem;
245
246 if (!mem)
247 return;
248 dev->dma_mem = NULL;
249 iounmap(mem->virt_base);
250 kfree(mem->bitmap);
251 kfree(mem);
252}
253EXPORT_SYMBOL(dma_release_declared_memory);
254
255void *dma_mark_declared_memory_occupied(struct device *dev,
256 dma_addr_t device_addr, size_t size)
257{
258 struct dma_coherent_mem *mem = dev->dma_mem;
259 int pos, err;
260 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
261
262 pages >>= PAGE_SHIFT;
263
264 if (!mem)
265 return ERR_PTR(-EINVAL);
266
267 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
268 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
269 if (err != 0)
270 return ERR_PTR(err);
271 return mem->virt_base + (pos << PAGE_SHIFT);
272}
273EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
274
275static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
276 dma_addr_t *dma_handle, void **ret)
277{
278 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
279 int order = get_order(size);
280
281 if (mem) {
282 int page = bitmap_find_free_region(mem->bitmap, mem->size,
283 order);
284 if (page >= 0) {
285 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
286 *ret = mem->virt_base + (page << PAGE_SHIFT);
287 memset(*ret, 0, size);
288 }
289 if (mem->flags & DMA_MEMORY_EXCLUSIVE)
290 *ret = NULL;
291 }
292 return (mem != NULL);
293}
294
295static int dma_release_coherent(struct device *dev, int order, void *vaddr)
296{
297 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
298
299 if (mem && vaddr >= mem->virt_base && vaddr <
300 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
301 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
302
303 bitmap_release_region(mem->bitmap, page, order);
304 return 1;
305 }
306 return 0;
307}
308#else
309#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
310#define dma_release_coherent(dev, order, vaddr) (0)
311#endif /* CONFIG_X86_32 */
312
313int dma_supported(struct device *dev, u64 mask) 203int dma_supported(struct device *dev, u64 mask)
314{ 204{
205 struct dma_mapping_ops *ops = get_dma_ops(dev);
206
315#ifdef CONFIG_PCI 207#ifdef CONFIG_PCI
316 if (mask > 0xffffffff && forbid_dac > 0) { 208 if (mask > 0xffffffff && forbid_dac > 0) {
317 printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", 209 dev_info(dev, "PCI: Disallowing DAC for device\n");
318 dev->bus_id);
319 return 0; 210 return 0;
320 } 211 }
321#endif 212#endif
322 213
323 if (dma_ops->dma_supported) 214 if (ops->dma_supported)
324 return dma_ops->dma_supported(dev, mask); 215 return ops->dma_supported(dev, mask);
325 216
326 /* Copied from i386. Doesn't make much sense, because it will 217 /* Copied from i386. Doesn't make much sense, because it will
327 only work for pci_alloc_coherent. 218 only work for pci_alloc_coherent.
@@ -342,8 +233,7 @@ int dma_supported(struct device *dev, u64 mask)
342 type. Normally this doesn't make any difference, but gives 233 type. Normally this doesn't make any difference, but gives
343 more gentle handling of IOMMU overflow. */ 234 more gentle handling of IOMMU overflow. */
344 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { 235 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
345 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", 236 dev_info(dev, "Force SAC with mask %Lx\n", mask);
346 dev->bus_id, mask);
347 return 0; 237 return 0;
348 } 238 }
349 239
@@ -369,6 +259,7 @@ void *
369dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 259dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
370 gfp_t gfp) 260 gfp_t gfp)
371{ 261{
262 struct dma_mapping_ops *ops = get_dma_ops(dev);
372 void *memory = NULL; 263 void *memory = NULL;
373 struct page *page; 264 struct page *page;
374 unsigned long dma_mask = 0; 265 unsigned long dma_mask = 0;
@@ -378,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
378 /* ignore region specifiers */ 269 /* ignore region specifiers */
379 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 270 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
380 271
381 if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) 272 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
382 return memory; 273 return memory;
383 274
384 if (!dev) { 275 if (!dev) {
@@ -437,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
437 /* Let low level make its own zone decisions */ 328 /* Let low level make its own zone decisions */
438 gfp &= ~(GFP_DMA32|GFP_DMA); 329 gfp &= ~(GFP_DMA32|GFP_DMA);
439 330
440 if (dma_ops->alloc_coherent) 331 if (ops->alloc_coherent)
441 return dma_ops->alloc_coherent(dev, size, 332 return ops->alloc_coherent(dev, size,
442 dma_handle, gfp); 333 dma_handle, gfp);
443 return NULL; 334 return NULL;
444 } 335 }
@@ -450,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
450 } 341 }
451 } 342 }
452 343
453 if (dma_ops->alloc_coherent) { 344 if (ops->alloc_coherent) {
454 free_pages((unsigned long)memory, get_order(size)); 345 free_pages((unsigned long)memory, get_order(size));
455 gfp &= ~(GFP_DMA|GFP_DMA32); 346 gfp &= ~(GFP_DMA|GFP_DMA32);
456 return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); 347 return ops->alloc_coherent(dev, size, dma_handle, gfp);
457 } 348 }
458 349
459 if (dma_ops->map_simple) { 350 if (ops->map_simple) {
460 *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), 351 *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
461 size, 352 size,
462 PCI_DMA_BIDIRECTIONAL); 353 PCI_DMA_BIDIRECTIONAL);
463 if (*dma_handle != bad_dma_address) 354 if (*dma_handle != bad_dma_address)
@@ -479,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent);
479void dma_free_coherent(struct device *dev, size_t size, 370void dma_free_coherent(struct device *dev, size_t size,
480 void *vaddr, dma_addr_t bus) 371 void *vaddr, dma_addr_t bus)
481{ 372{
373 struct dma_mapping_ops *ops = get_dma_ops(dev);
374
482 int order = get_order(size); 375 int order = get_order(size);
483 WARN_ON(irqs_disabled()); /* for portability */ 376 WARN_ON(irqs_disabled()); /* for portability */
484 if (dma_release_coherent(dev, order, vaddr)) 377 if (dma_release_from_coherent(dev, order, vaddr))
485 return; 378 return;
486 if (dma_ops->unmap_single) 379 if (ops->unmap_single)
487 dma_ops->unmap_single(dev, bus, size, 0); 380 ops->unmap_single(dev, bus, size, 0);
488 free_pages((unsigned long)vaddr, order); 381 free_pages((unsigned long)vaddr, order);
489} 382}
490EXPORT_SYMBOL(dma_free_coherent); 383EXPORT_SYMBOL(dma_free_coherent);
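The dma_ops-> to ops-> conversions above depend on a per-device lookup helper; its assumed shape is sketched below (the helper lives in the dma-mapping header, which is not part of this diff, so treat this as an illustration of the fallback order rather than the exact code):

static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
{
        /* prefer a per-device ops pointer (e.g. calgary_dma_ops set in
         * pci-calgary_64.c above), otherwise fall back to the global
         * dma_ops (swiotlb, gart or nommu) */
        if (unlikely(!dev) || !dev->archdata.dma_ops)
                return dma_ops;
        return dev->archdata.dma_ops;
}
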
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index be60961f8695..49285f8fd4d5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
67 (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) 67 (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
68#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) 68#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
69 69
70#define to_pages(addr, size) \
71 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 70#define EMERGENCY_PAGES 32 /* = 128KB */
74 71
75#ifdef CONFIG_AGP 72#ifdef CONFIG_AGP
@@ -198,9 +195,7 @@ static void iommu_full(struct device *dev, size_t size, int dir)
198 * out. Hopefully no network devices use single mappings that big. 195 * out. Hopefully no network devices use single mappings that big.
199 */ 196 */
200 197
201 printk(KERN_ERR 198 dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
202 "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
203 size, dev->bus_id);
204 199
205 if (size > PAGE_SIZE*EMERGENCY_PAGES) { 200 if (size > PAGE_SIZE*EMERGENCY_PAGES) {
206 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) 201 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
@@ -243,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
243static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, 238static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
244 size_t size, int dir) 239 size_t size, int dir)
245{ 240{
246 unsigned long npages = to_pages(phys_mem, size); 241 unsigned long npages = iommu_num_pages(phys_mem, size);
247 unsigned long iommu_page = alloc_iommu(dev, npages); 242 unsigned long iommu_page = alloc_iommu(dev, npages);
248 int i; 243 int i;
249 244
@@ -306,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
306 return; 301 return;
307 302
308 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; 303 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
309 npages = to_pages(dma_addr, size); 304 npages = iommu_num_pages(dma_addr, size);
310 for (i = 0; i < npages; i++) { 305 for (i = 0; i < npages; i++) {
311 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 306 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
312 CLEAR_LEAK(iommu_page + i); 307 CLEAR_LEAK(iommu_page + i);
@@ -389,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
389 } 384 }
390 385
391 addr = phys_addr; 386 addr = phys_addr;
392 pages = to_pages(s->offset, s->length); 387 pages = iommu_num_pages(s->offset, s->length);
393 while (pages--) { 388 while (pages--) {
394 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); 389 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
395 SET_LEAK(iommu_page); 390 SET_LEAK(iommu_page);
@@ -472,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
472 467
473 seg_size += s->length; 468 seg_size += s->length;
474 need = nextneed; 469 need = nextneed;
475 pages += to_pages(s->offset, s->length); 470 pages += iommu_num_pages(s->offset, s->length);
476 ps = s; 471 ps = s;
477 } 472 }
478 if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) 473 if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
@@ -694,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
694 689
695extern int agp_amd64_init(void); 690extern int agp_amd64_init(void);
696 691
697static const struct dma_mapping_ops gart_dma_ops = { 692static struct dma_mapping_ops gart_dma_ops = {
698 .mapping_error = NULL,
699 .map_single = gart_map_single, 693 .map_single = gart_map_single,
700 .map_simple = gart_map_simple, 694 .map_simple = gart_map_simple,
701 .unmap_single = gart_unmap_single, 695 .unmap_single = gart_unmap_single,
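The to_pages() macro removed above is replaced by the shared iommu_num_pages() helper; both answer how many IOMMU pages a buffer of size bytes starting at addr touches, counting the partial first and last pages. A minimal sketch of the equivalent computation (helper name invented for illustration):

#include <linux/kernel.h>	/* round_up(), as used by the removed macro */
#include <asm/page.h>		/* PAGE_SIZE, PAGE_MASK, PAGE_SHIFT */

/* Same arithmetic as the old to_pages(addr, size): for example, 0x100
 * bytes starting at offset 0xff0 into a page crosses a page boundary
 * and therefore needs 2 IOMMU pages. */
static unsigned long pages_touched(unsigned long addr, unsigned long size)
{
	return round_up((addr & ~PAGE_MASK) + size, PAGE_SIZE) >> PAGE_SHIFT;
}

The dev_err() conversion in iommu_full() is a cosmetic follow-on: it prints the same message with the standard device prefix instead of open-coding dev->bus_id.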
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 792b9179eff3..3f91f71cdc3e 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
72 return nents; 72 return nents;
73} 73}
74 74
75/* Make sure we keep the same behaviour */ 75struct dma_mapping_ops nommu_dma_ops = {
76static int nommu_mapping_error(dma_addr_t dma_addr)
77{
78#ifdef CONFIG_X86_32
79 return 0;
80#else
81 return (dma_addr == bad_dma_address);
82#endif
83}
84
85
86const struct dma_mapping_ops nommu_dma_ops = {
87 .map_single = nommu_map_single, 76 .map_single = nommu_map_single,
88 .map_sg = nommu_map_sg, 77 .map_sg = nommu_map_sg,
89 .mapping_error = nommu_mapping_error,
90 .is_phys = 1, 78 .is_phys = 1,
91}; 79};
92 80
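With nommu_mapping_error gone, the .mapping_error slot is simply left NULL. The assumption behind that removal, sketched below rather than quoted from the header, is that the generic dma_mapping_error() wrapper already falls back to comparing the handle against bad_dma_address when an ops table provides no hook, which is exactly what the removed 64-bit function did:

/* Hedged sketch of the generic fallback this removal relies on. */
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	struct dma_mapping_ops *ops = get_dma_ops(dev);

	if (ops->mapping_error)
		return ops->mapping_error(dev, dma_addr);

	return (dma_addr == bad_dma_address);
}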
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 20df839b9c20..c4ce0332759e 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); 18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
19} 19}
20 20
21const struct dma_mapping_ops swiotlb_dma_ops = { 21struct dma_mapping_ops swiotlb_dma_ops = {
22 .mapping_error = swiotlb_dma_mapping_error, 22 .mapping_error = swiotlb_dma_mapping_error,
23 .alloc_coherent = swiotlb_alloc_coherent, 23 .alloc_coherent = swiotlb_alloc_coherent,
24 .free_coherent = swiotlb_free_coherent, 24 .free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7218bccd1766..7b6e44a7c624 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -130,7 +130,7 @@ void cpu_idle(void)
130 130
131 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
132 while (1) { 132 while (1) {
133 tick_nohz_stop_sched_tick(); 133 tick_nohz_stop_sched_tick(1);
134 while (!need_resched()) { 134 while (!need_resched()) {
135 135
136 check_pgt_cache(); 136 check_pgt_cache();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c78090dd0c52..87d7dfdcf46c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,7 +121,7 @@ void cpu_idle(void)
121 current_thread_info()->status |= TS_POLLING; 121 current_thread_info()->status |= TS_POLLING;
122 /* endless idle loop with no priority at all */ 122 /* endless idle loop with no priority at all */
123 while (1) { 123 while (1) {
124 tick_nohz_stop_sched_tick(); 124 tick_nohz_stop_sched_tick(1);
125 while (!need_resched()) { 125 while (!need_resched()) {
126 126
127 rmb(); 127 rmb();
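tick_nohz_stop_sched_tick() now takes an inidle flag, and both idle loops pass 1 to state that the call really is made from the idle task. A skeleton of the calling pattern, with the surrounding details reduced to stand-ins (this is a sketch, not the exact loop body from either file):

#include <linux/tick.h>
#include <linux/sched.h>

/* Sketch only: the real loops also pick mwait/hlt, manage preemption and
 * CPU hotplug; the point here is the paired stop(1)/restart calls. */
static void idle_loop_sketch(void)
{
	while (1) {
		tick_nohz_stop_sched_tick(1);	/* 1: called from the idle task */
		while (!need_resched())
			cpu_relax();		/* placeholder for the idle body */
		tick_nohz_restart_sched_tick();
		schedule();
	}
}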
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9dcf39c02972..724adfc63cb9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -411,10 +411,9 @@ void native_machine_shutdown(void)
411{ 411{
412 /* Stop the cpus and apics */ 412 /* Stop the cpus and apics */
413#ifdef CONFIG_SMP 413#ifdef CONFIG_SMP
414 int reboot_cpu_id;
415 414
416 /* The boot cpu is always logical cpu 0 */ 415 /* The boot cpu is always logical cpu 0 */
417 reboot_cpu_id = 0; 416 int reboot_cpu_id = 0;
418 417
419#ifdef CONFIG_X86_32 418#ifdef CONFIG_X86_32
420 /* See if there has been given a command line override */ 419 /* See if there has been given a command line override */
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470d..703310a99023 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,44 @@
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21#define PAE_PGD_ATTR (_PAGE_PRESENT) 21#define PAE_PGD_ATTR (_PAGE_PRESENT)
22 22
23/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are
24 * used to save some data for jumping back
25 */
26#define DATA(offset) (PAGE_SIZE/2+(offset))
27
28/* Minimal CPU state */
29#define ESP DATA(0x0)
30#define CR0 DATA(0x4)
31#define CR3 DATA(0x8)
32#define CR4 DATA(0xc)
33
34/* other data */
35#define CP_VA_CONTROL_PAGE DATA(0x10)
36#define CP_PA_PGD DATA(0x14)
37#define CP_PA_SWAP_PAGE DATA(0x18)
38#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
39
23 .text 40 .text
24 .align PAGE_SIZE 41 .align PAGE_SIZE
25 .globl relocate_kernel 42 .globl relocate_kernel
26relocate_kernel: 43relocate_kernel:
27 movl 8(%esp), %ebp /* list of pages */ 44 /* Save the CPU context, used for jumping back */
45
46 pushl %ebx
47 pushl %esi
48 pushl %edi
49 pushl %ebp
50 pushf
51
52 movl 20+8(%esp), %ebp /* list of pages */
53 movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
54 movl %esp, ESP(%edi)
55 movl %cr0, %eax
56 movl %eax, CR0(%edi)
57 movl %cr3, %eax
58 movl %eax, CR3(%edi)
59 movl %cr4, %eax
60 movl %eax, CR4(%edi)
28 61
29#ifdef CONFIG_X86_PAE 62#ifdef CONFIG_X86_PAE
30 /* map the control page at its virtual address */ 63 /* map the control page at its virtual address */
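The DATA() offsets above carve a save area out of the second half of the control page so that a preserve_context kexec can jump back to the original kernel. Viewed as a C structure (the struct and its name are purely illustrative; the assembly only ever uses the raw offsets):

#include <linux/types.h>

/* Lives at control_page + PAGE_SIZE/2; offsets match DATA(0x0)..DATA(0x1c). */
struct jump_back_save_area {
	u32 esp;			/* DATA(0x0)  ESP                     */
	u32 cr0;			/* DATA(0x4)  CR0                     */
	u32 cr3;			/* DATA(0x8)  CR3                     */
	u32 cr4;			/* DATA(0xc)  CR4                     */
	u32 va_control_page;		/* DATA(0x10) CP_VA_CONTROL_PAGE      */
	u32 pa_pgd;			/* DATA(0x14) CP_PA_PGD               */
	u32 pa_swap_page;		/* DATA(0x18) CP_PA_SWAP_PAGE         */
	u32 pa_backup_pages_map;	/* DATA(0x1c) CP_PA_BACKUP_PAGES_MAP  */
};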
@@ -138,15 +171,25 @@ relocate_kernel:
138 171
139relocate_new_kernel: 172relocate_new_kernel:
140 /* read the arguments and say goodbye to the stack */ 173 /* read the arguments and say goodbye to the stack */
141 movl 4(%esp), %ebx /* page_list */ 174 movl 20+4(%esp), %ebx /* page_list */
142 movl 8(%esp), %ebp /* list of pages */ 175 movl 20+8(%esp), %ebp /* list of pages */
143 movl 12(%esp), %edx /* start address */ 176 movl 20+12(%esp), %edx /* start address */
144 movl 16(%esp), %ecx /* cpu_has_pae */ 177 movl 20+16(%esp), %ecx /* cpu_has_pae */
178 movl 20+20(%esp), %esi /* preserve_context */
145 179
146 /* zero out flags, and disable interrupts */ 180 /* zero out flags, and disable interrupts */
147 pushl $0 181 pushl $0
148 popfl 182 popfl
149 183
184 /* save some information for jumping back */
185 movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
186 movl %edi, CP_VA_CONTROL_PAGE(%edi)
187 movl PTR(PA_PGD)(%ebp), %eax
188 movl %eax, CP_PA_PGD(%edi)
189 movl PTR(PA_SWAP_PAGE)(%ebp), %eax
190 movl %eax, CP_PA_SWAP_PAGE(%edi)
191 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
192
150 /* get physical address of control page now */ 193 /* get physical address of control page now */
151 /* this is impossible after page table switch */ 194 /* this is impossible after page table switch */
152 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi 195 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +240,90 @@ identity_mapped:
197 xorl %eax, %eax 240 xorl %eax, %eax
198 movl %eax, %cr3 241 movl %eax, %cr3
199 242
243 movl CP_PA_SWAP_PAGE(%edi), %eax
244 pushl %eax
245 pushl %ebx
246 call swap_pages
247 addl $8, %esp
248
249 /* To be certain of avoiding problems with self-modifying code
250 * I need to execute a serializing instruction here.
251 * So I flush the TLB, it's handy, and not processor dependent.
252 */
253 xorl %eax, %eax
254 movl %eax, %cr3
255
256 /* set all of the registers to known values */
257 /* leave %esp alone */
258
259 testl %esi, %esi
260 jnz 1f
261 xorl %edi, %edi
262 xorl %eax, %eax
263 xorl %ebx, %ebx
264 xorl %ecx, %ecx
265 xorl %edx, %edx
266 xorl %esi, %esi
267 xorl %ebp, %ebp
268 ret
2691:
270 popl %edx
271 movl CP_PA_SWAP_PAGE(%edi), %esp
272 addl $PAGE_SIZE, %esp
2732:
274 call *%edx
275
276 /* get the re-entry point of the peer system */
277 movl 0(%esp), %ebp
278 call 1f
2791:
280 popl %ebx
281 subl $(1b - relocate_kernel), %ebx
282 movl CP_VA_CONTROL_PAGE(%ebx), %edi
283 lea PAGE_SIZE(%ebx), %esp
284 movl CP_PA_SWAP_PAGE(%ebx), %eax
285 movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
286 pushl %eax
287 pushl %edx
288 call swap_pages
289 addl $8, %esp
290 movl CP_PA_PGD(%ebx), %eax
291 movl %eax, %cr3
292 movl %cr0, %eax
293 orl $(1<<31), %eax
294 movl %eax, %cr0
295 lea PAGE_SIZE(%edi), %esp
296 movl %edi, %eax
297 addl $(virtual_mapped - relocate_kernel), %eax
298 pushl %eax
299 ret
300
301virtual_mapped:
302 movl CR4(%edi), %eax
303 movl %eax, %cr4
304 movl CR3(%edi), %eax
305 movl %eax, %cr3
306 movl CR0(%edi), %eax
307 movl %eax, %cr0
308 movl ESP(%edi), %esp
309 movl %ebp, %eax
310
311 popf
312 popl %ebp
313 popl %edi
314 popl %esi
315 popl %ebx
316 ret
317
200 /* Do the copies */ 318 /* Do the copies */
201 movl %ebx, %ecx
319 swap_pages:
320 movl 8(%esp), %edx
321 movl 4(%esp), %ecx
322 pushl %ebp
323 pushl %ebx
324 pushl %edi
325 pushl %esi
326 movl %ecx, %ebx
202 jmp 1f 327 jmp 1f
203 328
2040: /* top, read another word from the indirection page */ 3290: /* top, read another word from the indirection page */
@@ -226,27 +351,28 @@ identity_mapped:
226 movl %ecx, %esi /* For every source page do a copy */ 351 movl %ecx, %esi /* For every source page do a copy */
227 andl $0xfffff000, %esi 352 andl $0xfffff000, %esi
228 353
354 movl %edi, %eax
355 movl %esi, %ebp
356
357 movl %edx, %edi
229 movl $1024, %ecx 358 movl $1024, %ecx
230 rep ; movsl 359 rep ; movsl
231 jmp 0b
232 360
233 3:
234
235 /* To be certain of avoiding problems with self-modifying code
236 * I need to execute a serializing instruction here.
237 * So I flush the TLB, it's handy, and not processor dependent.
238 */
239 xorl %eax, %eax
240 movl %eax, %cr3
361 movl %ebp, %edi
362 movl %eax, %esi
363 movl $1024, %ecx
364 rep ; movsl
241 365
242 /* set all of the registers to known values */
243 /* leave %esp alone */
366 movl %eax, %edi
367 movl %edx, %esi
368 movl $1024, %ecx
369 rep ; movsl
244 370
245 xorl %eax, %eax
246 xorl %ebx, %ebx
247 xorl %ecx, %ecx
248 xorl %edx, %edx
249 xorl %esi, %esi
250 xorl %edi, %edi
251 xorl %ebp, %ebp
371 lea PAGE_SIZE(%ebp), %esi
372 jmp 0b
373 3:
374 popl %esi
375 popl %edi
376 popl %ebx
377 popl %ebp
252 ret 378 ret
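The copy loop, now factored out as swap_pages, no longer just streams each source page into its destination: it routes the transfer through the dedicated swap page so that the destination's previous contents end up back in the source page. That exchange is what lets a preserve_context kexec restore the original kernel afterwards. In C terms, the three rep;movsl blocks (1024 dwords each, i.e. one 4 KB page) amount to the following sketch; the helper name is invented for illustration:

#include <linux/string.h>
#include <asm/page.h>

/* What one iteration of swap_pages does for a source/destination pair:
 * exchange the two pages, using the swap page as scratch space. */
static void swap_one_page(void *dest, void *src, void *swap_page)
{
	memcpy(swap_page, src, PAGE_SIZE);	/* stash the source page           */
	memcpy(src, dest, PAGE_SIZE);		/* old destination goes to source  */
	memcpy(dest, swap_page, PAGE_SIZE);	/* stashed source becomes the dest */
}

When preserve_context is clear, relocate_new_kernel still ends by zeroing the registers and returning into the new image, as before.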
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ec952aa5394a..68b48e3fbcbd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -597,13 +597,21 @@ void __init setup_arch(char **cmdline_p)
597 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 597 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
598 visws_early_detect(); 598 visws_early_detect();
599 pre_setup_arch_hook(); 599 pre_setup_arch_hook();
600 early_cpu_init();
601#else 600#else
602 printk(KERN_INFO "Command line: %s\n", boot_command_line); 601 printk(KERN_INFO "Command line: %s\n", boot_command_line);
603#endif 602#endif
604 603
604 early_cpu_init();
605 early_ioremap_init(); 605 early_ioremap_init();
606 606
607#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
608 /*
609 * Must be before kernel pagetables are setup
610 * or fixmap area is touched.
611 */
612 vmi_init();
613#endif
614
607 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 615 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
608 screen_info = boot_params.screen_info; 616 screen_info = boot_params.screen_info;
609 edid_info = boot_params.edid_info; 617 edid_info = boot_params.edid_info;
@@ -665,9 +673,6 @@ void __init setup_arch(char **cmdline_p)
665 bss_resource.start = virt_to_phys(&__bss_start); 673 bss_resource.start = virt_to_phys(&__bss_start);
666 bss_resource.end = virt_to_phys(&__bss_stop)-1; 674 bss_resource.end = virt_to_phys(&__bss_stop)-1;
667 675
668#ifdef CONFIG_X86_64
669 early_cpu_init();
670#endif
671 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); 676 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
672 *cmdline_p = command_line; 677 *cmdline_p = command_line;
673 678
@@ -680,7 +685,7 @@ void __init setup_arch(char **cmdline_p)
680#ifdef CONFIG_X86_LOCAL_APIC 685#ifdef CONFIG_X86_LOCAL_APIC
681 disable_apic = 1; 686 disable_apic = 1;
682#endif 687#endif
683 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 688 setup_clear_cpu_cap(X86_FEATURE_APIC);
684 } 689 }
685 690
686#ifdef CONFIG_PCI 691#ifdef CONFIG_PCI
@@ -791,10 +796,6 @@ void __init setup_arch(char **cmdline_p)
791 796
792 initmem_init(0, max_pfn); 797 initmem_init(0, max_pfn);
793 798
794#ifdef CONFIG_X86_64
795 dma32_reserve_bootmem();
796#endif
797
798#ifdef CONFIG_ACPI_SLEEP 799#ifdef CONFIG_ACPI_SLEEP
799 /* 800 /*
800 * Reserve low memory region for sleep support. 801 * Reserve low memory region for sleep support.
@@ -809,20 +810,21 @@ void __init setup_arch(char **cmdline_p)
809#endif 810#endif
810 reserve_crashkernel(); 811 reserve_crashkernel();
811 812
813#ifdef CONFIG_X86_64
814 /*
815 * dma32_reserve_bootmem() allocates bootmem which may conflict
816 * with the crashkernel command line, so do that after
817 * reserve_crashkernel()
818 */
819 dma32_reserve_bootmem();
820#endif
821
812 reserve_ibft_region(); 822 reserve_ibft_region();
813 823
814#ifdef CONFIG_KVM_CLOCK 824#ifdef CONFIG_KVM_CLOCK
815 kvmclock_init(); 825 kvmclock_init();
816#endif 826#endif
817 827
818#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
819 /*
820 * Must be after max_low_pfn is determined, and before kernel
821 * pagetables are setup.
822 */
823 vmi_init();
824#endif
825
826 paravirt_pagetable_setup_start(swapper_pg_dir); 828 paravirt_pagetable_setup_start(swapper_pg_dir);
827 paging_init(); 829 paging_init();
828 paravirt_pagetable_setup_done(swapper_pg_dir); 830 paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -859,12 +861,6 @@ void __init setup_arch(char **cmdline_p)
859 init_apic_mappings(); 861 init_apic_mappings();
860 ioapic_init_mappings(); 862 ioapic_init_mappings();
861 863
862#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
863 if (def_to_bigsmp)
864 printk(KERN_WARNING "More than 8 CPUs detected and "
865 "CONFIG_X86_PC cannot handle it.\nUse "
866 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
867#endif
868 kvm_guest_init(); 864 kvm_guest_init();
869 865
870 e820_reserve_resources(); 866 e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index f7745f94c006..76e305e064f9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void)
80#endif 80#endif
81} 81}
82 82
83#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
84cpumask_t *cpumask_of_cpu_map __read_mostly;
85EXPORT_SYMBOL(cpumask_of_cpu_map);
86
87/* requires nr_cpu_ids to be initialized */
88static void __init setup_cpumask_of_cpu(void)
89{
90 int i;
91
92 /* alloc_bootmem zeroes memory */
93 cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
94 for (i = 0; i < nr_cpu_ids; i++)
95 cpu_set(i, cpumask_of_cpu_map[i]);
96}
97#else
98static inline void setup_cpumask_of_cpu(void) { }
99#endif
100
101#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
102/* 84/*
103 * Great future not-so-futuristic plan: make i386 and x86_64 do it 85 * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void)
197 179
198 /* Setup node to cpumask map */ 180 /* Setup node to cpumask map */
199 setup_node_to_cpumask_map(); 181 setup_node_to_cpumask_map();
200
201 /* Setup cpumask_of_cpu map */
202 setup_cpumask_of_cpu();
203} 182}
204 183
205#endif 184#endif
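The boot-time cpumask_of_cpu_map table removed above held nr_cpu_ids cpumask_t entries, each with exactly one bit set. The equivalent single-CPU mask can be produced without a bootmem allocation; what each removed slot contained, written as a stand-alone helper for illustration (the real replacement lives in the generic cpumask code, not in this file):

#include <linux/cpumask.h>

/* Equivalent of the removed cpumask_of_cpu_map[cpu] entry. */
static cpumask_t single_cpu_mask(int cpu)
{
	cpumask_t mask = CPU_MASK_NONE;

	cpu_set(cpu, mask);
	return mask;
}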
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 5cede1045ce7..0c727f64e79b 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -662,8 +662,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
662 if (thread_info_flags & _TIF_SIGPENDING) 662 if (thread_info_flags & _TIF_SIGPENDING)
663 do_signal(regs); 663 do_signal(regs);
664 664
665 if (thread_info_flags & _TIF_HRTICK_RESCHED)
666 hrtick_resched();
667
668 clear_thread_flag(TIF_IRET); 665 clear_thread_flag(TIF_IRET);
669} 666}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b95a0a609053..02b02583f5b5 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -54,6 +54,59 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
54 return do_sigaltstack(uss, uoss, regs->sp); 54 return do_sigaltstack(uss, uoss, regs->sp);
55} 55}
56 56
57/*
58 * Signal frame handlers.
59 */
60
61static inline int save_i387(struct _fpstate __user *buf)
62{
63 struct task_struct *tsk = current;
64 int err = 0;
65
66 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
67 sizeof(tsk->thread.xstate->fxsave));
68
69 if ((unsigned long)buf % 16)
70 printk("save_i387: bad fpstate %p\n", buf);
71
72 if (!used_math())
73 return 0;
74 clear_used_math(); /* trigger finit */
75 if (task_thread_info(tsk)->status & TS_USEDFPU) {
76 err = save_i387_checking((struct i387_fxsave_struct __user *)
77 buf);
78 if (err)
79 return err;
80 task_thread_info(tsk)->status &= ~TS_USEDFPU;
81 stts();
82 } else {
83 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
84 sizeof(struct i387_fxsave_struct)))
85 return -1;
86 }
87 return 1;
88}
89
90/*
91 * This restores directly out of user space. Exceptions are handled.
92 */
93static inline int restore_i387(struct _fpstate __user *buf)
94{
95 struct task_struct *tsk = current;
96 int err;
97
98 if (!used_math()) {
99 err = init_fpu(tsk);
100 if (err)
101 return err;
102 }
103
104 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
105 clts();
106 task_thread_info(current)->status |= TS_USEDFPU;
107 }
108 return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
109}
57 110
58/* 111/*
59 * Do a signal return; undo the signal stack. 112 * Do a signal return; undo the signal stack.
@@ -497,9 +550,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
497 /* deal with pending signal delivery */ 550 /* deal with pending signal delivery */
498 if (thread_info_flags & _TIF_SIGPENDING) 551 if (thread_info_flags & _TIF_SIGPENDING)
499 do_signal(regs); 552 do_signal(regs);
500
501 if (thread_info_flags & _TIF_HRTICK_RESCHED)
502 hrtick_resched();
503} 553}
504 554
505void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 555void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
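save_i387()/restore_i387() move into the 64-bit signal code as the frame save/restore helpers; the "% 16" check exists because fxsave/fxrstor require a 16-byte-aligned buffer. Their return convention matters to callers, so here is a sketch of the expected use; the caller shown is illustrative, not the actual setup_rt_frame() from this file:

#include <linux/errno.h>

/* save_i387(): 0  = task never used the FPU (nothing recorded),
 *              1  = state written to buf,
 *             <0  = the copy to user space faulted. */
static int stash_fpstate(struct _fpstate __user *buf)
{
	int ret = save_i387(buf);

	if (ret < 0)
		return -EFAULT;	/* abort signal frame setup */
	return ret;		/* caller records buf in the sigcontext only if 1 */
}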
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a9331a42f76f..6112c3632345 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused)
326 * for which cpus receive the IPI. Holding this 326 * for which cpus receive the IPI. Holding this
327 * lock helps us to not include this cpu in a currently in progress 327 * lock helps us to not include this cpu in a currently in progress
328 * smp_call_function(). 328 * smp_call_function().
329 *
330 * We need to hold vector_lock so that the set of online cpus

331 * does not change while we are assigning vectors to cpus. Holding
332 * this lock ensures we don't half assign or remove an irq from a cpu.
329 */ 333 */
330 ipi_call_lock_irq(); 334 ipi_call_lock_irq();
331#ifdef CONFIG_X86_IO_APIC 335 lock_vector_lock();
332 setup_vector_irq(smp_processor_id()); 336 __setup_vector_irq(smp_processor_id());
333#endif
334 cpu_set(smp_processor_id(), cpu_online_map); 337 cpu_set(smp_processor_id(), cpu_online_map);
338 unlock_vector_lock();
335 ipi_call_unlock_irq(); 339 ipi_call_unlock_irq();
336 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 340 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
337 341
@@ -438,7 +442,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
438 cpu_set(cpu, cpu_sibling_setup_map); 442 cpu_set(cpu, cpu_sibling_setup_map);
439 443
440 if (smp_num_siblings > 1) { 444 if (smp_num_siblings > 1) {
441 for_each_cpu_mask(i, cpu_sibling_setup_map) { 445 for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
442 if (c->phys_proc_id == cpu_data(i).phys_proc_id && 446 if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
443 c->cpu_core_id == cpu_data(i).cpu_core_id) { 447 c->cpu_core_id == cpu_data(i).cpu_core_id) {
444 cpu_set(i, per_cpu(cpu_sibling_map, cpu)); 448 cpu_set(i, per_cpu(cpu_sibling_map, cpu));
@@ -461,7 +465,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
461 return; 465 return;
462 } 466 }
463 467
464 for_each_cpu_mask(i, cpu_sibling_setup_map) { 468 for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
465 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 469 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
466 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 470 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
467 cpu_set(i, c->llc_shared_map); 471 cpu_set(i, c->llc_shared_map);
@@ -990,7 +994,17 @@ int __cpuinit native_cpu_up(unsigned int cpu)
990 flush_tlb_all(); 994 flush_tlb_all();
991 low_mappings = 1; 995 low_mappings = 1;
992 996
997#ifdef CONFIG_X86_PC
998 if (def_to_bigsmp && apicid > 8) {
999 printk(KERN_WARNING
1000 "More than 8 CPUs detected - skipping them.\n"
1001 "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
1002 err = -1;
1003 } else
1004 err = do_boot_cpu(apicid, cpu);
1005#else
993 err = do_boot_cpu(apicid, cpu); 1006 err = do_boot_cpu(apicid, cpu);
1007#endif
994 1008
995 zap_low_mappings(); 1009 zap_low_mappings();
996 low_mappings = 0; 1010 low_mappings = 0;
@@ -1219,7 +1233,7 @@ static void remove_siblinginfo(int cpu)
1219 int sibling; 1233 int sibling;
1220 struct cpuinfo_x86 *c = &cpu_data(cpu); 1234 struct cpuinfo_x86 *c = &cpu_data(cpu);
1221 1235
1222 for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { 1236 for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
1223 cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); 1237 cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1224 /*/ 1238 /*/
1225 * last thread sibling in this cpu core going down 1239 * last thread sibling in this cpu core going down
@@ -1228,7 +1242,7 @@ static void remove_siblinginfo(int cpu)
1228 cpu_data(sibling).booted_cores--; 1242 cpu_data(sibling).booted_cores--;
1229 } 1243 }
1230 1244
1231 for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) 1245 for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
1232 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); 1246 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1233 cpus_clear(per_cpu(cpu_sibling_map, cpu)); 1247 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1234 cpus_clear(per_cpu(cpu_core_map, cpu)); 1248 cpus_clear(per_cpu(cpu_core_map, cpu));
@@ -1336,7 +1350,9 @@ int __cpu_disable(void)
1336 remove_siblinginfo(cpu); 1350 remove_siblinginfo(cpu);
1337 1351
1338 /* It's now safe to remove this processor from the online map */ 1352 /* It's now safe to remove this processor from the online map */
1353 lock_vector_lock();
1339 remove_cpu_from_maps(cpu); 1354 remove_cpu_from_maps(cpu);
1355 unlock_vector_lock();
1340 fixup_irqs(cpu_online_map); 1356 fixup_irqs(cpu_online_map);
1341 return 0; 1357 return 0;
1342} 1358}
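The for_each_cpu_mask() to for_each_cpu_mask_nr() conversions bound the iteration at nr_cpu_ids instead of walking all NR_CPUS bit positions, which is what matters on kernels built with a large NR_CPUS but booted on small machines. A small sketch of the pattern (the counting helper is invented for illustration):

#include <linux/cpumask.h>

/* With NR_CPUS=4096 but nr_cpu_ids=8, the _nr iterator scans 8 bit
 * positions where the old form would scan all 4096. */
static int count_mask_cpus(cpumask_t mask)
{
	int cpu, n = 0;

	for_each_cpu_mask_nr(cpu, mask)
		n++;
	return n;
}

The vector_lock additions are independent of that: taking the lock around the online-map update in start_secondary() and around remove_cpu_from_maps() in __cpu_disable() keeps vector-to-CPU assignments from racing with CPUs coming and going.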
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index adff5562f5fd..d44395ff34c3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -326,3 +326,9 @@ ENTRY(sys_call_table)
326 .long sys_fallocate 326 .long sys_fallocate
327 .long sys_timerfd_settime /* 325 */ 327 .long sys_timerfd_settime /* 325 */
328 .long sys_timerfd_gettime 328 .long sys_timerfd_gettime
329 .long sys_signalfd4
330 .long sys_eventfd2
331 .long sys_epoll_create1
332 .long sys_dup3 /* 330 */
333 .long sys_pipe2
334 .long sys_inotify_init1
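The six new entries are the flags-taking variants added in this release, filling slots 327-332 of the 32-bit table as numbered above (sys_dup3 is 330, so sys_pipe2 lands at 331). A user-space illustration of one of them, assuming a libc new enough to expose the pipe2() wrapper; otherwise syscall(__NR_pipe2, ...) with the number from this table does the same:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fds[2];

	/* Both descriptors are created close-on-exec atomically, which a
	 * plain pipe() followed by fcntl() cannot guarantee. */
	if (pipe2(fds, O_CLOEXEC) < 0) {
		perror("pipe2");
		return 1;
	}
	printf("pipe fds: %d (read), %d (write)\n", fds[0], fds[1]);
	return 0;
}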
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922d..6ca515d6db54 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
37#include <asm/timer.h> 37#include <asm/timer.h>
38#include <asm/vmi_time.h> 38#include <asm/vmi_time.h>
39#include <asm/kmap_types.h> 39#include <asm/kmap_types.h>
40#include <asm/setup.h>
40 41
41/* Convenient for calling VMI functions indirectly in the ROM */ 42/* Convenient for calling VMI functions indirectly in the ROM */
42typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); 43typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
683{ 684{
684 /* We must establish the lowmem mapping for MMU ops to work */ 685 /* We must establish the lowmem mapping for MMU ops to work */
685 if (vmi_ops.set_linear_mapping) 686 if (vmi_ops.set_linear_mapping)
686 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); 687 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
687} 688}
688 689
689/* 690/*