author    Ingo Molnar <mingo@elte.hu>    2008-08-18 21:34:07 -0400
committer Ingo Molnar <mingo@elte.hu>    2008-08-18 21:34:07 -0400
commit    2879a927bb7a3cf91ae3906a5e59215f9c17dd75 (patch)
tree      870bdd1bd530a3d5d2abd10539700446b2878188 /arch/x86
parent    7e7b43892b87b6be259479ef4de14029dcb4012f (diff)
parent    20211e4d344729f4d4c93da37a590fc1c3a1fd9b (diff)
Merge branch 'x86/oprofile' into oprofile
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 41
-rw-r--r--  arch/x86/Makefile | 5
-rw-r--r--  arch/x86/boot/boot.h | 10
-rw-r--r--  arch/x86/boot/cpu.c | 3
-rw-r--r--  arch/x86/boot/cpucheck.c | 10
-rw-r--r--  arch/x86/boot/main.c | 5
-rw-r--r--  arch/x86/boot/memory.c | 1
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 6
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 16
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 3
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 4
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 36
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 28
-rw-r--r--  arch/x86/kernel/apic_32.c | 22
-rw-r--r--  arch/x86/kernel/apic_64.c | 7
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 10
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/elanfreq.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 125
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 12
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 3
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 3
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 8
-rw-r--r--  arch/x86/kernel/efi_32.c | 4
-rw-r--r--  arch/x86/kernel/genapic_64.c | 1
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 2
-rw-r--r--  arch/x86/kernel/head64.c | 1
-rw-r--r--  arch/x86/kernel/head_32.S | 8
-rw-r--r--  arch/x86/kernel/hpet.c | 24
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 6
-rw-r--r--  arch/x86/kernel/io_apic_64.c | 25
-rw-r--r--  arch/x86/kernel/ldt.c | 6
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 57
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 2
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 52
-rw-r--r--  arch/x86/kernel/microcode.c | 17
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 2
-rw-r--r--  arch/x86/kernel/mpparse.c | 17
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 28
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 75
-rw-r--r--  arch/x86/kernel/pci-dma.c | 157
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 14
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 14
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 2
-rw-r--r--  arch/x86/kernel/process_32.c | 5
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/reboot.c | 11
-rw-r--r--  arch/x86/kernel/relocate_kernel_32.S | 178
-rw-r--r--  arch/x86/kernel/setup.c | 37
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 21
-rw-r--r--  arch/x86/kernel/signal_64.c | 11
-rw-r--r--  arch/x86/kernel/smpboot.c | 65
-rw-r--r--  arch/x86/kernel/smpcommon.c | 17
-rw-r--r--  arch/x86/kernel/traps_64.c | 9
-rw-r--r--  arch/x86/kernel/tsc.c | 2
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 6
-rw-r--r--  arch/x86/kernel/vmi_32.c | 3
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S | 8
-rw-r--r--  arch/x86/kvm/Kconfig | 1
-rw-r--r--  arch/x86/kvm/mmu.c | 107
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 12
-rw-r--r--  arch/x86/kvm/svm.c | 10
-rw-r--r--  arch/x86/kvm/vmx.c | 22
-rw-r--r--  arch/x86/kvm/x86.c | 133
-rw-r--r--  arch/x86/lguest/boot.c | 3
-rw-r--r--  arch/x86/lib/copy_user_64.S | 2
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S | 3
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/gup.c | 298
-rw-r--r--  arch/x86/mm/init_64.c | 49
-rw-r--r--  arch/x86/mm/ioremap.c | 2
-rw-r--r--  arch/x86/mm/pageattr-test.c | 3
-rw-r--r--  arch/x86/mm/pageattr.c | 21
-rw-r--r--  arch/x86/mm/pgtable.c | 3
-rw-r--r--  arch/x86/mm/pgtable_32.c | 47
-rw-r--r--  arch/x86/mm/srat_32.c | 12
-rw-r--r--  arch/x86/oprofile/op_model_p4.c | 175
-rw-r--r--  arch/x86/pci/fixup.c | 3
-rw-r--r--  arch/x86/pci/i386.c | 26
-rw-r--r--  arch/x86/pci/irq.c | 106
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 2
-rw-r--r--  arch/x86/pci/numaq_32.c | 5
-rw-r--r--  arch/x86/power/cpu_32.c | 6
-rw-r--r--  arch/x86/power/hibernate_asm_32.S | 26
87 files changed, 1422 insertions(+), 924 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e3cba0b45600..68d91c8233f4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,12 +23,13 @@ config X86
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_IOREMAP_PROT 24 select HAVE_IOREMAP_PROT
25 select HAVE_KPROBES 25 select HAVE_KPROBES
26 select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X 26 select ARCH_WANT_OPTIONAL_GPIOLIB
27 select HAVE_KRETPROBES 27 select HAVE_KRETPROBES
28 select HAVE_DYNAMIC_FTRACE 28 select HAVE_DYNAMIC_FTRACE
29 select HAVE_FTRACE 29 select HAVE_FTRACE
30 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 30 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
31 select HAVE_ARCH_KGDB if !X86_VOYAGER 31 select HAVE_ARCH_KGDB if !X86_VOYAGER
32 select HAVE_GENERIC_DMA_COHERENT if X86_32
32 select HAVE_EFFICIENT_UNALIGNED_ACCESS 33 select HAVE_EFFICIENT_UNALIGNED_ACCESS
33 34
34config ARCH_DEFCONFIG 35config ARCH_DEFCONFIG
@@ -332,20 +333,6 @@ config X86_BIGSMP
332 333
333endif 334endif
334 335
335config X86_RDC321X
336 bool "RDC R-321x SoC"
337 depends on X86_32
338 select M486
339 select X86_REBOOTFIXUPS
340 select GENERIC_GPIO
341 select LEDS_CLASS
342 select LEDS_GPIO
343 select NEW_LEDS
344 help
345 This option is needed for RDC R-321x system-on-chip, also known
346 as R-8610-(G).
347 If you don't have one of these chips, you should say N here.
348
349config X86_VSMP 336config X86_VSMP
350 bool "Support for ScaleMP vSMP" 337 bool "Support for ScaleMP vSMP"
351 select PARAVIRT 338 select PARAVIRT
@@ -369,6 +356,16 @@ config X86_VISWS
369 A kernel compiled for the Visual Workstation will run on general 356 A kernel compiled for the Visual Workstation will run on general
370 PCs as well. See <file:Documentation/sgi-visws.txt> for details. 357 PCs as well. See <file:Documentation/sgi-visws.txt> for details.
371 358
359config X86_RDC321X
360 bool "RDC R-321x SoC"
361 depends on X86_32
362 select M486
363 select X86_REBOOTFIXUPS
364 help
365 This option is needed for RDC R-321x system-on-chip, also known
366 as R-8610-(G).
367 If you don't have one of these chips, you should say N here.
368
372config SCHED_NO_NO_OMIT_FRAME_POINTER 369config SCHED_NO_NO_OMIT_FRAME_POINTER
373 def_bool y 370 def_bool y
374 prompt "Single-depth WCHAN output" 371 prompt "Single-depth WCHAN output"
@@ -954,9 +951,9 @@ config NUMA
954 local memory controller of the CPU and add some more 951 local memory controller of the CPU and add some more
955 NUMA awareness to the kernel. 952 NUMA awareness to the kernel.
956 953
957 For i386 this is currently highly experimental and should be only 954 For 32-bit this is currently highly experimental and should be only
958 used for kernel development. It might also cause boot failures. 955 used for kernel development. It might also cause boot failures.
959 For x86_64 this is recommended on all multiprocessor Opteron systems. 956 For 64-bit this is recommended on all multiprocessor Opteron systems.
960 If the system is EM64T, you should say N unless your system is 957 If the system is EM64T, you should say N unless your system is
961 EM64T NUMA. 958 EM64T NUMA.
962 959
@@ -1266,7 +1263,7 @@ config KEXEC
1266 strongly in flux, so no good recommendation can be made. 1263 strongly in flux, so no good recommendation can be made.
1267 1264
1268config CRASH_DUMP 1265config CRASH_DUMP
1269 bool "kernel crash dumps (EXPERIMENTAL)" 1266 bool "kernel crash dumps"
1270 depends on X86_64 || (X86_32 && HIGHMEM) 1267 depends on X86_64 || (X86_32 && HIGHMEM)
1271 help 1268 help
1272 Generate crash dump after being started by kexec. 1269 Generate crash dump after being started by kexec.
@@ -1279,6 +1276,14 @@ config CRASH_DUMP
1279 (CONFIG_RELOCATABLE=y). 1276 (CONFIG_RELOCATABLE=y).
1280 For more details see Documentation/kdump/kdump.txt 1277 For more details see Documentation/kdump/kdump.txt
1281 1278
1279config KEXEC_JUMP
1280 bool "kexec jump (EXPERIMENTAL)"
1281 depends on EXPERIMENTAL
1282 depends on KEXEC && HIBERNATION && X86_32
1283 help
1284 Jump between original kernel and kexeced kernel and invoke
1285 code in physical address mode via KEXEC
1286
1282config PHYSICAL_START 1287config PHYSICAL_START
1283 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 1288 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
1284 default "0x1000000" if X86_NUMAQ 1289 default "0x1000000" if X86_NUMAQ
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 919ce21ea654..f5631da585b6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -118,11 +118,6 @@ mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
118fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ 118fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
119mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ 119mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
120 120
121# RDC R-321x subarch support
122mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
123mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/
124core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/
125
126# default subarch .h files 121# default subarch .h files
127mflags-y += -Iinclude/asm-x86/mach-default 122mflags-y += -Iinclude/asm-x86/mach-default
128 123
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index a34b9982c7cb..cc0ef13fba7a 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -24,10 +24,14 @@
24#include <linux/edd.h> 24#include <linux/edd.h>
25#include <asm/boot.h> 25#include <asm/boot.h>
26#include <asm/setup.h> 26#include <asm/setup.h>
27#include "bitops.h"
28#include <asm/cpufeature.h>
27 29
28/* Useful macros */ 30/* Useful macros */
29#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) 31#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
30 32
33#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
34
31extern struct setup_header hdr; 35extern struct setup_header hdr;
32extern struct boot_params boot_params; 36extern struct boot_params boot_params;
33 37
@@ -242,6 +246,12 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize);
242int cmdline_find_option_bool(const char *option); 246int cmdline_find_option_bool(const char *option);
243 247
244/* cpu.c, cpucheck.c */ 248/* cpu.c, cpucheck.c */
249struct cpu_features {
250 int level; /* Family, or 64 for x86-64 */
251 int model;
252 u32 flags[NCAPINTS];
253};
254extern struct cpu_features cpu;
245int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); 255int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
246int validate_cpu(void); 256int validate_cpu(void);
247 257
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 92d6fd73dc7d..75298fe2edca 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -16,9 +16,6 @@
16 */ 16 */
17 17
18#include "boot.h" 18#include "boot.h"
19#include "bitops.h"
20#include <asm/cpufeature.h>
21
22#include "cpustr.h" 19#include "cpustr.h"
23 20
24static char *cpu_name(int level) 21static char *cpu_name(int level)
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 7804389ee005..4b9ae7c56748 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -22,21 +22,13 @@
22 22
23#ifdef _SETUP 23#ifdef _SETUP
24# include "boot.h" 24# include "boot.h"
25# include "bitops.h"
26#endif 25#endif
27#include <linux/types.h> 26#include <linux/types.h>
28#include <asm/cpufeature.h>
29#include <asm/processor-flags.h> 27#include <asm/processor-flags.h>
30#include <asm/required-features.h> 28#include <asm/required-features.h>
31#include <asm/msr-index.h> 29#include <asm/msr-index.h>
32 30
33struct cpu_features { 31struct cpu_features cpu;
34 int level; /* Family, or 64 for x86-64 */
35 int model;
36 u32 flags[NCAPINTS];
37};
38
39static struct cpu_features cpu;
40static u32 cpu_vendor[3]; 32static u32 cpu_vendor[3];
41static u32 err_flags[NCAPINTS]; 33static u32 err_flags[NCAPINTS];
42 34
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 2296164b54d2..197421db1af1 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -73,6 +73,11 @@ static void keyboard_set_repeat(void)
73 */ 73 */
74static void query_ist(void) 74static void query_ist(void)
75{ 75{
76 /* Some older BIOSes apparently crash on this call, so filter
77 it from machines too old to have SpeedStep at all. */
78 if (cpu.level < 6)
79 return;
80
76 asm("int $0x15" 81 asm("int $0x15"
77 : "=a" (boot_params.ist_info.signature), 82 : "=a" (boot_params.ist_info.signature),
78 "=b" (boot_params.ist_info.command), 83 "=b" (boot_params.ist_info.command),
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 53165c97336b..8c3c25f35578 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,7 +13,6 @@
13 */ 13 */
14 14
15#include "boot.h" 15#include "boot.h"
16#include <linux/kernel.h>
17 16
18#define SMAP 0x534d4150 /* ASCII "SMAP" */ 17#define SMAP 0x534d4150 /* ASCII "SMAP" */
19 18
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 58cccb6483b0..a0e1dbe67dc1 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -441,12 +441,6 @@ beyond_if:
441 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 441 regs->r8 = regs->r9 = regs->r10 = regs->r11 =
442 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; 442 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
443 set_fs(USER_DS); 443 set_fs(USER_DS);
444 if (unlikely(current->ptrace & PT_PTRACED)) {
445 if (current->ptrace & PT_TRACE_EXEC)
446 ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
447 else
448 send_sig(SIGTRAP, current, 0);
449 }
450 return 0; 444 return 0;
451} 445}
452 446
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d71290..bfd10fd211cd 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
97#warning ACPI uses CMPXCHG, i486 and later hardware 97#warning ACPI uses CMPXCHG, i486 and later hardware
98#endif 98#endif
99 99
100static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
101
100/* -------------------------------------------------------------------------- 102/* --------------------------------------------------------------------------
101 Boot-time Configuration 103 Boot-time Configuration
102 -------------------------------------------------------------------------- */ 104 -------------------------------------------------------------------------- */
@@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
158struct acpi_mcfg_allocation *pci_mmcfg_config; 160struct acpi_mcfg_allocation *pci_mmcfg_config;
159int pci_mmcfg_config_num; 161int pci_mmcfg_config_num;
160 162
163static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
164{
165 if (!strcmp(mcfg->header.oem_id, "SGI"))
166 acpi_mcfg_64bit_base_addr = TRUE;
167
168 return 0;
169}
170
161int __init acpi_parse_mcfg(struct acpi_table_header *header) 171int __init acpi_parse_mcfg(struct acpi_table_header *header)
162{ 172{
163 struct acpi_table_mcfg *mcfg; 173 struct acpi_table_mcfg *mcfg;
@@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header)
190 } 200 }
191 201
192 memcpy(pci_mmcfg_config, &mcfg[1], config_size); 202 memcpy(pci_mmcfg_config, &mcfg[1], config_size);
203
204 acpi_mcfg_oem_check(mcfg);
205
193 for (i = 0; i < pci_mmcfg_config_num; ++i) { 206 for (i = 0; i < pci_mmcfg_config_num; ++i) {
194 if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { 207 if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
208 !acpi_mcfg_64bit_base_addr) {
195 printk(KERN_ERR PREFIX 209 printk(KERN_ERR PREFIX
196 "MMCONFIG not in low 4GB of memory\n"); 210 "MMCONFIG not in low 4GB of memory\n");
197 kfree(pci_mmcfg_config); 211 kfree(pci_mmcfg_config);
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 9220cf46aa10..c2502eb9aa83 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
73 struct cpuinfo_x86 *c = &cpu_data(cpu); 73 struct cpuinfo_x86 *c = &cpu_data(cpu);
74 74
75 cpumask_t saved_mask; 75 cpumask_t saved_mask;
76 cpumask_of_cpu_ptr(new_mask, cpu);
77 int retval; 76 int retval;
78 unsigned int eax, ebx, ecx, edx; 77 unsigned int eax, ebx, ecx, edx;
79 unsigned int edx_part; 78 unsigned int edx_part;
@@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
92 91
93 /* Make sure we are running on right CPU */ 92 /* Make sure we are running on right CPU */
94 saved_mask = current->cpus_allowed; 93 saved_mask = current->cpus_allowed;
95 retval = set_cpus_allowed_ptr(current, new_mask); 94 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
96 if (retval) 95 if (retval)
97 return -1; 96 return -1;
98 97
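
A cleanup that recurs throughout this merge: the temporary cpumask_of_cpu_ptr*() helpers are removed and callers simply pass &cpumask_of_cpu(cpu) to set_cpus_allowed_ptr(). The underlying idiom is the usual pin/work/restore sequence; a minimal sketch with a hypothetical function name, not code from this tree:

    /* Pin the current task to one CPU, do per-CPU work, restore the mask. */
    static int run_on_cpu_sketch(unsigned int cpu)
    {
            cpumask_t saved_mask = current->cpus_allowed;
            int err;

            err = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
            if (err)
                    return err;

            /* ... rdmsr/cpuid or other strictly per-CPU work here ... */

            set_cpus_allowed_ptr(current, &saved_mask);
            return 0;
    }
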
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index fa2161d5003b..426e5d91b63a 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags;
20/* address in low memory of the wakeup routine. */ 20/* address in low memory of the wakeup routine. */
21static unsigned long acpi_realmode; 21static unsigned long acpi_realmode;
22 22
23#ifdef CONFIG_64BIT 23#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
24static char temp_stack[10240]; 24static char temp_stack[10240];
25#endif 25#endif
26 26
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void)
86#endif /* !CONFIG_64BIT */ 86#endif /* !CONFIG_64BIT */
87 87
88 header->pmode_cr0 = read_cr0(); 88 header->pmode_cr0 = read_cr0();
89 header->pmode_cr4 = read_cr4(); 89 header->pmode_cr4 = read_cr4_safe();
90 header->realmode_flags = acpi_realmode_flags; 90 header->realmode_flags = acpi_realmode_flags;
91 header->real_magic = 0x12345678; 91 header->real_magic = 0x12345678;
92 92
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c25210e6ac88..de39e1f2ede5 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
29 29
30#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) 30#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
31 31
32#define to_pages(addr, size) \
33 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
34
35#define EXIT_LOOP_COUNT 10000000 32#define EXIT_LOOP_COUNT 10000000
36 33
37static DEFINE_RWLOCK(amd_iommu_devtable_lock); 34static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -104,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
104 */ 101 */
105static int iommu_completion_wait(struct amd_iommu *iommu) 102static int iommu_completion_wait(struct amd_iommu *iommu)
106{ 103{
107 int ret; 104 int ret, ready = 0;
105 unsigned status = 0;
108 struct iommu_cmd cmd; 106 struct iommu_cmd cmd;
109 volatile u64 ready = 0;
110 unsigned long ready_phys = virt_to_phys(&ready);
111 unsigned long i = 0; 107 unsigned long i = 0;
112 108
113 memset(&cmd, 0, sizeof(cmd)); 109 memset(&cmd, 0, sizeof(cmd));
114 cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; 110 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
115 cmd.data[1] = upper_32_bits(ready_phys);
116 cmd.data[2] = 1; /* value written to 'ready' */
117 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 111 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
118 112
119 iommu->need_sync = 0; 113 iommu->need_sync = 0;
@@ -125,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
125 119
126 while (!ready && (i < EXIT_LOOP_COUNT)) { 120 while (!ready && (i < EXIT_LOOP_COUNT)) {
127 ++i; 121 ++i;
128 cpu_relax(); 122 /* wait for the bit to become one */
123 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
124 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
129 } 125 }
130 126
127 /* set bit back to zero */
128 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
129 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
130
131 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) 131 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
132 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); 132 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
133 133
@@ -164,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
164 address &= PAGE_MASK; 164 address &= PAGE_MASK;
165 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); 165 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
166 cmd.data[1] |= domid; 166 cmd.data[1] |= domid;
167 cmd.data[2] = LOW_U32(address); 167 cmd.data[2] = lower_32_bits(address);
168 cmd.data[3] = upper_32_bits(address); 168 cmd.data[3] = upper_32_bits(address);
169 if (s) /* size bit - we flush more than one 4kb page */ 169 if (s) /* size bit - we flush more than one 4kb page */
170 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 170 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
@@ -185,7 +185,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
185 u64 address, size_t size) 185 u64 address, size_t size)
186{ 186{
187 int s = 0; 187 int s = 0;
188 unsigned pages = to_pages(address, size); 188 unsigned pages = iommu_num_pages(address, size);
189 189
190 address &= PAGE_MASK; 190 address &= PAGE_MASK;
191 191
@@ -557,8 +557,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
557 if (iommu->exclusion_start && 557 if (iommu->exclusion_start &&
558 iommu->exclusion_start < dma_dom->aperture_size) { 558 iommu->exclusion_start < dma_dom->aperture_size) {
559 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; 559 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
560 int pages = to_pages(iommu->exclusion_start, 560 int pages = iommu_num_pages(iommu->exclusion_start,
561 iommu->exclusion_length); 561 iommu->exclusion_length);
562 dma_ops_reserve_addresses(dma_dom, startpage, pages); 562 dma_ops_reserve_addresses(dma_dom, startpage, pages);
563 } 563 }
564 564
@@ -667,7 +667,7 @@ static int get_device_resources(struct device *dev,
667 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); 667 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
668 668
669 /* device not translated by any IOMMU in the system? */ 669 /* device not translated by any IOMMU in the system? */
670 if (_bdf >= amd_iommu_last_bdf) { 670 if (_bdf > amd_iommu_last_bdf) {
671 *iommu = NULL; 671 *iommu = NULL;
672 *domain = NULL; 672 *domain = NULL;
673 *bdf = 0xffff; 673 *bdf = 0xffff;
@@ -767,7 +767,7 @@ static dma_addr_t __map_single(struct device *dev,
767 unsigned int pages; 767 unsigned int pages;
768 int i; 768 int i;
769 769
770 pages = to_pages(paddr, size); 770 pages = iommu_num_pages(paddr, size);
771 paddr &= PAGE_MASK; 771 paddr &= PAGE_MASK;
772 772
773 address = dma_ops_alloc_addresses(dev, dma_dom, pages); 773 address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +802,7 @@ static void __unmap_single(struct amd_iommu *iommu,
802 if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) 802 if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
803 return; 803 return;
804 804
805 pages = to_pages(dma_addr, size); 805 pages = iommu_num_pages(dma_addr, size);
806 dma_addr &= PAGE_MASK; 806 dma_addr &= PAGE_MASK;
807 start = dma_addr; 807 start = dma_addr;
808 808
@@ -1085,7 +1085,7 @@ void prealloc_protection_domains(void)
1085 1085
1086 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1086 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1087 devid = (dev->bus->number << 8) | dev->devfn; 1087 devid = (dev->bus->number << 8) | dev->devfn;
1088 if (devid >= amd_iommu_last_bdf) 1088 if (devid > amd_iommu_last_bdf)
1089 continue; 1089 continue;
1090 devid = amd_iommu_alias_table[devid]; 1090 devid = amd_iommu_alias_table[devid];
1091 if (domain_for_device(devid)) 1091 if (domain_for_device(devid))
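
Two things change in amd_iommu.c above: completion waits now poll the COMPL_WAIT interrupt bit in the MMIO status register instead of spinning on an in-memory semaphore written by the IOMMU, and the driver-local to_pages() macro gives way to the generic iommu_num_pages() helper. For reference, a sketch of the page-count arithmetic the removed macro performed (illustrative name only; the real helper is defined elsewhere in the tree):

    /* Number of 4K pages touched by the DMA range [addr, addr + size). */
    static unsigned long num_dma_pages_sketch(unsigned long addr, size_t size)
    {
            return round_up((addr & ~PAGE_MASK) + size, PAGE_SIZE) >> PAGE_SHIFT;
    }
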
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c9d8ff2eb130..a69cc0f52042 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
732 set_device_exclusion_range(m->devid, m); 732 set_device_exclusion_range(m->devid, m);
733 break; 733 break;
734 case ACPI_IVMD_TYPE_ALL: 734 case ACPI_IVMD_TYPE_ALL:
735 for (i = 0; i < amd_iommu_last_bdf; ++i) 735 for (i = 0; i <= amd_iommu_last_bdf; ++i)
736 set_device_exclusion_range(i, m); 736 set_device_exclusion_range(i, m);
737 break; 737 break;
738 case ACPI_IVMD_TYPE_RANGE: 738 case ACPI_IVMD_TYPE_RANGE:
@@ -801,6 +801,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
801} 801}
802 802
803/* 803/*
804 * Init the device table to not allow DMA access for devices and
805 * suppress all page faults
806 */
807static void init_device_table(void)
808{
809 u16 devid;
810
811 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
812 set_dev_entry_bit(devid, DEV_ENTRY_VALID);
813 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
814 set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
815 }
816}
817
818/*
804 * This function finally enables all IOMMUs found in the system after 819 * This function finally enables all IOMMUs found in the system after
805 * they have been initialized 820 * they have been initialized
806 */ 821 */
@@ -931,10 +946,13 @@ int __init amd_iommu_init(void)
931 if (amd_iommu_pd_alloc_bitmap == NULL) 946 if (amd_iommu_pd_alloc_bitmap == NULL)
932 goto free; 947 goto free;
933 948
949 /* init the device table */
950 init_device_table();
951
934 /* 952 /*
935 * let all alias entries point to itself 953 * let all alias entries point to itself
936 */ 954 */
937 for (i = 0; i < amd_iommu_last_bdf; ++i) 955 for (i = 0; i <= amd_iommu_last_bdf; ++i)
938 amd_iommu_alias_table[i] = i; 956 amd_iommu_alias_table[i] = i;
939 957
940 /* 958 /*
@@ -954,15 +972,15 @@ int __init amd_iommu_init(void)
954 if (acpi_table_parse("IVRS", init_memory_definitions) != 0) 972 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
955 goto free; 973 goto free;
956 974
957 ret = amd_iommu_init_dma_ops(); 975 ret = sysdev_class_register(&amd_iommu_sysdev_class);
958 if (ret) 976 if (ret)
959 goto free; 977 goto free;
960 978
961 ret = sysdev_class_register(&amd_iommu_sysdev_class); 979 ret = sysdev_register(&device_amd_iommu);
962 if (ret) 980 if (ret)
963 goto free; 981 goto free;
964 982
965 ret = sysdev_register(&device_amd_iommu); 983 ret = amd_iommu_init_dma_ops();
966 if (ret) 984 if (ret)
967 goto free; 985 goto free;
968 986
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 0059e7a8a9e6..0ff576d026a4 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1458,8 +1458,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1458 } 1458 }
1459} 1459}
1460 1460
1461unsigned int __cpuinitdata maxcpus = NR_CPUS;
1462
1463void __cpuinit generic_processor_info(int apicid, int version) 1461void __cpuinit generic_processor_info(int apicid, int version)
1464{ 1462{
1465 int cpu; 1463 int cpu;
@@ -1486,12 +1484,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1486 return; 1484 return;
1487 } 1485 }
1488 1486
1489 if (num_processors >= maxcpus) {
1490 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
1491 " Processor ignored.\n", maxcpus);
1492 return;
1493 }
1494
1495 num_processors++; 1487 num_processors++;
1496 cpus_complement(tmp_map, cpu_present_map); 1488 cpus_complement(tmp_map, cpu_present_map);
1497 cpu = first_cpu(tmp_map); 1489 cpu = first_cpu(tmp_map);
@@ -1724,15 +1716,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1724} 1716}
1725early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1717early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1726 1718
1727static int __init apic_set_verbosity(char *str) 1719static int __init apic_set_verbosity(char *arg)
1728{ 1720{
1729 if (strcmp("debug", str) == 0) 1721 if (!arg)
1722 return -EINVAL;
1723
1724 if (strcmp(arg, "debug") == 0)
1730 apic_verbosity = APIC_DEBUG; 1725 apic_verbosity = APIC_DEBUG;
1731 else if (strcmp("verbose", str) == 0) 1726 else if (strcmp(arg, "verbose") == 0)
1732 apic_verbosity = APIC_VERBOSE; 1727 apic_verbosity = APIC_VERBOSE;
1733 return 1; 1728
1729 return 0;
1734} 1730}
1735__setup("apic=", apic_set_verbosity); 1731early_param("apic", apic_set_verbosity);
1736 1732
1737static int __init lapic_insert_resource(void) 1733static int __init lapic_insert_resource(void)
1738{ 1734{
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index e571351f2a93..57744f4a75b4 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,7 +90,6 @@ static unsigned long apic_phys;
90 90
91unsigned long mp_lapic_addr; 91unsigned long mp_lapic_addr;
92 92
93unsigned int __cpuinitdata maxcpus = NR_CPUS;
94/* 93/*
95 * Get the LAPIC version 94 * Get the LAPIC version
96 */ 95 */
@@ -1066,12 +1065,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1066 return; 1065 return;
1067 } 1066 }
1068 1067
1069 if (num_processors >= maxcpus) {
1070 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
1071 " Processor ignored.\n", maxcpus);
1072 return;
1073 }
1074
1075 num_processors++; 1068 num_processors++;
1076 cpus_complement(tmp_map, cpu_present_map); 1069 cpus_complement(tmp_map, cpu_present_map);
1077 cpu = first_cpu(tmp_map); 1070 cpu = first_cpu(tmp_map);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e85..c8e315f1aa83 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
50 */ 50 */
51static void __init check_fpu(void) 51static void __init check_fpu(void)
52{ 52{
53 s32 fdiv_bug;
54
53 if (!boot_cpu_data.hard_math) { 55 if (!boot_cpu_data.hard_math) {
54#ifndef CONFIG_MATH_EMULATION 56#ifndef CONFIG_MATH_EMULATION
55 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); 57 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
74 "fistpl %0\n\t" 76 "fistpl %0\n\t"
75 "fwait\n\t" 77 "fwait\n\t"
76 "fninit" 78 "fninit"
77 : "=m" (*&boot_cpu_data.fdiv_bug) 79 : "=m" (*&fdiv_bug)
78 : "m" (*&x), "m" (*&y)); 80 : "m" (*&x), "m" (*&y));
81
82 boot_cpu_data.fdiv_bug = fdiv_bug;
79 if (boot_cpu_data.fdiv_bug) 83 if (boot_cpu_data.fdiv_bug)
80 printk("Hmm, FPU with FDIV bug.\n"); 84 printk("Hmm, FPU with FDIV bug.\n");
81} 85}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596d..efae3b22a0ff 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
235 If in doubt, say N. 235 If in doubt, say N.
236 236
237config X86_E_POWERSAVER 237config X86_E_POWERSAVER
238 tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" 238 tristate "VIA C7 Enhanced PowerSaver"
239 select CPU_FREQ_TABLE 239 select CPU_FREQ_TABLE
240 depends on X86_32 && EXPERIMENTAL 240 depends on X86_32
241 help 241 help
242 This adds the CPUFreq driver for VIA C7 processors. 242 This adds the CPUFreq driver for VIA C7 processors.
243 243
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ff2fff56f0a8..dd097b835839 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd)
200static void drv_write(struct drv_cmd *cmd) 200static void drv_write(struct drv_cmd *cmd)
201{ 201{
202 cpumask_t saved_mask = current->cpus_allowed; 202 cpumask_t saved_mask = current->cpus_allowed;
203 cpumask_of_cpu_ptr_declare(cpu_mask);
204 unsigned int i; 203 unsigned int i;
205 204
206 for_each_cpu_mask_nr(i, cmd->mask) { 205 for_each_cpu_mask_nr(i, cmd->mask) {
207 cpumask_of_cpu_ptr_next(cpu_mask, i); 206 set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
208 set_cpus_allowed_ptr(current, cpu_mask);
209 do_drv_write(cmd); 207 do_drv_write(cmd);
210 } 208 }
211 209
@@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu)
269 } aperf_cur, mperf_cur; 267 } aperf_cur, mperf_cur;
270 268
271 cpumask_t saved_mask; 269 cpumask_t saved_mask;
272 cpumask_of_cpu_ptr(cpu_mask, cpu);
273 unsigned int perf_percent; 270 unsigned int perf_percent;
274 unsigned int retval; 271 unsigned int retval;
275 272
276 saved_mask = current->cpus_allowed; 273 saved_mask = current->cpus_allowed;
277 set_cpus_allowed_ptr(current, cpu_mask); 274 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
278 if (get_cpu() != cpu) { 275 if (get_cpu() != cpu) {
279 /* We were not able to run on requested processor */ 276 /* We were not able to run on requested processor */
280 put_cpu(); 277 put_cpu();
@@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu)
340 337
341static unsigned int get_cur_freq_on_cpu(unsigned int cpu) 338static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
342{ 339{
343 cpumask_of_cpu_ptr(cpu_mask, cpu);
344 struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); 340 struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
345 unsigned int freq; 341 unsigned int freq;
346 unsigned int cached_freq; 342 unsigned int cached_freq;
@@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
353 } 349 }
354 350
355 cached_freq = data->freq_table[data->acpi_data->state].frequency; 351 cached_freq = data->freq_table[data->acpi_data->state].frequency;
356 freq = extract_freq(get_cur_val(cpu_mask), data); 352 freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
357 if (freq != cached_freq) { 353 if (freq != cached_freq) {
358 /* 354 /*
359 * The dreaded BIOS frequency change behind our back. 355 * The dreaded BIOS frequency change behind our back.
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f563..e4a4bf870e94 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
44 * It is important that the frequencies 44 * It is important that the frequencies
45 * are listed in ascending order here! 45 * are listed in ascending order here!
46 */ 46 */
47struct s_elan_multiplier elan_multiplier[] = { 47static struct s_elan_multiplier elan_multiplier[] = {
48 {1000, 0x02, 0x18}, 48 {1000, 0x02, 0x18},
49 {2000, 0x02, 0x10}, 49 {2000, 0x02, 0x10},
50 {4000, 0x02, 0x08}, 50 {4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 53c7b6936973..4e7271999a74 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
66 return 800 + (fid * 100); 66 return 800 + (fid * 100);
67} 67}
68 68
69
70/* Return a frequency in KHz, given an input fid */ 69/* Return a frequency in KHz, given an input fid */
71static u32 find_khz_freq_from_fid(u32 fid) 70static u32 find_khz_freq_from_fid(u32 fid)
72{ 71{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
78 return data[pstate].frequency; 77 return data[pstate].frequency;
79} 78}
80 79
81
82/* Return the vco fid for an input fid 80/* Return the vco fid for an input fid
83 * 81 *
84 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids 82 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
166 wrmsr(MSR_FIDVID_CTL, lo, hi); 164 wrmsr(MSR_FIDVID_CTL, lo, hi);
167} 165}
168 166
169
170/* write the new fid value along with the other control fields to the msr */ 167/* write the new fid value along with the other control fields to the msr */
171static int write_new_fid(struct powernow_k8_data *data, u32 fid) 168static int write_new_fid(struct powernow_k8_data *data, u32 fid)
172{ 169{
@@ -479,12 +476,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
479static int check_supported_cpu(unsigned int cpu) 476static int check_supported_cpu(unsigned int cpu)
480{ 477{
481 cpumask_t oldmask; 478 cpumask_t oldmask;
482 cpumask_of_cpu_ptr(cpu_mask, cpu);
483 u32 eax, ebx, ecx, edx; 479 u32 eax, ebx, ecx, edx;
484 unsigned int rc = 0; 480 unsigned int rc = 0;
485 481
486 oldmask = current->cpus_allowed; 482 oldmask = current->cpus_allowed;
487 set_cpus_allowed_ptr(current, cpu_mask); 483 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
488 484
489 if (smp_processor_id() != cpu) { 485 if (smp_processor_id() != cpu) {
490 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); 486 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -741,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
741#ifdef CONFIG_X86_POWERNOW_K8_ACPI 737#ifdef CONFIG_X86_POWERNOW_K8_ACPI
742static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) 738static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
743{ 739{
744 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 740 if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
745 return; 741 return;
746 742
747 data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; 743 data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
748 data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; 744 data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
749 data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; 745 data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
750 data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; 746 data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
751 data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); 747 data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
752 data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; 748 data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
749}
750
751
752static struct acpi_processor_performance *acpi_perf_data;
753static int preregister_valid;
754
755static int powernow_k8_cpu_preinit_acpi(void)
756{
757 acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
758 if (!acpi_perf_data)
759 return -ENODEV;
760
761 if (acpi_processor_preregister_performance(acpi_perf_data))
762 return -ENODEV;
763 else
764 preregister_valid = 1;
765 return 0;
753} 766}
754 767
755static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 768static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
756{ 769{
757 struct cpufreq_frequency_table *powernow_table; 770 struct cpufreq_frequency_table *powernow_table;
758 int ret_val; 771 int ret_val;
772 int cpu = 0;
759 773
760 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 774 data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
775 if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
761 dprintk("register performance failed: bad ACPI data\n"); 776 dprintk("register performance failed: bad ACPI data\n");
762 return -EIO; 777 return -EIO;
763 } 778 }
764 779
765 /* verify the data contained in the ACPI structures */ 780 /* verify the data contained in the ACPI structures */
766 if (data->acpi_data.state_count <= 1) { 781 if (data->acpi_data->state_count <= 1) {
767 dprintk("No ACPI P-States\n"); 782 dprintk("No ACPI P-States\n");
768 goto err_out; 783 goto err_out;
769 } 784 }
770 785
771 if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || 786 if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
772 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { 787 (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
773 dprintk("Invalid control/status registers (%x - %x)\n", 788 dprintk("Invalid control/status registers (%x - %x)\n",
774 data->acpi_data.control_register.space_id, 789 data->acpi_data->control_register.space_id,
775 data->acpi_data.status_register.space_id); 790 data->acpi_data->status_register.space_id);
776 goto err_out; 791 goto err_out;
777 } 792 }
778 793
779 /* fill in data->powernow_table */ 794 /* fill in data->powernow_table */
780 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) 795 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
781 * (data->acpi_data.state_count + 1)), GFP_KERNEL); 796 * (data->acpi_data->state_count + 1)), GFP_KERNEL);
782 if (!powernow_table) { 797 if (!powernow_table) {
783 dprintk("powernow_table memory alloc failure\n"); 798 dprintk("powernow_table memory alloc failure\n");
784 goto err_out; 799 goto err_out;
@@ -791,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
791 if (ret_val) 806 if (ret_val)
792 goto err_out_mem; 807 goto err_out_mem;
793 808
794 powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; 809 powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
795 powernow_table[data->acpi_data.state_count].index = 0; 810 powernow_table[data->acpi_data->state_count].index = 0;
796 data->powernow_table = powernow_table; 811 data->powernow_table = powernow_table;
797 812
798 /* fill in data */ 813 /* fill in data */
799 data->numps = data->acpi_data.state_count; 814 data->numps = data->acpi_data->state_count;
800 if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) 815 if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
801 print_basics(data); 816 print_basics(data);
802 powernow_k8_acpi_pst_values(data, 0); 817 powernow_k8_acpi_pst_values(data, 0);
@@ -804,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
804 /* notify BIOS that we exist */ 819 /* notify BIOS that we exist */
805 acpi_processor_notify_smm(THIS_MODULE); 820 acpi_processor_notify_smm(THIS_MODULE);
806 821
822 /* determine affinity, from ACPI if available */
823 if (preregister_valid) {
824 if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
825 (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
826 data->starting_core_affinity = data->acpi_data->shared_cpu_map;
827 else
828 data->starting_core_affinity = cpumask_of_cpu(data->cpu);
829 } else {
830 /* best guess from family if not */
831 if (cpu_family == CPU_HW_PSTATE)
832 data->starting_core_affinity = cpumask_of_cpu(data->cpu);
833 else
834 data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
835 }
836
807 return 0; 837 return 0;
808 838
809err_out_mem: 839err_out_mem:
810 kfree(powernow_table); 840 kfree(powernow_table);
811 841
812err_out: 842err_out:
813 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 843 acpi_processor_unregister_performance(data->acpi_data, data->cpu);
814 844
815 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ 845 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
816 data->acpi_data.state_count = 0; 846 data->acpi_data->state_count = 0;
817 847
818 return -ENODEV; 848 return -ENODEV;
819} 849}
@@ -825,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
825 rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); 855 rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
826 data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; 856 data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
827 857
828 for (i = 0; i < data->acpi_data.state_count; i++) { 858 for (i = 0; i < data->acpi_data->state_count; i++) {
829 u32 index; 859 u32 index;
830 860
831 index = data->acpi_data.states[i].control & HW_PSTATE_MASK; 861 index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
832 if (index > data->max_hw_pstate) { 862 if (index > data->max_hw_pstate) {
833 printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); 863 printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
834 printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); 864 printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -844,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
844 874
845 powernow_table[i].index = index; 875 powernow_table[i].index = index;
846 876
847 powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; 877 powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
848 } 878 }
849 return 0; 879 return 0;
850} 880}
@@ -853,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
853{ 883{
854 int i; 884 int i;
855 int cntlofreq = 0; 885 int cntlofreq = 0;
856 for (i = 0; i < data->acpi_data.state_count; i++) { 886 for (i = 0; i < data->acpi_data->state_count; i++) {
857 u32 fid; 887 u32 fid;
858 u32 vid; 888 u32 vid;
859 889
860 if (data->exttype) { 890 if (data->exttype) {
861 fid = data->acpi_data.states[i].status & EXT_FID_MASK; 891 fid = data->acpi_data->states[i].status & EXT_FID_MASK;
862 vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; 892 vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
863 } else { 893 } else {
864 fid = data->acpi_data.states[i].control & FID_MASK; 894 fid = data->acpi_data->states[i].control & FID_MASK;
865 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; 895 vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
866 } 896 }
867 897
868 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); 898 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -903,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
903 cntlofreq = i; 933 cntlofreq = i;
904 } 934 }
905 935
906 if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { 936 if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
907 printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", 937 printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
908 powernow_table[i].frequency, 938 powernow_table[i].frequency,
909 (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); 939 (unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
910 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 940 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
911 continue; 941 continue;
912 } 942 }
@@ -916,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
916 946
917static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) 947static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
918{ 948{
919 if (data->acpi_data.state_count) 949 if (data->acpi_data->state_count)
920 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 950 acpi_processor_unregister_performance(data->acpi_data, data->cpu);
921} 951}
922 952
923#else 953#else
954static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
924static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } 955static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
925static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } 956static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
926static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } 957static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -1017,7 +1048,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
1017static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) 1048static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
1018{ 1049{
1019 cpumask_t oldmask; 1050 cpumask_t oldmask;
1020 cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
1021 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1051 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1022 u32 checkfid; 1052 u32 checkfid;
1023 u32 checkvid; 1053 u32 checkvid;
@@ -1032,7 +1062,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
1032 1062
1033 /* only run on specific CPU from here on */ 1063 /* only run on specific CPU from here on */
1034 oldmask = current->cpus_allowed; 1064 oldmask = current->cpus_allowed;
1035 set_cpus_allowed_ptr(current, cpu_mask); 1065 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
1036 1066
1037 if (smp_processor_id() != pol->cpu) { 1067 if (smp_processor_id() != pol->cpu) {
1038 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1068 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1106,8 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
1106static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) 1136static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1107{ 1137{
1108 struct powernow_k8_data *data; 1138 struct powernow_k8_data *data;
1109 cpumask_t oldmask; 1139 cpumask_t oldmask = CPU_MASK_ALL;
1110 cpumask_of_cpu_ptr_declare(newmask);
1111 int rc; 1140 int rc;
1112 1141
1113 if (!cpu_online(pol->cpu)) 1142 if (!cpu_online(pol->cpu))
@@ -1159,8 +1188,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1159 1188
1160 /* only run on specific CPU from here on */ 1189 /* only run on specific CPU from here on */
1161 oldmask = current->cpus_allowed; 1190 oldmask = current->cpus_allowed;
1162 cpumask_of_cpu_ptr_next(newmask, pol->cpu); 1191 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
1163 set_cpus_allowed_ptr(current, newmask);
1164 1192
1165 if (smp_processor_id() != pol->cpu) { 1193 if (smp_processor_id() != pol->cpu) {
1166 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1194 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1181,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1181 /* run on any CPU again */ 1209 /* run on any CPU again */
1182 set_cpus_allowed_ptr(current, &oldmask); 1210 set_cpus_allowed_ptr(current, &oldmask);
1183 1211
1184 if (cpu_family == CPU_HW_PSTATE) 1212 pol->cpus = data->starting_core_affinity;
1185 pol->cpus = *newmask;
1186 else
1187 pol->cpus = per_cpu(cpu_core_map, pol->cpu);
1188 data->available_cores = &(pol->cpus); 1213 data->available_cores = &(pol->cpus);
1189 1214
1190 /* Take a crude guess here. 1215 /* Take a crude guess here.
@@ -1248,7 +1273,6 @@ static unsigned int powernowk8_get (unsigned int cpu)
1248{ 1273{
1249 struct powernow_k8_data *data; 1274 struct powernow_k8_data *data;
1250 cpumask_t oldmask = current->cpus_allowed; 1275 cpumask_t oldmask = current->cpus_allowed;
1251 cpumask_of_cpu_ptr(newmask, cpu);
1252 unsigned int khz = 0; 1276 unsigned int khz = 0;
1253 unsigned int first; 1277 unsigned int first;
1254 1278
@@ -1258,7 +1282,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
1258 if (!data) 1282 if (!data)
1259 return -EINVAL; 1283 return -EINVAL;
1260 1284
1261 set_cpus_allowed_ptr(current, newmask); 1285 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
1262 if (smp_processor_id() != cpu) { 1286 if (smp_processor_id() != cpu) {
1263 printk(KERN_ERR PFX 1287 printk(KERN_ERR PFX
1264 "limiting to CPU %d failed in powernowk8_get\n", cpu); 1288 "limiting to CPU %d failed in powernowk8_get\n", cpu);
@@ -1308,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
1308 } 1332 }
1309 1333
1310 if (supported_cpus == num_online_cpus()) { 1334 if (supported_cpus == num_online_cpus()) {
1335 powernow_k8_cpu_preinit_acpi();
1311 printk(KERN_INFO PFX "Found %d %s " 1336 printk(KERN_INFO PFX "Found %d %s "
1312 "processors (%d cpu cores) (" VERSION ")\n", 1337 "processors (%d cpu cores) (" VERSION ")\n",
1313 num_online_nodes(), 1338 num_online_nodes(),
@@ -1324,6 +1349,10 @@ static void __exit powernowk8_exit(void)
1324 dprintk("exit\n"); 1349 dprintk("exit\n");
1325 1350
1326 cpufreq_unregister_driver(&cpufreq_amd64_driver); 1351 cpufreq_unregister_driver(&cpufreq_amd64_driver);
1352
1353#ifdef CONFIG_X86_POWERNOW_K8_ACPI
1354 free_percpu(acpi_perf_data);
1355#endif
1327} 1356}
1328 1357
1329MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); 1358MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
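
The powernow-k8 changes above switch struct powernow_k8_data from embedding struct acpi_processor_performance to pointing at a per-CPU instance that is allocated and preregistered once, so the ACPI shared_type/shared_cpu_map information can seed the starting core affinity. A sketch of that allocation pattern, assuming hypothetical names (the driver's actual functions appear in the diff):

    static struct acpi_processor_performance *perf_data_sketch;

    static int __init preinit_sketch(void)
    {
            perf_data_sketch = alloc_percpu(struct acpi_processor_performance);
            if (!perf_data_sketch)
                    return -ENOMEM;
            /* Let the ACPI layer fill in _PSD coordination data per CPU. */
            if (acpi_processor_preregister_performance(perf_data_sketch))
                    return -ENODEV;
            return 0;
    }

    static void __exit exit_sketch(void)
    {
            free_percpu(perf_data_sketch);  /* pairs with alloc_percpu() */
    }
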
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..a62612cd4be8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
33#ifdef CONFIG_X86_POWERNOW_K8_ACPI 33#ifdef CONFIG_X86_POWERNOW_K8_ACPI
34 /* the acpi table needs to be kept. it's only available if ACPI was 34 /* the acpi table needs to be kept. it's only available if ACPI was
35 * used to determine valid frequency/vid/fid states */ 35 * used to determine valid frequency/vid/fid states */
36 struct acpi_processor_performance acpi_data; 36 struct acpi_processor_performance *acpi_data;
37#endif 37#endif
38 /* we need to keep track of associated cores, but let cpufreq 38 /* we need to keep track of associated cores, but let cpufreq
39 * handle hotplug events - so just point at cpufreq pol->cpus 39 * handle hotplug events - so just point at cpufreq pol->cpus
40 * structure */ 40 * structure */
41 cpumask_t *available_cores; 41 cpumask_t *available_cores;
42 cpumask_t starting_core_affinity;
42}; 43};
43 44
44 45
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index ca2ac13b7af2..15e13c01cc36 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
324 unsigned l, h; 324 unsigned l, h;
325 unsigned clock_freq; 325 unsigned clock_freq;
326 cpumask_t saved_mask; 326 cpumask_t saved_mask;
327 cpumask_of_cpu_ptr(new_mask, cpu);
328 327
329 saved_mask = current->cpus_allowed; 328 saved_mask = current->cpus_allowed;
330 set_cpus_allowed_ptr(current, new_mask); 329 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
331 if (smp_processor_id() != cpu) 330 if (smp_processor_id() != cpu)
332 return 0; 331 return 0;
333 332
@@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy,
585 * Best effort undo.. 584 * Best effort undo..
586 */ 585 */
587 586
588 if (!cpus_empty(*covered_cpus)) { 587 if (!cpus_empty(*covered_cpus))
589 cpumask_of_cpu_ptr_declare(new_mask);
590
591 for_each_cpu_mask_nr(j, *covered_cpus) { 588 for_each_cpu_mask_nr(j, *covered_cpus) {
592 cpumask_of_cpu_ptr_next(new_mask, j); 589 set_cpus_allowed_ptr(current,
593 set_cpus_allowed_ptr(current, new_mask); 590 &cpumask_of_cpu(j));
594 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); 591 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
595 } 592 }
596 }
597 593
598 tmp = freqs.new; 594 tmp = freqs.new;
599 freqs.new = freqs.old; 595 freqs.new = freqs.old;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2f3728dc24f6..191f7263c61d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
244 244
245static unsigned int speedstep_get(unsigned int cpu) 245static unsigned int speedstep_get(unsigned int cpu)
246{ 246{
247 cpumask_of_cpu_ptr(newmask, cpu); 247 return _speedstep_get(&cpumask_of_cpu(cpu));
248 return _speedstep_get(newmask);
249} 248}
250 249
251/** 250/**
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 650d40f7912b..6b0a10b002f1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
516 unsigned long j; 516 unsigned long j;
517 int retval; 517 int retval;
518 cpumask_t oldmask; 518 cpumask_t oldmask;
519 cpumask_of_cpu_ptr(newmask, cpu);
520 519
521 if (num_cache_leaves == 0) 520 if (num_cache_leaves == 0)
522 return -ENOENT; 521 return -ENOENT;
@@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
527 return -ENOMEM; 526 return -ENOMEM;
528 527
529 oldmask = current->cpus_allowed; 528 oldmask = current->cpus_allowed;
530 retval = set_cpus_allowed_ptr(current, newmask); 529 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
531 if (retval) 530 if (retval)
532 goto out; 531 goto out;
533 532
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index de7439f82b92..05cc22dbd4ff 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz)
478 perfctr_msr = MSR_P4_IQ_PERFCTR1; 478 perfctr_msr = MSR_P4_IQ_PERFCTR1;
479 evntsel_msr = MSR_P4_CRU_ESCR0; 479 evntsel_msr = MSR_P4_CRU_ESCR0;
480 cccr_msr = MSR_P4_IQ_CCCR1; 480 cccr_msr = MSR_P4_IQ_CCCR1;
481 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); 481
482 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
483 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
484 cccr_val = P4_CCCR_OVF_PMI0;
485 else
486 cccr_val = P4_CCCR_OVF_PMI1;
487 cccr_val |= P4_CCCR_ESCR_SELECT(4);
482 } 488 }
483 489
484 evntsel = P4_ESCR_EVENT_SELECT(0x3F) 490 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 4b63c8e1f13b..5cab48ee61a4 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void)
53 * directory. If I have PAE, I just need to duplicate one entry in 53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory. 54 * page directory.
55 */ 55 */
56 cr4 = read_cr4(); 56 cr4 = read_cr4_safe();
57 57
58 if (cr4 & X86_CR4_PAE) { 58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd = 59 efi_bak_pg_dir_pointer[0].pgd =
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void)
91 gdt_descr.size = GDT_SIZE - 1; 91 gdt_descr.size = GDT_SIZE - 1;
92 load_gdt(&gdt_descr); 92 load_gdt(&gdt_descr);
93 93
94 cr4 = read_cr4(); 94 cr4 = read_cr4_safe();
95 95
96 if (cr4 & X86_CR4_PAE) { 96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd = 97 swapper_pg_dir[pgd_index(0)].pgd =
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..eaff0bbb1444 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -99,3 +99,4 @@ int is_uv_system(void)
99{ 99{
100 return uv_system_type != UV_NONE; 100 return uv_system_type != UV_NONE;
101} 101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888a..2d7e307c7779 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -222,7 +222,7 @@ static __init void map_low_mmrs(void)
222 222
223enum map_type {map_wb, map_uc}; 223enum map_type {map_wb, map_uc};
224 224
225static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) 225static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
226{ 226{
227 unsigned long bytes, paddr; 227 unsigned long bytes, paddr;
228 228
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1b318e903bf6..9bfc4d72fb2e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
88 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); 88 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
89 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 89 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
90 (__START_KERNEL & PGDIR_MASK))); 90 (__START_KERNEL & PGDIR_MASK)));
91 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
91 92
92 /* clear bss before set_intr_gate with early_idt_handler */ 93 /* clear bss before set_intr_gate with early_idt_handler */
93 clear_bss(); 94 clear_bss();
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f67e93441caf..a7010c3a377a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP
4561: 4561:
457#endif /* CONFIG_SMP */ 457#endif /* CONFIG_SMP */
458 jmp *(initial_code) 458 jmp *(initial_code)
459.align 4
460ENTRY(initial_code)
461 .long i386_start_kernel
462 459
463/* 460/*
464 * We depend on ET to be correct. This checks for 287/387. 461 * We depend on ET to be correct. This checks for 287/387.
@@ -601,6 +598,11 @@ ignore_int:
601#endif 598#endif
602 iret 599 iret
603 600
601.section .cpuinit.data,"wa"
602.align 4
603ENTRY(initial_code)
604 .long i386_start_kernel
605
604.section .text 606.section .text
605/* 607/*
606 * Real beginning of normal "text" segment 608 * Real beginning of normal "text" segment
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad2b15a1334d..59fd3b6b1303 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -359,6 +359,7 @@ static int hpet_clocksource_register(void)
359int __init hpet_enable(void) 359int __init hpet_enable(void)
360{ 360{
361 unsigned long id; 361 unsigned long id;
362 int i;
362 363
363 if (!is_hpet_capable()) 364 if (!is_hpet_capable())
364 return 0; 365 return 0;
@@ -369,6 +370,29 @@ int __init hpet_enable(void)
369 * Read the period and check for a sane value: 370 * Read the period and check for a sane value:
370 */ 371 */
371 hpet_period = hpet_readl(HPET_PERIOD); 372 hpet_period = hpet_readl(HPET_PERIOD);
373
374 /*
 375	 * AMD SB700 based systems with spread spectrum enabled use an
 376	 * SMM based HPET emulation to provide proper frequency
 377	 * setting. The SMM code is initialized with the first HPET
 378	 * register access and takes some time to complete. During
 379	 * this time the config register reads 0xffffffff. We poll the
 380	 * config register for at most 1000 iterations, waiting for it
 381	 * to read a non-0xffffffff value, to make sure that the HPET is
 382	 * up and running before we go further. A counting loop is safe,
 383	 * as the HPET access takes thousands of CPU cycles. On non-SB700
 384	 * based machines this check is only done once and has no side
 385	 * effects.
386 */
387 for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
388 if (i == 1000) {
389 printk(KERN_WARNING
390 "HPET config register value = 0xFFFFFFFF. "
391 "Disabling HPET\n");
392 goto out_nohpet;
393 }
394 }
395
372 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) 396 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
373 goto out_nohpet; 397 goto out_nohpet;
374 398
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c5..09cddb57bec4 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -57,7 +57,7 @@ atomic_t irq_mis_count;
57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
58 58
59static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock); 60DEFINE_SPINLOCK(vector_lock);
61 61
62int timer_through_8259 __initdata; 62int timer_through_8259 __initdata;
63 63
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq)
1209 return vector; 1209 return vector;
1210} 1210}
1211 1211
1212void setup_vector_irq(int cpu)
1213{
1214}
1215
1216static struct irq_chip ioapic_chip; 1212static struct irq_chip ioapic_chip;
1217 1213
1218#define IOAPIC_AUTO -1 1214#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d1707..61a83b70c18f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -101,7 +101,7 @@ int timer_through_8259 __initdata;
101static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 101static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
102 102
103static DEFINE_SPINLOCK(ioapic_lock); 103static DEFINE_SPINLOCK(ioapic_lock);
104DEFINE_SPINLOCK(vector_lock); 104static DEFINE_SPINLOCK(vector_lock);
105 105
106/* 106/*
107 * # of IRQ routing registers 107 * # of IRQ routing registers
@@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin)
697 return irq; 697 return irq;
698} 698}
699 699
700void lock_vector_lock(void)
701{
 702	/* Used so that the online set of cpus does not change
703 * during assign_irq_vector.
704 */
705 spin_lock(&vector_lock);
706}
707
708void unlock_vector_lock(void)
709{
710 spin_unlock(&vector_lock);
711}
712
700static int __assign_irq_vector(int irq, cpumask_t mask) 713static int __assign_irq_vector(int irq, cpumask_t mask)
701{ 714{
702 /* 715 /*
@@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq)
802 cpus_clear(cfg->domain); 815 cpus_clear(cfg->domain);
803} 816}
804 817
805static void __setup_vector_irq(int cpu) 818void __setup_vector_irq(int cpu)
806{ 819{
807 /* Initialize vector_irq on a new cpu */ 820 /* Initialize vector_irq on a new cpu */
808 /* This function must be called with vector_lock held */ 821 /* This function must be called with vector_lock held */
@@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu)
825 } 838 }
826} 839}
827 840
828void setup_vector_irq(int cpu)
829{
830 spin_lock(&vector_lock);
831 __setup_vector_irq(smp_processor_id());
832 spin_unlock(&vector_lock);
833}
834
835
836static struct irq_chip ioapic_chip; 841static struct irq_chip ioapic_chip;
837 842
838static void ioapic_register_intr(int irq, unsigned long trigger) 843static void ioapic_register_intr(int irq, unsigned long trigger)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3fee2aa50f3f..b68e21f06f4f 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
62 62
63 if (reload) { 63 if (reload) {
64#ifdef CONFIG_SMP 64#ifdef CONFIG_SMP
65 cpumask_of_cpu_ptr_declare(mask);
66
67 preempt_disable(); 65 preempt_disable();
68 load_LDT(pc); 66 load_LDT(pc);
69 cpumask_of_cpu_ptr_next(mask, smp_processor_id()); 67 if (!cpus_equal(current->mm->cpu_vm_mask,
70 if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) 68 cpumask_of_cpu(smp_processor_id())))
71 smp_call_function(flush_ldt, current->mm, 1); 69 smp_call_function(flush_ldt, current->mm, 1);
72 preempt_enable(); 70 preempt_enable();
73#else 71#else
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8864230d55af..0732adba05ca 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -12,6 +12,7 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/suspend.h>
15 16
16#include <asm/pgtable.h> 17#include <asm/pgtable.h>
17#include <asm/pgalloc.h> 18#include <asm/pgalloc.h>
@@ -22,6 +23,7 @@
22#include <asm/cpufeature.h> 23#include <asm/cpufeature.h>
23#include <asm/desc.h> 24#include <asm/desc.h>
24#include <asm/system.h> 25#include <asm/system.h>
26#include <asm/cacheflush.h>
25 27
26#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 28#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
27static u32 kexec_pgd[1024] PAGE_ALIGNED; 29static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -77,7 +79,7 @@ static void load_segments(void)
77/* 79/*
 78 * An architecture hook called to validate the 80 * An architecture hook called to validate the
79 * proposed image and prepare the control pages 81 * proposed image and prepare the control pages
80 * as needed. The pages for KEXEC_CONTROL_CODE_SIZE 82 * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE
 81 * have been allocated, but the segments have not yet 83 * have been allocated, but the segments have not yet
82 * been copied into the kernel. 84 * been copied into the kernel.
83 * 85 *
@@ -85,10 +87,12 @@ static void load_segments(void)
85 * reboot code buffer to allow us to avoid allocations 87 * reboot code buffer to allow us to avoid allocations
86 * later. 88 * later.
87 * 89 *
88 * Currently nothing. 90 * Make control page executable.
89 */ 91 */
90int machine_kexec_prepare(struct kimage *image) 92int machine_kexec_prepare(struct kimage *image)
91{ 93{
94 if (nx_enabled)
95 set_pages_x(image->control_code_page, 1);
92 return 0; 96 return 0;
93} 97}
94 98
@@ -98,27 +102,54 @@ int machine_kexec_prepare(struct kimage *image)
98 */ 102 */
99void machine_kexec_cleanup(struct kimage *image) 103void machine_kexec_cleanup(struct kimage *image)
100{ 104{
105 if (nx_enabled)
106 set_pages_nx(image->control_code_page, 1);
101} 107}
102 108
103/* 109/*
104 * Do not allocate memory (or fail in any way) in machine_kexec(). 110 * Do not allocate memory (or fail in any way) in machine_kexec().
105 * We are past the point of no return, committed to rebooting now. 111 * We are past the point of no return, committed to rebooting now.
106 */ 112 */
107NORET_TYPE void machine_kexec(struct kimage *image) 113void machine_kexec(struct kimage *image)
108{ 114{
109 unsigned long page_list[PAGES_NR]; 115 unsigned long page_list[PAGES_NR];
110 void *control_page; 116 void *control_page;
117 int save_ftrace_enabled;
118 asmlinkage unsigned long
119 (*relocate_kernel_ptr)(unsigned long indirection_page,
120 unsigned long control_page,
121 unsigned long start_address,
122 unsigned int has_pae,
123 unsigned int preserve_context);
124
125#ifdef CONFIG_KEXEC_JUMP
126 if (kexec_image->preserve_context)
127 save_processor_state();
128#endif
111 129
112 tracer_disable(); 130 save_ftrace_enabled = __ftrace_enabled_save();
113 131
114 /* Interrupts aren't acceptable while we reboot */ 132 /* Interrupts aren't acceptable while we reboot */
115 local_irq_disable(); 133 local_irq_disable();
116 134
135 if (image->preserve_context) {
136#ifdef CONFIG_X86_IO_APIC
137 /* We need to put APICs in legacy mode so that we can
 138		 * get timer interrupts in the second kernel. kexec/kdump
 139		 * paths already have calls to disable_IO_APIC() in
 140		 * one form or another. The kexec jump path also needs
141 * one.
142 */
143 disable_IO_APIC();
144#endif
145 }
146
117 control_page = page_address(image->control_code_page); 147 control_page = page_address(image->control_code_page);
118 memcpy(control_page, relocate_kernel, PAGE_SIZE); 148 memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
119 149
150 relocate_kernel_ptr = control_page;
120 page_list[PA_CONTROL_PAGE] = __pa(control_page); 151 page_list[PA_CONTROL_PAGE] = __pa(control_page);
121 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 152 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
122 page_list[PA_PGD] = __pa(kexec_pgd); 153 page_list[PA_PGD] = __pa(kexec_pgd);
123 page_list[VA_PGD] = (unsigned long)kexec_pgd; 154 page_list[VA_PGD] = (unsigned long)kexec_pgd;
124#ifdef CONFIG_X86_PAE 155#ifdef CONFIG_X86_PAE
@@ -131,6 +162,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
131 page_list[VA_PTE_0] = (unsigned long)kexec_pte0; 162 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
132 page_list[PA_PTE_1] = __pa(kexec_pte1); 163 page_list[PA_PTE_1] = __pa(kexec_pte1);
133 page_list[VA_PTE_1] = (unsigned long)kexec_pte1; 164 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
165 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
134 166
135 /* The segment registers are funny things, they have both a 167 /* The segment registers are funny things, they have both a
136 * visible and an invisible part. Whenever the visible part is 168 * visible and an invisible part. Whenever the visible part is
@@ -149,8 +181,17 @@ NORET_TYPE void machine_kexec(struct kimage *image)
149 set_idt(phys_to_virt(0),0); 181 set_idt(phys_to_virt(0),0);
150 182
151 /* now call it */ 183 /* now call it */
152 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 184 image->start = relocate_kernel_ptr((unsigned long)image->head,
153 image->start, cpu_has_pae); 185 (unsigned long)page_list,
186 image->start, cpu_has_pae,
187 image->preserve_context);
188
189#ifdef CONFIG_KEXEC_JUMP
190 if (kexec_image->preserve_context)
191 restore_processor_state();
192#endif
193
194 __ftrace_enabled_restore(save_ftrace_enabled);
154} 195}
155 196
156void arch_crash_save_vmcoreinfo(void) 197void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9dd9262693a3..c43caa3a91f3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image)
181 * Do not allocate memory (or fail in any way) in machine_kexec(). 181 * Do not allocate memory (or fail in any way) in machine_kexec().
182 * We are past the point of no return, committed to rebooting now. 182 * We are past the point of no return, committed to rebooting now.
183 */ 183 */
184NORET_TYPE void machine_kexec(struct kimage *image) 184void machine_kexec(struct kimage *image)
185{ 185{
186 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
187 void *control_page; 187 void *control_page;
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 07c0f828f488..3b599518c322 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -33,6 +33,8 @@
33#include <linux/module.h> 33#include <linux/module.h>
34#include <asm/geode.h> 34#include <asm/geode.h>
35 35
36#define MFGPT_DEFAULT_IRQ 7
37
36static struct mfgpt_timer_t { 38static struct mfgpt_timer_t {
37 unsigned int avail:1; 39 unsigned int avail:1;
38} mfgpt_timers[MFGPT_MAX_TIMERS]; 40} mfgpt_timers[MFGPT_MAX_TIMERS];
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
157} 159}
158EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); 160EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
159 161
160int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) 162int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
161{ 163{
162 u32 val, dummy; 164 u32 zsel, lpc, dummy;
163 int offset; 165 int shift;
164 166
165 if (timer < 0 || timer >= MFGPT_MAX_TIMERS) 167 if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
166 return -EIO; 168 return -EIO;
167 169
168 if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) 170 /*
171 * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
172 * is using the same CMP of the timer's Siamese twin, the IRQ is set to
173 * 2, and we mustn't use nor change it.
174 * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
175 * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
176 * with *irq==0 is safe. Currently there _are_ no 2 drivers.
177 */
178 rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
179 shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
180 if (((zsel >> shift) & 0xF) == 2)
169 return -EIO; 181 return -EIO;
170 182
171 rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); 183 /* Choose IRQ: if none supplied, keep IRQ already set or use default */
184 if (!*irq)
185 *irq = (zsel >> shift) & 0xF;
186 if (!*irq)
187 *irq = MFGPT_DEFAULT_IRQ;
172 188
173 offset = (timer % 4) * 4; 189 /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
174 190 if (*irq < 1 || *irq == 2 || *irq > 15)
175 val &= ~((0xF << offset) | (0xF << (offset + 16))); 191 return -EIO;
192 rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
193 if (lpc & (1 << *irq))
194 return -EIO;
176 195
196 /* All chosen and checked - go for it */
197 if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
198 return -EIO;
177 if (enable) { 199 if (enable) {
178 val |= (irq & 0x0F) << (offset); 200 zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
179 val |= (irq & 0x0F) << (offset + 16); 201 wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
180 } 202 }
181 203
182 wrmsr(MSR_PIC_ZSEL_LOW, val, dummy);
183 return 0; 204 return 0;
184} 205}
185 206
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
242static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; 263static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
243static u16 mfgpt_event_clock; 264static u16 mfgpt_event_clock;
244 265
245static int irq = 7; 266static int irq;
246static int __init mfgpt_setup(char *str) 267static int __init mfgpt_setup(char *str)
247{ 268{
248 get_option(&str, &irq); 269 get_option(&str, &irq);
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void)
346 mfgpt_event_clock = timer; 367 mfgpt_event_clock = timer;
347 368
348 /* Set up the IRQ on the MFGPT side */ 369 /* Set up the IRQ on the MFGPT side */
349 if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { 370 if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
350 printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); 371 printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
351 return -EIO; 372 return -EIO;
352 } 373 }
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void)
374 &mfgpt_clockevent); 395 &mfgpt_clockevent);
375 396
376 printk(KERN_INFO 397 printk(KERN_INFO
377 "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); 398 "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
399 timer, irq);
378 clockevents_register_device(&mfgpt_clockevent); 400 clockevents_register_device(&mfgpt_clockevent);
379 401
380 return 0; 402 return 0;
381 403
382err: 404err:
383 geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); 405 geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
384 printk(KERN_ERR 406 printk(KERN_ERR
385 "mfgpt-timer: Unable to set up the MFGPT clock source\n"); 407 "mfgpt-timer: Unable to set up the MFGPT clock source\n");
386 return -EIO; 408 return -EIO;
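[Editor's note, not part of the patch] For readers of the geode_mfgpt_set_irq() rework above, a hedged illustration of the MSR_PIC_ZSEL_LOW nibble arithmetic: each of the eight MFGPT events (timers 0-3, comparators CMP1 and CMP2) owns a 4-bit IRQ routing field, so timer 1 / CMP2 maps to shift = (4 + 1) * 4 = 20, i.e. bits 20..23 of the MSR. The helper name below is hypothetical:

	/* Illustrative only: extract the IRQ routing nibble for one timer/cmp pair. */
	static unsigned int mfgpt_zsel_irq(u32 zsel, int timer, int cmp)
	{
		int shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;

		return (zsel >> shift) & 0xF;	/* 0 = disabled, 2 = owned by VSA */
	}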
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 6994c751590e..652fa5c38ebe 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -388,7 +388,6 @@ static int do_microcode_update (void)
388 void *new_mc = NULL; 388 void *new_mc = NULL;
389 int cpu; 389 int cpu;
390 cpumask_t old; 390 cpumask_t old;
391 cpumask_of_cpu_ptr_declare(newmask);
392 391
393 old = current->cpus_allowed; 392 old = current->cpus_allowed;
394 393
@@ -405,8 +404,7 @@ static int do_microcode_update (void)
405 404
406 if (!uci->valid) 405 if (!uci->valid)
407 continue; 406 continue;
408 cpumask_of_cpu_ptr_next(newmask, cpu); 407 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
409 set_cpus_allowed_ptr(current, newmask);
410 error = get_maching_microcode(new_mc, cpu); 408 error = get_maching_microcode(new_mc, cpu);
411 if (error < 0) 409 if (error < 0)
412 goto out; 410 goto out;
@@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu)
576 struct cpuinfo_x86 *c = &cpu_data(cpu); 574 struct cpuinfo_x86 *c = &cpu_data(cpu);
577 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 575 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
578 cpumask_t old; 576 cpumask_t old;
579 cpumask_of_cpu_ptr(newmask, cpu);
580 unsigned int val[2]; 577 unsigned int val[2];
581 int err = 0; 578 int err = 0;
582 579
@@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu)
585 return 0; 582 return 0;
586 583
587 old = current->cpus_allowed; 584 old = current->cpus_allowed;
588 set_cpus_allowed_ptr(current, newmask); 585 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
589 586
590 /* Check if the microcode we have in memory matches the CPU */ 587 /* Check if the microcode we have in memory matches the CPU */
591 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || 588 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu)
623static void microcode_init_cpu(int cpu, int resume) 620static void microcode_init_cpu(int cpu, int resume)
624{ 621{
625 cpumask_t old; 622 cpumask_t old;
626 cpumask_of_cpu_ptr(newmask, cpu);
627 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 623 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
628 624
629 old = current->cpus_allowed; 625 old = current->cpus_allowed;
630 626
631 set_cpus_allowed_ptr(current, newmask); 627 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
632 mutex_lock(&microcode_mutex); 628 mutex_lock(&microcode_mutex);
633 collect_cpu_info(cpu); 629 collect_cpu_info(cpu);
634 if (uci->valid && system_state == SYSTEM_RUNNING && !resume) 630 if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
@@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev,
661 if (end == buf) 657 if (end == buf)
662 return -EINVAL; 658 return -EINVAL;
663 if (val == 1) { 659 if (val == 1) {
664 cpumask_t old; 660 cpumask_t old = current->cpus_allowed;
665 cpumask_of_cpu_ptr(newmask, cpu);
666
667 old = current->cpus_allowed;
668 661
669 get_online_cpus(); 662 get_online_cpus();
670 set_cpus_allowed_ptr(current, newmask); 663 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
671 664
672 mutex_lock(&microcode_mutex); 665 mutex_lock(&microcode_mutex);
673 if (uci->valid) 666 if (uci->valid)
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index fdfdc550b366..efc2f361fe85 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = {
238 {} 238 {}
239}; 239};
240 240
241void __init check_enable_amd_mmconf_dmi(void) 241void __cpuinit check_enable_amd_mmconf_dmi(void)
242{ 242{
243 dmi_check_system(mmconf_dmi_table); 243 dmi_check_system(mmconf_dmi_table);
244} 244}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed8..b3fb430725cb 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
49 return sum & 0xFF; 49 return sum & 0xFF;
50} 50}
51 51
52static void __cpuinit MP_processor_info(struct mpc_config_processor *m) 52static void __init MP_processor_info(struct mpc_config_processor *m)
53{ 53{
54 int apicid; 54 int apicid;
55 char *bootup_cpu = ""; 55 char *bootup_cpu = "";
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
83 if (x86_quirks->mpc_oem_bus_info) 83 if (x86_quirks->mpc_oem_bus_info)
84 x86_quirks->mpc_oem_bus_info(m, str); 84 x86_quirks->mpc_oem_bus_info(m, str);
85 else 85 else
86 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); 86 apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
87 87
88#if MAX_MP_BUSSES < 256 88#if MAX_MP_BUSSES < 256
89 if (m->mpc_busid >= MAX_MP_BUSSES) { 89 if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
154 154
155static void print_MP_intsrc_info(struct mpc_config_intsrc *m) 155static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
156{ 156{
157 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," 157 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
158 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 158 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
159 m->mpc_irqtype, m->mpc_irqflag & 3, 159 m->mpc_irqtype, m->mpc_irqflag & 3,
160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
163 163
164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
165{ 165{
166 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," 166 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
167 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 167 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
168 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 168 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
169 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 169 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
235 235
236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) 236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
237{ 237{
238 printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," 238 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
240 m->mpc_irqtype, m->mpc_irqflag & 3, 240 m->mpc_irqtype, m->mpc_irqflag & 3,
241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, 241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
484} 484}
485 485
486 486
487static void construct_ioapic_table(int mpc_default_type) 487static void __init construct_ioapic_table(int mpc_default_type)
488{ 488{
489 struct mpc_config_ioapic ioapic; 489 struct mpc_config_ioapic ioapic;
490 struct mpc_config_bus bus; 490 struct mpc_config_bus bus;
@@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type)
529 construct_default_ioirq_mptable(mpc_default_type); 529 construct_default_ioirq_mptable(mpc_default_type);
530} 530}
531#else 531#else
532static inline void construct_ioapic_table(int mpc_default_type) { } 532static inline void __init construct_ioapic_table(int mpc_default_type) { }
533#endif 533#endif
534 534
535static inline void __init construct_default_ISA_mptable(int mpc_default_type) 535static inline void __init construct_default_ISA_mptable(int mpc_default_type)
@@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
695 unsigned int *bp = phys_to_virt(base); 695 unsigned int *bp = phys_to_virt(base);
696 struct intel_mp_floating *mpf; 696 struct intel_mp_floating *mpf;
697 697
698 printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); 698 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
699 bp, length);
699 BUILD_BUG_ON(sizeof(*mpf) != 16); 700 BUILD_BUG_ON(sizeof(*mpf) != 16);
700 701
701 while (length > 0) { 702 while (length > 0) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 9fd809552447..e43938086885 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -131,7 +131,7 @@ static int msr_open(struct inode *inode, struct file *file)
131 ret = -EIO; /* MSR not supported */ 131 ret = -EIO; /* MSR not supported */
132out: 132out:
133 unlock_kernel(); 133 unlock_kernel();
134 return 0; 134 return ret;
135} 135}
136 136
137/* 137/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ac6d51222e7d..abb78a2cc4ad 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data)
114} 114}
115#endif 115#endif
116 116
117static void report_broken_nmi(int cpu, int *prev_nmi_count)
118{
119 printk(KERN_CONT "\n");
120
121 printk(KERN_WARNING
122 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
123 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
124
125 printk(KERN_WARNING
126 "Please report this to bugzilla.kernel.org,\n");
127 printk(KERN_WARNING
128 "and attach the output of the 'dmesg' command.\n");
129
130 per_cpu(wd_enabled, cpu) = 0;
131 atomic_dec(&nmi_active);
132}
133
117int __init check_nmi_watchdog(void) 134int __init check_nmi_watchdog(void)
118{ 135{
119 unsigned int *prev_nmi_count; 136 unsigned int *prev_nmi_count;
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void)
141 for_each_online_cpu(cpu) { 158 for_each_online_cpu(cpu) {
142 if (!per_cpu(wd_enabled, cpu)) 159 if (!per_cpu(wd_enabled, cpu))
143 continue; 160 continue;
144 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 161 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
145 printk(KERN_WARNING "WARNING: CPU#%d: NMI " 162 report_broken_nmi(cpu, prev_nmi_count);
146 "appears to be stuck (%d->%d)!\n",
147 cpu,
148 prev_nmi_count[cpu],
149 get_nmi_count(cpu));
150 per_cpu(wd_enabled, cpu) = 0;
151 atomic_dec(&nmi_active);
152 }
153 } 163 }
154 endflag = 1; 164 endflag = 1;
155 if (!atomic_read(&nmi_active)) { 165 if (!atomic_read(&nmi_active)) {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 19e7fc7c2c4f..218d783ed7a8 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -37,6 +37,7 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/scatterlist.h> 38#include <linux/scatterlist.h>
39#include <linux/iommu-helper.h> 39#include <linux/iommu-helper.h>
40
40#include <asm/iommu.h> 41#include <asm/iommu.h>
41#include <asm/calgary.h> 42#include <asm/calgary.h>
42#include <asm/tce.h> 43#include <asm/tce.h>
@@ -413,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev,
413 } 414 }
414} 415}
415 416
416static int calgary_nontranslate_map_sg(struct device* dev,
417 struct scatterlist *sg, int nelems, int direction)
418{
419 struct scatterlist *s;
420 int i;
421
422 for_each_sg(sg, s, nelems, i) {
423 struct page *p = sg_page(s);
424
425 BUG_ON(!p);
426 s->dma_address = virt_to_bus(sg_virt(s));
427 s->dma_length = s->length;
428 }
429 return nelems;
430}
431
432static int calgary_map_sg(struct device *dev, struct scatterlist *sg, 417static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
433 int nelems, int direction) 418 int nelems, int direction)
434{ 419{
@@ -439,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
439 unsigned long entry; 424 unsigned long entry;
440 int i; 425 int i;
441 426
442 if (!translation_enabled(tbl))
443 return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
444
445 for_each_sg(sg, s, nelems, i) { 427 for_each_sg(sg, s, nelems, i) {
446 BUG_ON(!sg_page(s)); 428 BUG_ON(!sg_page(s));
447 429
@@ -477,7 +459,6 @@ error:
477static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, 459static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
478 size_t size, int direction) 460 size_t size, int direction)
479{ 461{
480 dma_addr_t dma_handle = bad_dma_address;
481 void *vaddr = phys_to_virt(paddr); 462 void *vaddr = phys_to_virt(paddr);
482 unsigned long uaddr; 463 unsigned long uaddr;
483 unsigned int npages; 464 unsigned int npages;
@@ -486,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
486 uaddr = (unsigned long)vaddr; 467 uaddr = (unsigned long)vaddr;
487 npages = num_dma_pages(uaddr, size); 468 npages = num_dma_pages(uaddr, size);
488 469
489 if (translation_enabled(tbl)) 470 return iommu_alloc(dev, tbl, vaddr, npages, direction);
490 dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
491 else
492 dma_handle = virt_to_bus(vaddr);
493
494 return dma_handle;
495} 471}
496 472
497static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, 473static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
@@ -500,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
500 struct iommu_table *tbl = find_iommu_table(dev); 476 struct iommu_table *tbl = find_iommu_table(dev);
501 unsigned int npages; 477 unsigned int npages;
502 478
503 if (!translation_enabled(tbl))
504 return;
505
506 npages = num_dma_pages(dma_handle, size); 479 npages = num_dma_pages(dma_handle, size);
507 iommu_free(tbl, dma_handle, npages); 480 iommu_free(tbl, dma_handle, npages);
508} 481}
@@ -525,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
525 goto error; 498 goto error;
526 memset(ret, 0, size); 499 memset(ret, 0, size);
527 500
528 if (translation_enabled(tbl)) { 501 /* set up tces to cover the allocated range */
529 /* set up tces to cover the allocated range */ 502 mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
530 mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); 503 if (mapping == bad_dma_address)
531 if (mapping == bad_dma_address) 504 goto free;
532 goto free; 505 *dma_handle = mapping;
533
534 *dma_handle = mapping;
535 } else /* non translated slot */
536 *dma_handle = virt_to_bus(ret);
537
538 return ret; 506 return ret;
539
540free: 507free:
541 free_pages((unsigned long)ret, get_order(size)); 508 free_pages((unsigned long)ret, get_order(size));
542 ret = NULL; 509 ret = NULL;
@@ -544,7 +511,7 @@ error:
544 return ret; 511 return ret;
545} 512}
546 513
547static const struct dma_mapping_ops calgary_dma_ops = { 514static struct dma_mapping_ops calgary_dma_ops = {
548 .alloc_coherent = calgary_alloc_coherent, 515 .alloc_coherent = calgary_alloc_coherent,
549 .map_single = calgary_map_single, 516 .map_single = calgary_map_single,
550 .unmap_single = calgary_unmap_single, 517 .unmap_single = calgary_unmap_single,
@@ -1241,6 +1208,16 @@ static int __init calgary_init(void)
1241 goto error; 1208 goto error;
1242 } while (1); 1209 } while (1);
1243 1210
1211 dev = NULL;
1212 for_each_pci_dev(dev) {
1213 struct iommu_table *tbl;
1214
1215 tbl = find_iommu_table(&dev->dev);
1216
1217 if (translation_enabled(tbl))
1218 dev->dev.archdata.dma_ops = &calgary_dma_ops;
1219 }
1220
1244 return ret; 1221 return ret;
1245 1222
1246error: 1223error:
@@ -1262,6 +1239,7 @@ error:
1262 calgary_disable_translation(dev); 1239 calgary_disable_translation(dev);
1263 calgary_free_bus(dev); 1240 calgary_free_bus(dev);
1264 pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ 1241 pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
1242 dev->dev.archdata.dma_ops = NULL;
1265 } while (1); 1243 } while (1);
1266 1244
1267 return ret; 1245 return ret;
@@ -1372,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
1372 * Function for kdump case. Get the tce tables from first kernel 1350 * Function for kdump case. Get the tce tables from first kernel
1373 * by reading the contents of the base adress register of calgary iommu 1351 * by reading the contents of the base adress register of calgary iommu
1374 */ 1352 */
1375static void get_tce_space_from_tar() 1353static void __init get_tce_space_from_tar(void)
1376{ 1354{
1377 int bus; 1355 int bus;
1378 void __iomem *target; 1356 void __iomem *target;
@@ -1503,6 +1481,10 @@ void __init detect_calgary(void)
1503 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " 1481 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
1504 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, 1482 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
1505 debugging ? "enabled" : "disabled"); 1483 debugging ? "enabled" : "disabled");
1484
1485 /* swiotlb for devices that aren't behind the Calgary. */
1486 if (max_pfn > MAX_DMA32_PFN)
1487 swiotlb = 1;
1506 } 1488 }
1507 return; 1489 return;
1508 1490
@@ -1519,7 +1501,7 @@ int __init calgary_iommu_init(void)
1519{ 1501{
1520 int ret; 1502 int ret;
1521 1503
1522 if (no_iommu || swiotlb) 1504 if (no_iommu || (swiotlb && !calgary_detected))
1523 return -ENODEV; 1505 return -ENODEV;
1524 1506
1525 if (!calgary_detected) 1507 if (!calgary_detected)
@@ -1532,15 +1514,14 @@ int __init calgary_iommu_init(void)
1532 if (ret) { 1514 if (ret) {
1533 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " 1515 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
1534 "falling back to no_iommu\n", ret); 1516 "falling back to no_iommu\n", ret);
1535 if (max_pfn > MAX_DMA32_PFN)
1536 printk(KERN_ERR "WARNING more than 4GB of memory, "
1537 "32bit PCI may malfunction.\n");
1538 return ret; 1517 return ret;
1539 } 1518 }
1540 1519
1541 force_iommu = 1; 1520 force_iommu = 1;
1542 bad_dma_address = 0x0; 1521 bad_dma_address = 0x0;
1543 dma_ops = &calgary_dma_ops; 1522 /* dma_ops is set to swiotlb or nommu */
1523 if (!dma_ops)
1524 dma_ops = &nommu_dma_ops;
1544 1525
1545 return 0; 1526 return 0;
1546} 1527}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cbecb05551bb..87d4d6964ec2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,7 +11,7 @@
11 11
12static int forbid_dac __read_mostly; 12static int forbid_dac __read_mostly;
13 13
14const struct dma_mapping_ops *dma_ops; 14struct dma_mapping_ops *dma_ops;
15EXPORT_SYMBOL(dma_ops); 15EXPORT_SYMBOL(dma_ops);
16 16
17static int iommu_sac_force __read_mostly; 17static int iommu_sac_force __read_mostly;
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
123 123
124 pci_swiotlb_init(); 124 pci_swiotlb_init();
125} 125}
126
127unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
128{
129 unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
130
131 return size >> PAGE_SHIFT;
132}
133EXPORT_SYMBOL(iommu_num_pages);
126#endif 134#endif
127 135
128/* 136/*
@@ -192,126 +200,10 @@ static __init int iommu_setup(char *p)
192} 200}
193early_param("iommu", iommu_setup); 201early_param("iommu", iommu_setup);
194 202
195#ifdef CONFIG_X86_32
196int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
197 dma_addr_t device_addr, size_t size, int flags)
198{
199 void __iomem *mem_base = NULL;
200 int pages = size >> PAGE_SHIFT;
201 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
202
203 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
204 goto out;
205 if (!size)
206 goto out;
207 if (dev->dma_mem)
208 goto out;
209
210 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
211
212 mem_base = ioremap(bus_addr, size);
213 if (!mem_base)
214 goto out;
215
216 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
217 if (!dev->dma_mem)
218 goto out;
219 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
220 if (!dev->dma_mem->bitmap)
221 goto free1_out;
222
223 dev->dma_mem->virt_base = mem_base;
224 dev->dma_mem->device_base = device_addr;
225 dev->dma_mem->size = pages;
226 dev->dma_mem->flags = flags;
227
228 if (flags & DMA_MEMORY_MAP)
229 return DMA_MEMORY_MAP;
230
231 return DMA_MEMORY_IO;
232
233 free1_out:
234 kfree(dev->dma_mem);
235 out:
236 if (mem_base)
237 iounmap(mem_base);
238 return 0;
239}
240EXPORT_SYMBOL(dma_declare_coherent_memory);
241
242void dma_release_declared_memory(struct device *dev)
243{
244 struct dma_coherent_mem *mem = dev->dma_mem;
245
246 if (!mem)
247 return;
248 dev->dma_mem = NULL;
249 iounmap(mem->virt_base);
250 kfree(mem->bitmap);
251 kfree(mem);
252}
253EXPORT_SYMBOL(dma_release_declared_memory);
254
255void *dma_mark_declared_memory_occupied(struct device *dev,
256 dma_addr_t device_addr, size_t size)
257{
258 struct dma_coherent_mem *mem = dev->dma_mem;
259 int pos, err;
260 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
261
262 pages >>= PAGE_SHIFT;
263
264 if (!mem)
265 return ERR_PTR(-EINVAL);
266
267 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
268 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
269 if (err != 0)
270 return ERR_PTR(err);
271 return mem->virt_base + (pos << PAGE_SHIFT);
272}
273EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
274
275static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
276 dma_addr_t *dma_handle, void **ret)
277{
278 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
279 int order = get_order(size);
280
281 if (mem) {
282 int page = bitmap_find_free_region(mem->bitmap, mem->size,
283 order);
284 if (page >= 0) {
285 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
286 *ret = mem->virt_base + (page << PAGE_SHIFT);
287 memset(*ret, 0, size);
288 }
289 if (mem->flags & DMA_MEMORY_EXCLUSIVE)
290 *ret = NULL;
291 }
292 return (mem != NULL);
293}
294
295static int dma_release_coherent(struct device *dev, int order, void *vaddr)
296{
297 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
298
299 if (mem && vaddr >= mem->virt_base && vaddr <
300 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
301 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
302
303 bitmap_release_region(mem->bitmap, page, order);
304 return 1;
305 }
306 return 0;
307}
308#else
309#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
310#define dma_release_coherent(dev, order, vaddr) (0)
311#endif /* CONFIG_X86_32 */
312
313int dma_supported(struct device *dev, u64 mask) 203int dma_supported(struct device *dev, u64 mask)
314{ 204{
205 struct dma_mapping_ops *ops = get_dma_ops(dev);
206
315#ifdef CONFIG_PCI 207#ifdef CONFIG_PCI
316 if (mask > 0xffffffff && forbid_dac > 0) { 208 if (mask > 0xffffffff && forbid_dac > 0) {
317 dev_info(dev, "PCI: Disallowing DAC for device\n"); 209 dev_info(dev, "PCI: Disallowing DAC for device\n");
@@ -319,8 +211,8 @@ int dma_supported(struct device *dev, u64 mask)
319 } 211 }
320#endif 212#endif
321 213
322 if (dma_ops->dma_supported) 214 if (ops->dma_supported)
323 return dma_ops->dma_supported(dev, mask); 215 return ops->dma_supported(dev, mask);
324 216
325 /* Copied from i386. Doesn't make much sense, because it will 217 /* Copied from i386. Doesn't make much sense, because it will
326 only work for pci_alloc_coherent. 218 only work for pci_alloc_coherent.
@@ -367,6 +259,7 @@ void *
367dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 259dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
368 gfp_t gfp) 260 gfp_t gfp)
369{ 261{
262 struct dma_mapping_ops *ops = get_dma_ops(dev);
370 void *memory = NULL; 263 void *memory = NULL;
371 struct page *page; 264 struct page *page;
372 unsigned long dma_mask = 0; 265 unsigned long dma_mask = 0;
@@ -376,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
376 /* ignore region specifiers */ 269 /* ignore region specifiers */
377 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 270 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
378 271
379 if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) 272 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
380 return memory; 273 return memory;
381 274
382 if (!dev) { 275 if (!dev) {
@@ -435,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
435 /* Let low level make its own zone decisions */ 328 /* Let low level make its own zone decisions */
436 gfp &= ~(GFP_DMA32|GFP_DMA); 329 gfp &= ~(GFP_DMA32|GFP_DMA);
437 330
438 if (dma_ops->alloc_coherent) 331 if (ops->alloc_coherent)
439 return dma_ops->alloc_coherent(dev, size, 332 return ops->alloc_coherent(dev, size,
440 dma_handle, gfp); 333 dma_handle, gfp);
441 return NULL; 334 return NULL;
442 } 335 }
@@ -448,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
448 } 341 }
449 } 342 }
450 343
451 if (dma_ops->alloc_coherent) { 344 if (ops->alloc_coherent) {
452 free_pages((unsigned long)memory, get_order(size)); 345 free_pages((unsigned long)memory, get_order(size));
453 gfp &= ~(GFP_DMA|GFP_DMA32); 346 gfp &= ~(GFP_DMA|GFP_DMA32);
454 return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); 347 return ops->alloc_coherent(dev, size, dma_handle, gfp);
455 } 348 }
456 349
457 if (dma_ops->map_simple) { 350 if (ops->map_simple) {
458 *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), 351 *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
459 size, 352 size,
460 PCI_DMA_BIDIRECTIONAL); 353 PCI_DMA_BIDIRECTIONAL);
461 if (*dma_handle != bad_dma_address) 354 if (*dma_handle != bad_dma_address)
@@ -477,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent);
477void dma_free_coherent(struct device *dev, size_t size, 370void dma_free_coherent(struct device *dev, size_t size,
478 void *vaddr, dma_addr_t bus) 371 void *vaddr, dma_addr_t bus)
479{ 372{
373 struct dma_mapping_ops *ops = get_dma_ops(dev);
374
480 int order = get_order(size); 375 int order = get_order(size);
481 WARN_ON(irqs_disabled()); /* for portability */ 376 WARN_ON(irqs_disabled()); /* for portability */
482 if (dma_release_coherent(dev, order, vaddr)) 377 if (dma_release_from_coherent(dev, order, vaddr))
483 return; 378 return;
484 if (dma_ops->unmap_single) 379 if (ops->unmap_single)
485 dma_ops->unmap_single(dev, bus, size, 0); 380 ops->unmap_single(dev, bus, size, 0);
486 free_pages((unsigned long)vaddr, order); 381 free_pages((unsigned long)vaddr, order);
487} 382}
488EXPORT_SYMBOL(dma_free_coherent); 383EXPORT_SYMBOL(dma_free_coherent);
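[Editor's note, not part of the patch] The iommu_num_pages() helper exported in the pci-dma.c hunk above replaces the per-driver to_pages() macro removed from the GART code below. A worked example, assuming 4 KiB pages: an 8 KiB mapping starting 0x100 bytes into a page straddles three pages.

	/* Illustrative only: roundup((0x100 + 0x2000), 4096) >> 12 == 0x3000 >> 12 == 3 */
	unsigned long npages = iommu_num_pages(0xdead0100UL, 0x2000);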
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index df5f142657d2..49285f8fd4d5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
67 (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) 67 (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
68#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) 68#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
69 69
70#define to_pages(addr, size) \
71 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 70#define EMERGENCY_PAGES 32 /* = 128KB */
74 71
75#ifdef CONFIG_AGP 72#ifdef CONFIG_AGP
@@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
241static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, 238static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
242 size_t size, int dir) 239 size_t size, int dir)
243{ 240{
244 unsigned long npages = to_pages(phys_mem, size); 241 unsigned long npages = iommu_num_pages(phys_mem, size);
245 unsigned long iommu_page = alloc_iommu(dev, npages); 242 unsigned long iommu_page = alloc_iommu(dev, npages);
246 int i; 243 int i;
247 244
@@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
304 return; 301 return;
305 302
306 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; 303 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
307 npages = to_pages(dma_addr, size); 304 npages = iommu_num_pages(dma_addr, size);
308 for (i = 0; i < npages; i++) { 305 for (i = 0; i < npages; i++) {
309 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 306 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
310 CLEAR_LEAK(iommu_page + i); 307 CLEAR_LEAK(iommu_page + i);
@@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
387 } 384 }
388 385
389 addr = phys_addr; 386 addr = phys_addr;
390 pages = to_pages(s->offset, s->length); 387 pages = iommu_num_pages(s->offset, s->length);
391 while (pages--) { 388 while (pages--) {
392 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); 389 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
393 SET_LEAK(iommu_page); 390 SET_LEAK(iommu_page);
@@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
470 467
471 seg_size += s->length; 468 seg_size += s->length;
472 need = nextneed; 469 need = nextneed;
473 pages += to_pages(s->offset, s->length); 470 pages += iommu_num_pages(s->offset, s->length);
474 ps = s; 471 ps = s;
475 } 472 }
476 if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) 473 if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
@@ -692,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
692 689
693extern int agp_amd64_init(void); 690extern int agp_amd64_init(void);
694 691
695static const struct dma_mapping_ops gart_dma_ops = { 692static struct dma_mapping_ops gart_dma_ops = {
696 .mapping_error = NULL,
697 .map_single = gart_map_single, 693 .map_single = gart_map_single,
698 .map_simple = gart_map_simple, 694 .map_simple = gart_map_simple,
699 .unmap_single = gart_unmap_single, 695 .unmap_single = gart_unmap_single,
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 792b9179eff3..3f91f71cdc3e 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
72 return nents; 72 return nents;
73} 73}
74 74
75/* Make sure we keep the same behaviour */ 75struct dma_mapping_ops nommu_dma_ops = {
76static int nommu_mapping_error(dma_addr_t dma_addr)
77{
78#ifdef CONFIG_X86_32
79 return 0;
80#else
81 return (dma_addr == bad_dma_address);
82#endif
83}
84
85
86const struct dma_mapping_ops nommu_dma_ops = {
87 .map_single = nommu_map_single, 76 .map_single = nommu_map_single,
88 .map_sg = nommu_map_sg, 77 .map_sg = nommu_map_sg,
89 .mapping_error = nommu_mapping_error,
90 .is_phys = 1, 78 .is_phys = 1,
91}; 79};
92 80
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 20df839b9c20..c4ce0332759e 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); 18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
19} 19}
20 20
21const struct dma_mapping_ops swiotlb_dma_ops = { 21struct dma_mapping_ops swiotlb_dma_ops = {
22 .mapping_error = swiotlb_dma_mapping_error, 22 .mapping_error = swiotlb_dma_mapping_error,
23 .alloc_coherent = swiotlb_alloc_coherent, 23 .alloc_coherent = swiotlb_alloc_coherent,
24 .free_coherent = swiotlb_free_coherent, 24 .free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 53bc653ed5ca..3b7a1ddcc0bc 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -95,7 +95,6 @@ static inline void play_dead(void)
95{ 95{
96 /* This must be done before dead CPU ack */ 96 /* This must be done before dead CPU ack */
97 cpu_exit_clear(); 97 cpu_exit_clear();
98 wbinvd();
99 mb(); 98 mb();
100 /* Ack it */ 99 /* Ack it */
101 __get_cpu_var(cpu_state) = CPU_DEAD; 100 __get_cpu_var(cpu_state) = CPU_DEAD;
@@ -104,8 +103,8 @@ static inline void play_dead(void)
104 * With physical CPU hotplug, we should halt the cpu 103 * With physical CPU hotplug, we should halt the cpu
105 */ 104 */
106 local_irq_disable(); 105 local_irq_disable();
107 while (1) 106 /* mask all interrupts, flush any and all caches, and halt */
108 halt(); 107 wbinvd_halt();
109} 108}
110#else 109#else
111static inline void play_dead(void) 110static inline void play_dead(void)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3fb62a7d9a16..71553b664e2a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state);
93static inline void play_dead(void) 93static inline void play_dead(void)
94{ 94{
95 idle_task_exit(); 95 idle_task_exit();
96 wbinvd();
97 mb(); 96 mb();
98 /* Ack it */ 97 /* Ack it */
99 __get_cpu_var(cpu_state) = CPU_DEAD; 98 __get_cpu_var(cpu_state) = CPU_DEAD;
100 99
101 local_irq_disable(); 100 local_irq_disable();
102 while (1) 101 /* mask all interrupts, flush any and all caches, and halt */
103 halt(); 102 wbinvd_halt();
104} 103}
105#else 104#else
106static inline void play_dead(void) 105static inline void play_dead(void)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 06a9f643817e..724adfc63cb9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -414,25 +414,20 @@ void native_machine_shutdown(void)
414 414
415 /* The boot cpu is always logical cpu 0 */ 415 /* The boot cpu is always logical cpu 0 */
416 int reboot_cpu_id = 0; 416 int reboot_cpu_id = 0;
417 cpumask_of_cpu_ptr(newmask, reboot_cpu_id);
418 417
419#ifdef CONFIG_X86_32 418#ifdef CONFIG_X86_32
 420	/* See if a command line override has been given */ 419	/* See if a command line override has been given */
421 if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && 420 if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
422 cpu_online(reboot_cpu)) { 421 cpu_online(reboot_cpu))
423 reboot_cpu_id = reboot_cpu; 422 reboot_cpu_id = reboot_cpu;
424 cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
425 }
426#endif 423#endif
427 424
428 /* Make certain the cpu I'm about to reboot on is online */ 425 /* Make certain the cpu I'm about to reboot on is online */
429 if (!cpu_online(reboot_cpu_id)) { 426 if (!cpu_online(reboot_cpu_id))
430 reboot_cpu_id = smp_processor_id(); 427 reboot_cpu_id = smp_processor_id();
431 cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
432 }
433 428
434 /* Make certain I only run on the appropriate processor */ 429 /* Make certain I only run on the appropriate processor */
435 set_cpus_allowed_ptr(current, newmask); 430 set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
436 431
437 /* O.K Now that I'm on the appropriate processor, 432 /* O.K Now that I'm on the appropriate processor,
438 * stop all of the others. 433 * stop all of the others.
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470d..6f50664b2ba5 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,45 @@
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21#define PAE_PGD_ATTR (_PAGE_PRESENT) 21#define PAE_PGD_ATTR (_PAGE_PRESENT)
22 22
23/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * ~ control_page + PAGE_SIZE are used as data storage and stack for
25 * jumping back
26 */
27#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
28
29/* Minimal CPU state */
30#define ESP DATA(0x0)
31#define CR0 DATA(0x4)
32#define CR3 DATA(0x8)
33#define CR4 DATA(0xc)
34
35/* other data */
36#define CP_VA_CONTROL_PAGE DATA(0x10)
37#define CP_PA_PGD DATA(0x14)
38#define CP_PA_SWAP_PAGE DATA(0x18)
39#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
40
23 .text 41 .text
24 .align PAGE_SIZE 42 .align PAGE_SIZE
25 .globl relocate_kernel 43 .globl relocate_kernel
26relocate_kernel: 44relocate_kernel:
27 movl 8(%esp), %ebp /* list of pages */ 45 /* Save the CPU context, used for jumping back */
46
47 pushl %ebx
48 pushl %esi
49 pushl %edi
50 pushl %ebp
51 pushf
52
53 movl 20+8(%esp), %ebp /* list of pages */
54 movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
55 movl %esp, ESP(%edi)
56 movl %cr0, %eax
57 movl %eax, CR0(%edi)
58 movl %cr3, %eax
59 movl %eax, CR3(%edi)
60 movl %cr4, %eax
61 movl %eax, CR4(%edi)
28 62
29#ifdef CONFIG_X86_PAE 63#ifdef CONFIG_X86_PAE
30 /* map the control page at its virtual address */ 64 /* map the control page at its virtual address */
@@ -138,15 +172,25 @@ relocate_kernel:
138 172
139relocate_new_kernel: 173relocate_new_kernel:
140 /* read the arguments and say goodbye to the stack */ 174 /* read the arguments and say goodbye to the stack */
141 movl 4(%esp), %ebx /* page_list */ 175 movl 20+4(%esp), %ebx /* page_list */
142 movl 8(%esp), %ebp /* list of pages */ 176 movl 20+8(%esp), %ebp /* list of pages */
143 movl 12(%esp), %edx /* start address */ 177 movl 20+12(%esp), %edx /* start address */
144 movl 16(%esp), %ecx /* cpu_has_pae */ 178 movl 20+16(%esp), %ecx /* cpu_has_pae */
179 movl 20+20(%esp), %esi /* preserve_context */
145 180
146 /* zero out flags, and disable interrupts */ 181 /* zero out flags, and disable interrupts */
147 pushl $0 182 pushl $0
148 popfl 183 popfl
149 184
185 /* save some information for jumping back */
186 movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
187 movl %edi, CP_VA_CONTROL_PAGE(%edi)
188 movl PTR(PA_PGD)(%ebp), %eax
189 movl %eax, CP_PA_PGD(%edi)
190 movl PTR(PA_SWAP_PAGE)(%ebp), %eax
191 movl %eax, CP_PA_SWAP_PAGE(%edi)
192 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
193
150 /* get physical address of control page now */ 194 /* get physical address of control page now */
151 /* this is impossible after page table switch */ 195 /* this is impossible after page table switch */
152 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi 196 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +241,90 @@ identity_mapped:
197 xorl %eax, %eax 241 xorl %eax, %eax
198 movl %eax, %cr3 242 movl %eax, %cr3
199 243
244 movl CP_PA_SWAP_PAGE(%edi), %eax
245 pushl %eax
246 pushl %ebx
247 call swap_pages
248 addl $8, %esp
249
250 /* To be certain of avoiding problems with self-modifying code
251 * I need to execute a serializing instruction here.
252 * So I flush the TLB, it's handy, and not processor dependent.
253 */
254 xorl %eax, %eax
255 movl %eax, %cr3
256
257 /* set all of the registers to known values */
258 /* leave %esp alone */
259
260 testl %esi, %esi
261 jnz 1f
262 xorl %edi, %edi
263 xorl %eax, %eax
264 xorl %ebx, %ebx
265 xorl %ecx, %ecx
266 xorl %edx, %edx
267 xorl %esi, %esi
268 xorl %ebp, %ebp
269 ret
2701:
271 popl %edx
272 movl CP_PA_SWAP_PAGE(%edi), %esp
273 addl $PAGE_SIZE, %esp
2742:
275 call *%edx
276
277 /* get the re-entry point of the peer system */
278 movl 0(%esp), %ebp
279 call 1f
2801:
281 popl %ebx
282 subl $(1b - relocate_kernel), %ebx
283 movl CP_VA_CONTROL_PAGE(%ebx), %edi
284 lea PAGE_SIZE(%ebx), %esp
285 movl CP_PA_SWAP_PAGE(%ebx), %eax
286 movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
287 pushl %eax
288 pushl %edx
289 call swap_pages
290 addl $8, %esp
291 movl CP_PA_PGD(%ebx), %eax
292 movl %eax, %cr3
293 movl %cr0, %eax
294 orl $(1<<31), %eax
295 movl %eax, %cr0
296 lea PAGE_SIZE(%edi), %esp
297 movl %edi, %eax
298 addl $(virtual_mapped - relocate_kernel), %eax
299 pushl %eax
300 ret
301
302virtual_mapped:
303 movl CR4(%edi), %eax
304 movl %eax, %cr4
305 movl CR3(%edi), %eax
306 movl %eax, %cr3
307 movl CR0(%edi), %eax
308 movl %eax, %cr0
309 movl ESP(%edi), %esp
310 movl %ebp, %eax
311
312 popf
313 popl %ebp
314 popl %edi
315 popl %esi
316 popl %ebx
317 ret
318
200 /* Do the copies */ 319 /* Do the copies */
201 movl %ebx, %ecx 320swap_pages:
321 movl 8(%esp), %edx
322 movl 4(%esp), %ecx
323 pushl %ebp
324 pushl %ebx
325 pushl %edi
326 pushl %esi
327 movl %ecx, %ebx
202 jmp 1f 328 jmp 1f
203 329
2040: /* top, read another word from the indirection page */ 3300: /* top, read another word from the indirection page */
@@ -226,27 +352,31 @@ identity_mapped:
226 movl %ecx, %esi /* For every source page do a copy */ 352 movl %ecx, %esi /* For every source page do a copy */
227 andl $0xfffff000, %esi 353 andl $0xfffff000, %esi
228 354
355 movl %edi, %eax
356 movl %esi, %ebp
357
358 movl %edx, %edi
229 movl $1024, %ecx 359 movl $1024, %ecx
230 rep ; movsl 360 rep ; movsl
231 jmp 0b
232
2333:
234 361
235 /* To be certain of avoiding problems with self-modifying code 362 movl %ebp, %edi
236 * I need to execute a serializing instruction here. 363 movl %eax, %esi
237 * So I flush the TLB, it's handy, and not processor dependent. 364 movl $1024, %ecx
238 */ 365 rep ; movsl
239 xorl %eax, %eax
240 movl %eax, %cr3
241 366
242 /* set all of the registers to known values */ 367 movl %eax, %edi
243 /* leave %esp alone */ 368 movl %edx, %esi
369 movl $1024, %ecx
370 rep ; movsl
244 371
245 xorl %eax, %eax 372 lea PAGE_SIZE(%ebp), %esi
246 xorl %ebx, %ebx 373 jmp 0b
247 xorl %ecx, %ecx 3743:
248 xorl %edx, %edx 375 popl %esi
249 xorl %esi, %esi 376 popl %edi
250 xorl %edi, %edi 377 popl %ebx
251 xorl %ebp, %ebp 378 popl %ebp
252 ret 379 ret
380
381 .globl kexec_control_code_size
382.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b520dae02bf4..a4656adab53b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void)
445 * @size: Size of the crashkernel memory to reserve. 445 * @size: Size of the crashkernel memory to reserve.
446 * Returns the base address on success, and -1ULL on failure. 446 * Returns the base address on success, and -1ULL on failure.
447 */ 447 */
448unsigned long long find_and_reserve_crashkernel(unsigned long long size) 448unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
449{ 449{
450 const unsigned long long alignment = 16<<20; /* 16M */ 450 const unsigned long long alignment = 16<<20; /* 16M */
451 unsigned long long start = 0LL; 451 unsigned long long start = 0LL;
@@ -604,6 +604,14 @@ void __init setup_arch(char **cmdline_p)
604 early_cpu_init(); 604 early_cpu_init();
605 early_ioremap_init(); 605 early_ioremap_init();
606 606
607#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
608 /*
609 * Must be before kernel pagetables are set up
610 * or the fixmap area is touched.
611 */
612 vmi_init();
613#endif
614
607 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 615 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
608 screen_info = boot_params.screen_info; 616 screen_info = boot_params.screen_info;
609 edid_info = boot_params.edid_info; 617 edid_info = boot_params.edid_info;
@@ -788,10 +796,6 @@ void __init setup_arch(char **cmdline_p)
788 796
789 initmem_init(0, max_pfn); 797 initmem_init(0, max_pfn);
790 798
791#ifdef CONFIG_X86_64
792 dma32_reserve_bootmem();
793#endif
794
795#ifdef CONFIG_ACPI_SLEEP 799#ifdef CONFIG_ACPI_SLEEP
796 /* 800 /*
797 * Reserve low memory region for sleep support. 801 * Reserve low memory region for sleep support.
@@ -806,20 +810,21 @@ void __init setup_arch(char **cmdline_p)
806#endif 810#endif
807 reserve_crashkernel(); 811 reserve_crashkernel();
808 812
813#ifdef CONFIG_X86_64
814 /*
815 * dma32_reserve_bootmem() allocates bootmem which may conflict
816 * with the crashkernel command line, so do that after
817 * reserve_crashkernel()
818 */
819 dma32_reserve_bootmem();
820#endif
821
809 reserve_ibft_region(); 822 reserve_ibft_region();
810 823
811#ifdef CONFIG_KVM_CLOCK 824#ifdef CONFIG_KVM_CLOCK
812 kvmclock_init(); 825 kvmclock_init();
813#endif 826#endif
814 827
815#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
816 /*
817 * Must be after max_low_pfn is determined, and before kernel
818 * pagetables are setup.
819 */
820 vmi_init();
821#endif
822
823 paravirt_pagetable_setup_start(swapper_pg_dir); 828 paravirt_pagetable_setup_start(swapper_pg_dir);
824 paging_init(); 829 paging_init();
825 paravirt_pagetable_setup_done(swapper_pg_dir); 830 paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -856,12 +861,6 @@ void __init setup_arch(char **cmdline_p)
856 init_apic_mappings(); 861 init_apic_mappings();
857 ioapic_init_mappings(); 862 ioapic_init_mappings();
858 863
859#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
860 if (def_to_bigsmp)
861 printk(KERN_WARNING "More than 8 CPUs detected and "
862 "CONFIG_X86_PC cannot handle it.\nUse "
863 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
864#endif
865 kvm_guest_init(); 864 kvm_guest_init();
866 865
867 e820_reserve_resources(); 866 e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index f7745f94c006..76e305e064f9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void)
80#endif 80#endif
81} 81}
82 82
83#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
84cpumask_t *cpumask_of_cpu_map __read_mostly;
85EXPORT_SYMBOL(cpumask_of_cpu_map);
86
87/* requires nr_cpu_ids to be initialized */
88static void __init setup_cpumask_of_cpu(void)
89{
90 int i;
91
92 /* alloc_bootmem zeroes memory */
93 cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
94 for (i = 0; i < nr_cpu_ids; i++)
95 cpu_set(i, cpumask_of_cpu_map[i]);
96}
97#else
98static inline void setup_cpumask_of_cpu(void) { }
99#endif
100
101#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
102/* 84/*
103 * Great future not-so-futuristic plan: make i386 and x86_64 do it 85 * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void)
197 179
198 /* Setup node to cpumask map */ 180 /* Setup node to cpumask map */
199 setup_node_to_cpumask_map(); 181 setup_node_to_cpumask_map();
200
201 /* Setup cpumask_of_cpu map */
202 setup_cpumask_of_cpu();
203} 182}
204 183
205#endif 184#endif
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b45ef8ddd651..ca316b5b742c 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf)
104 clts(); 104 clts();
105 task_thread_info(current)->status |= TS_USEDFPU; 105 task_thread_info(current)->status |= TS_USEDFPU;
106 } 106 }
107 return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); 107 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
108 if (unlikely(err)) {
109 /*
110 * Encountered an error while doing the restore from the
111 * user buffer, clear the fpu state.
112 */
113 clear_fpu(tsk);
114 clear_used_math();
115 }
116 return err;
108} 117}
109 118
110/* 119/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 332512767f4f..e139e617f422 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused)
326 * for which cpus receive the IPI. Holding this 326 * for which cpus receive the IPI. Holding this
327 * lock helps us to not include this cpu in a currently in progress 327 * lock helps us to not include this cpu in a currently in progress
328 * smp_call_function(). 328 * smp_call_function().
329 *
330 * We need to hold vector_lock so that the set of online cpus
331 * does not change while we are assigning vectors to cpus. Holding
332 * this lock ensures we don't half assign or remove an irq from a cpu.
329 */ 333 */
330 ipi_call_lock_irq(); 334 ipi_call_lock_irq();
331#ifdef CONFIG_X86_IO_APIC 335 lock_vector_lock();
332 setup_vector_irq(smp_processor_id()); 336 __setup_vector_irq(smp_processor_id());
333#endif
334 cpu_set(smp_processor_id(), cpu_online_map); 337 cpu_set(smp_processor_id(), cpu_online_map);
338 unlock_vector_lock();
335 ipi_call_unlock_irq(); 339 ipi_call_unlock_irq();
336 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 340 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
337 341
@@ -752,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
752} 756}
753 757
754#ifdef CONFIG_X86_64 758#ifdef CONFIG_X86_64
759
760/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
761static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
762{
763 if (!after_bootmem)
764 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
765}
766
755/* 767/*
756 * Allocate node local memory for the AP pda. 768 * Allocate node local memory for the AP pda.
757 * 769 *
@@ -780,8 +792,7 @@ int __cpuinit get_local_pda(int cpu)
780 792
781 if (oldpda) { 793 if (oldpda) {
782 memcpy(newpda, oldpda, size); 794 memcpy(newpda, oldpda, size);
783 if (!after_bootmem) 795 free_bootmem_pda(oldpda);
784 free_bootmem((unsigned long)oldpda, size);
785 } 796 }
786 797
787 newpda->in_bootmem = 0; 798 newpda->in_bootmem = 0;
@@ -1044,6 +1055,34 @@ static __init void disable_smp(void)
1044static int __init smp_sanity_check(unsigned max_cpus) 1055static int __init smp_sanity_check(unsigned max_cpus)
1045{ 1056{
1046 preempt_disable(); 1057 preempt_disable();
1058
1059#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
1060 if (def_to_bigsmp && nr_cpu_ids > 8) {
1061 unsigned int cpu;
1062 unsigned nr;
1063
1064 printk(KERN_WARNING
1065 "More than 8 CPUs detected - skipping them.\n"
1066 "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
1067
1068 nr = 0;
1069 for_each_present_cpu(cpu) {
1070 if (nr >= 8)
1071 cpu_clear(cpu, cpu_present_map);
1072 nr++;
1073 }
1074
1075 nr = 0;
1076 for_each_possible_cpu(cpu) {
1077 if (nr >= 8)
1078 cpu_clear(cpu, cpu_possible_map);
1079 nr++;
1080 }
1081
1082 nr_cpu_ids = 8;
1083 }
1084#endif
1085
1047 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 1086 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1048 printk(KERN_WARNING "weird, boot CPU (#%d) not listed" 1087 printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
1049 "by the BIOS.\n", hard_smp_processor_id()); 1088 "by the BIOS.\n", hard_smp_processor_id());
@@ -1336,7 +1375,9 @@ int __cpu_disable(void)
1336 remove_siblinginfo(cpu); 1375 remove_siblinginfo(cpu);
1337 1376
1338 /* It's now safe to remove this processor from the online map */ 1377 /* It's now safe to remove this processor from the online map */
1378 lock_vector_lock();
1339 remove_cpu_from_maps(cpu); 1379 remove_cpu_from_maps(cpu);
1380 unlock_vector_lock();
1340 fixup_irqs(cpu_online_map); 1381 fixup_irqs(cpu_online_map);
1341 return 0; 1382 return 0;
1342} 1383}
@@ -1370,17 +1411,3 @@ void __cpu_die(unsigned int cpu)
1370 BUG(); 1411 BUG();
1371} 1412}
1372#endif 1413#endif
1373
1374/*
1375 * If the BIOS enumerates physical processors before logical,
1376 * maxcpus=N at enumeration-time can be used to disable HT.
1377 */
1378static int __init parse_maxcpus(char *arg)
1379{
1380 extern unsigned int maxcpus;
1381
1382 if (arg)
1383 maxcpus = simple_strtoul(arg, NULL, 0);
1384 return 0;
1385}
1386early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 99941b37eca0..397e309839dd 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -8,18 +8,21 @@
8DEFINE_PER_CPU(unsigned long, this_cpu_off); 8DEFINE_PER_CPU(unsigned long, this_cpu_off);
9EXPORT_PER_CPU_SYMBOL(this_cpu_off); 9EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10 10
11/* Initialize the CPU's GDT. This is either the boot CPU doing itself 11/*
12 (still using the master per-cpu area), or a CPU doing it for a 12 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 secondary which will soon come up. */ 13 * (still using the master per-cpu area), or a CPU doing it for a
14 * secondary which will soon come up.
15 */
14__cpuinit void init_gdt(int cpu) 16__cpuinit void init_gdt(int cpu)
15{ 17{
16 struct desc_struct *gdt = get_cpu_gdt_table(cpu); 18 struct desc_struct gdt;
17 19
18 pack_descriptor(&gdt[GDT_ENTRY_PERCPU], 20 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
19 __per_cpu_offset[cpu], 0xFFFFF,
20 0x2 | DESCTYPE_S, 0x8); 21 0x2 | DESCTYPE_S, 0x8);
22 gdt.s = 1;
21 23
22 gdt[GDT_ENTRY_PERCPU].s = 1; 24 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
23 26
24 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; 27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
25 per_cpu(cpu_number, cpu) = cpu; 28 per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 3f18d73f420c..513caaca7115 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void)
1131 } 1131 }
1132 1132
1133 clts(); /* Allow maths ops (or we recurse) */ 1133 clts(); /* Allow maths ops (or we recurse) */
1134 restore_fpu_checking(&me->thread.xstate->fxsave); 1134 /*
1135 * Paranoid restore. Send a SIGSEGV if we fail to restore the state.
1136 */
1137 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
1138 stts();
1139 force_sig(SIGSEGV, me);
1140 return;
1141 }
1135 task_thread_info(me)->status |= TS_USEDFPU; 1142 task_thread_info(me)->status |= TS_USEDFPU;
1136 me->fpu_counter++; 1143 me->fpu_counter++;
1137} 1144}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7603c0553909..46af71676738 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
104/* 104/*
105 * Read TSC and the reference counters. Take care of SMI disturbance 105 * Read TSC and the reference counters. Take care of SMI disturbance
106 */ 106 */
107static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) 107static u64 tsc_read_refs(u64 *pm, u64 *hpet)
108{ 108{
109 u64 t1, t2; 109 u64 t1, t2;
110 int i; 110 int i;
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 41e01b145c48..594ef47f0a63 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early)
184 return 1; 184 return 1;
185} 185}
186 186
187extern unsigned int __cpuinitdata maxcpus;
188
189/* 187/*
190 * The Visual Workstation is Intel MP compliant in the hardware 188 * The Visual Workstation is Intel MP compliant in the hardware
191 * sense, but it doesn't have a BIOS(-configuration table). 189 * sense, but it doesn't have a BIOS(-configuration table).
@@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve)
244 ncpus = CO_CPU_MAX; 242 ncpus = CO_CPU_MAX;
245 } 243 }
246 244
247 if (ncpus > maxcpus) 245 if (ncpus > setup_max_cpus)
248 ncpus = maxcpus; 246 ncpus = setup_max_cpus;
249 247
250#ifdef CONFIG_X86_LOCAL_APIC 248#ifdef CONFIG_X86_LOCAL_APIC
251 smp_found_config = 1; 249 smp_found_config = 1;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922d..6ca515d6db54 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
37#include <asm/timer.h> 37#include <asm/timer.h>
38#include <asm/vmi_time.h> 38#include <asm/vmi_time.h>
39#include <asm/kmap_types.h> 39#include <asm/kmap_types.h>
40#include <asm/setup.h>
40 41
41/* Convenient for calling VMI functions indirectly in the ROM */ 42/* Convenient for calling VMI functions indirectly in the ROM */
42typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); 43typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
683{ 684{
684 /* We must establish the lowmem mapping for MMU ops to work */ 685 /* We must establish the lowmem mapping for MMU ops to work */
685 if (vmi_ops.set_linear_mapping) 686 if (vmi_ops.set_linear_mapping)
686 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); 687 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
687} 688}
688 689
689/* 690/*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index cdb2363697d2..af5bdad84604 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -209,3 +209,11 @@ SECTIONS
209 209
210 DWARF_DEBUG 210 DWARF_DEBUG
211} 211}
212
213#ifdef CONFIG_KEXEC
214/* Link time checks */
215#include <asm/kexec.h>
216
217ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
218 "kexec control code size is too big")
219#endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8d45fabc5f3b..ce3251ce5504 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
21 tristate "Kernel-based Virtual Machine (KVM) support" 21 tristate "Kernel-based Virtual Machine (KVM) support"
22 depends on HAVE_KVM 22 depends on HAVE_KVM
23 select PREEMPT_NOTIFIERS 23 select PREEMPT_NOTIFIERS
24 select MMU_NOTIFIER
24 select ANON_INODES 25 select ANON_INODES
25 ---help--- 26 ---help---
26 Support hosting fully virtualized guest machines using hardware 27 Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b0e4ddca6c18..0bfe2bd305eb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
653 account_shadowed(kvm, gfn); 653 account_shadowed(kvm, gfn);
654} 654}
655 655
656static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
657{
658 u64 *spte;
659 int need_tlb_flush = 0;
660
661 while ((spte = rmap_next(kvm, rmapp, NULL))) {
662 BUG_ON(!(*spte & PT_PRESENT_MASK));
663 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
664 rmap_remove(kvm, spte);
665 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
666 need_tlb_flush = 1;
667 }
668 return need_tlb_flush;
669}
670
671static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
672 int (*handler)(struct kvm *kvm, unsigned long *rmapp))
673{
674 int i;
675 int retval = 0;
676
677 /*
678 * If mmap_sem isn't taken, we can look at the memslots with only
679 * the mmu_lock by skipping over the slots with userspace_addr == 0.
680 */
681 for (i = 0; i < kvm->nmemslots; i++) {
682 struct kvm_memory_slot *memslot = &kvm->memslots[i];
683 unsigned long start = memslot->userspace_addr;
684 unsigned long end;
685
686 /* mmu_lock protects userspace_addr */
687 if (!start)
688 continue;
689
690 end = start + (memslot->npages << PAGE_SHIFT);
691 if (hva >= start && hva < end) {
692 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
693 retval |= handler(kvm, &memslot->rmap[gfn_offset]);
694 retval |= handler(kvm,
695 &memslot->lpage_info[
696 gfn_offset /
697 KVM_PAGES_PER_HPAGE].rmap_pde);
698 }
699 }
700
701 return retval;
702}
703
704int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
705{
706 return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
707}
708
709static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
710{
711 u64 *spte;
712 int young = 0;
713
714 spte = rmap_next(kvm, rmapp, NULL);
715 while (spte) {
716 int _young;
717 u64 _spte = *spte;
718 BUG_ON(!(_spte & PT_PRESENT_MASK));
719 _young = _spte & PT_ACCESSED_MASK;
720 if (_young) {
721 young = 1;
722 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
723 }
724 spte = rmap_next(kvm, rmapp, spte);
725 }
726 return young;
727}
728
729int kvm_age_hva(struct kvm *kvm, unsigned long hva)
730{
731 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
732}
733
656#ifdef MMU_DEBUG 734#ifdef MMU_DEBUG
657static int is_empty_shadow_page(u64 *spt) 735static int is_empty_shadow_page(u64 *spt)
658{ 736{
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1203 int r; 1281 int r;
1204 int largepage = 0; 1282 int largepage = 0;
1205 pfn_t pfn; 1283 pfn_t pfn;
1284 unsigned long mmu_seq;
1206 1285
1207 down_read(&current->mm->mmap_sem); 1286 down_read(&current->mm->mmap_sem);
1208 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { 1287 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1210 largepage = 1; 1289 largepage = 1;
1211 } 1290 }
1212 1291
1292 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1293 /* implicit mb(), we'll read before PT lock is unlocked */
1213 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1294 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1214 up_read(&current->mm->mmap_sem); 1295 up_read(&current->mm->mmap_sem);
1215 1296
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1220 } 1301 }
1221 1302
1222 spin_lock(&vcpu->kvm->mmu_lock); 1303 spin_lock(&vcpu->kvm->mmu_lock);
1304 if (mmu_notifier_retry(vcpu, mmu_seq))
1305 goto out_unlock;
1223 kvm_mmu_free_some_pages(vcpu); 1306 kvm_mmu_free_some_pages(vcpu);
1224 r = __direct_map(vcpu, v, write, largepage, gfn, pfn, 1307 r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
1225 PT32E_ROOT_LEVEL); 1308 PT32E_ROOT_LEVEL);
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1227 1310
1228 1311
1229 return r; 1312 return r;
1313
1314out_unlock:
1315 spin_unlock(&vcpu->kvm->mmu_lock);
1316 kvm_release_pfn_clean(pfn);
1317 return 0;
1230} 1318}
1231 1319
1232 1320
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1345 int r; 1433 int r;
1346 int largepage = 0; 1434 int largepage = 0;
1347 gfn_t gfn = gpa >> PAGE_SHIFT; 1435 gfn_t gfn = gpa >> PAGE_SHIFT;
1436 unsigned long mmu_seq;
1348 1437
1349 ASSERT(vcpu); 1438 ASSERT(vcpu);
1350 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); 1439 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1358 gfn &= ~(KVM_PAGES_PER_HPAGE-1); 1447 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1359 largepage = 1; 1448 largepage = 1;
1360 } 1449 }
1450 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1451 /* implicit mb(), we'll read before PT lock is unlocked */
1361 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1452 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1362 up_read(&current->mm->mmap_sem); 1453 up_read(&current->mm->mmap_sem);
1363 if (is_error_pfn(pfn)) { 1454 if (is_error_pfn(pfn)) {
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1365 return 1; 1456 return 1;
1366 } 1457 }
1367 spin_lock(&vcpu->kvm->mmu_lock); 1458 spin_lock(&vcpu->kvm->mmu_lock);
1459 if (mmu_notifier_retry(vcpu, mmu_seq))
1460 goto out_unlock;
1368 kvm_mmu_free_some_pages(vcpu); 1461 kvm_mmu_free_some_pages(vcpu);
1369 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, 1462 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
1370 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); 1463 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
1371 spin_unlock(&vcpu->kvm->mmu_lock); 1464 spin_unlock(&vcpu->kvm->mmu_lock);
1372 1465
1373 return r; 1466 return r;
1467
1468out_unlock:
1469 spin_unlock(&vcpu->kvm->mmu_lock);
1470 kvm_release_pfn_clean(pfn);
1471 return 0;
1374} 1472}
1375 1473
1376static void nonpaging_free(struct kvm_vcpu *vcpu) 1474static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1670 gfn &= ~(KVM_PAGES_PER_HPAGE-1); 1768 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1671 vcpu->arch.update_pte.largepage = 1; 1769 vcpu->arch.update_pte.largepage = 1;
1672 } 1770 }
1771 vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
1772 /* implicit mb(), we'll read before PT lock is unlocked */
1673 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1773 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1674 up_read(&current->mm->mmap_sem); 1774 up_read(&current->mm->mmap_sem);
1675 1775
@@ -1814,6 +1914,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
1814 spin_unlock(&vcpu->kvm->mmu_lock); 1914 spin_unlock(&vcpu->kvm->mmu_lock);
1815 return r; 1915 return r;
1816} 1916}
1917EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
1817 1918
1818void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 1919void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
1819{ 1920{
@@ -1870,6 +1971,12 @@ void kvm_enable_tdp(void)
1870} 1971}
1871EXPORT_SYMBOL_GPL(kvm_enable_tdp); 1972EXPORT_SYMBOL_GPL(kvm_enable_tdp);
1872 1973
1974void kvm_disable_tdp(void)
1975{
1976 tdp_enabled = false;
1977}
1978EXPORT_SYMBOL_GPL(kvm_disable_tdp);
1979
1873static void free_mmu_pages(struct kvm_vcpu *vcpu) 1980static void free_mmu_pages(struct kvm_vcpu *vcpu)
1874{ 1981{
1875 struct kvm_mmu_page *sp; 1982 struct kvm_mmu_page *sp;
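Note on the mmu_notifier hunks above (and the matching ones in paging_tmpl.h below): they all follow the same fault-versus-invalidate race check: sample kvm->mmu_notifier_seq before the sleeping gfn_to_pfn() call, then re-check it under mmu_lock and back out if an invalidation ran in between. A minimal sketch of that pattern follows; lookup_pfn(), release_pfn(), install_spte(), notifier_seq and map_lock are illustrative stand-ins, not KVM APIs.

/* Sketch only: the seq/recheck pattern used by the hunks above. */
extern struct page *lookup_pfn(unsigned long hva);	/* may sleep */
extern void release_pfn(struct page *page);
extern void install_spte(unsigned long hva, struct page *page);

static unsigned long notifier_seq;	/* bumped when an invalidation completes */
static DEFINE_SPINLOCK(map_lock);	/* plays the role of kvm->mmu_lock */

static int map_one_page(unsigned long hva)
{
	unsigned long seq = notifier_seq;	/* sample before the sleeping lookup */
	struct page *page;

	smp_rmb();				/* order the sample before the lookup */
	page = lookup_pfn(hva);			/* invalidations may run while we sleep */

	spin_lock(&map_lock);
	if (notifier_seq != seq) {		/* an invalidation raced with us */
		spin_unlock(&map_lock);
		release_pfn(page);
		return -EAGAIN;			/* caller re-runs the fault */
	}
	install_spte(hva, page);		/* safe: nothing invalidated since 'seq' */
	spin_unlock(&map_lock);
	return 0;
}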
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4d918220baeb..f72ac1fa35f0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
263 pfn = vcpu->arch.update_pte.pfn; 263 pfn = vcpu->arch.update_pte.pfn;
264 if (is_error_pfn(pfn)) 264 if (is_error_pfn(pfn))
265 return; 265 return;
266 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
267 return;
266 kvm_get_pfn(pfn); 268 kvm_get_pfn(pfn);
267 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 269 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
268 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), 270 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
380 int r; 382 int r;
381 pfn_t pfn; 383 pfn_t pfn;
382 int largepage = 0; 384 int largepage = 0;
385 unsigned long mmu_seq;
383 386
384 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 387 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
385 kvm_mmu_audit(vcpu, "pre page fault"); 388 kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
413 largepage = 1; 416 largepage = 1;
414 } 417 }
415 } 418 }
419 mmu_seq = vcpu->kvm->mmu_notifier_seq;
420 /* implicit mb(), we'll read before PT lock is unlocked */
416 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); 421 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
417 up_read(&current->mm->mmap_sem); 422 up_read(&current->mm->mmap_sem);
418 423
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
424 } 429 }
425 430
426 spin_lock(&vcpu->kvm->mmu_lock); 431 spin_lock(&vcpu->kvm->mmu_lock);
432 if (mmu_notifier_retry(vcpu, mmu_seq))
433 goto out_unlock;
427 kvm_mmu_free_some_pages(vcpu); 434 kvm_mmu_free_some_pages(vcpu);
428 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 435 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
429 largepage, &write_pt, pfn); 436 largepage, &write_pt, pfn);
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
439 spin_unlock(&vcpu->kvm->mmu_lock); 446 spin_unlock(&vcpu->kvm->mmu_lock);
440 447
441 return write_pt; 448 return write_pt;
449
450out_unlock:
451 spin_unlock(&vcpu->kvm->mmu_lock);
452 kvm_release_pfn_clean(pfn);
453 return 0;
442} 454}
443 455
444static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 456static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b756e876dce3..e2ee264740c7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -453,7 +453,8 @@ static __init int svm_hardware_setup(void)
453 if (npt_enabled) { 453 if (npt_enabled) {
454 printk(KERN_INFO "kvm: Nested Paging enabled\n"); 454 printk(KERN_INFO "kvm: Nested Paging enabled\n");
455 kvm_enable_tdp(); 455 kvm_enable_tdp();
456 } 456 } else
457 kvm_disable_tdp();
457 458
458 return 0; 459 return 0;
459 460
@@ -1007,10 +1008,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1007 struct kvm *kvm = svm->vcpu.kvm; 1008 struct kvm *kvm = svm->vcpu.kvm;
1008 u64 fault_address; 1009 u64 fault_address;
1009 u32 error_code; 1010 u32 error_code;
1011 bool event_injection = false;
1010 1012
1011 if (!irqchip_in_kernel(kvm) && 1013 if (!irqchip_in_kernel(kvm) &&
1012 is_external_interrupt(exit_int_info)) 1014 is_external_interrupt(exit_int_info)) {
1015 event_injection = true;
1013 push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); 1016 push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
1017 }
1014 1018
1015 fault_address = svm->vmcb->control.exit_info_2; 1019 fault_address = svm->vmcb->control.exit_info_2;
1016 error_code = svm->vmcb->control.exit_info_1; 1020 error_code = svm->vmcb->control.exit_info_1;
@@ -1024,6 +1028,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1024 (u32)fault_address, (u32)(fault_address >> 32), 1028 (u32)fault_address, (u32)(fault_address >> 32),
1025 handler); 1029 handler);
1026 1030
1031 if (event_injection)
1032 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1027 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); 1033 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1028} 1034}
1029 1035
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0cac63701719..2a69773e3b26 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2298 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2298 cr2 = vmcs_readl(EXIT_QUALIFICATION);
2299 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2299 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
2300 (u32)((u64)cr2 >> 32), handler); 2300 (u32)((u64)cr2 >> 32), handler);
2301 if (vect_info & VECTORING_INFO_VALID_MASK)
2302 kvm_mmu_unprotect_page_virt(vcpu, cr2);
2301 return kvm_mmu_page_fault(vcpu, cr2, error_code); 2303 return kvm_mmu_page_fault(vcpu, cr2, error_code);
2302 } 2304 }
2303 2305
@@ -3116,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3116 return ERR_PTR(-ENOMEM); 3118 return ERR_PTR(-ENOMEM);
3117 3119
3118 allocate_vpid(vmx); 3120 allocate_vpid(vmx);
3119 if (id == 0 && vm_need_ept()) {
3120 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3121 VMX_EPT_WRITABLE_MASK |
3122 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3123 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
3124 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3125 VMX_EPT_EXECUTABLE_MASK);
3126 kvm_enable_tdp();
3127 }
3128 3121
3129 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 3122 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
3130 if (err) 3123 if (err)
@@ -3303,8 +3296,17 @@ static int __init vmx_init(void)
3303 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); 3296 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
3304 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); 3297 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
3305 3298
3306 if (cpu_has_vmx_ept()) 3299 if (vm_need_ept()) {
3307 bypass_guest_pf = 0; 3300 bypass_guest_pf = 0;
3301 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3302 VMX_EPT_WRITABLE_MASK |
3303 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3304 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
3305 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3306 VMX_EPT_EXECUTABLE_MASK);
3307 kvm_enable_tdp();
3308 } else
3309 kvm_disable_tdp();
3308 3310
3309 if (bypass_guest_pf) 3311 if (bypass_guest_pf)
3310 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 3312 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f1cdb011cff..0d682fc6aeb3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext)
883 case KVM_CAP_PIT: 883 case KVM_CAP_PIT:
884 case KVM_CAP_NOP_IO_DELAY: 884 case KVM_CAP_NOP_IO_DELAY:
885 case KVM_CAP_MP_STATE: 885 case KVM_CAP_MP_STATE:
886 case KVM_CAP_SYNC_MMU:
886 r = 1; 887 r = 1;
887 break; 888 break;
888 case KVM_CAP_COALESCED_MMIO: 889 case KVM_CAP_COALESCED_MMIO:
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1495 goto out; 1496 goto out;
1496 1497
1497 down_write(&kvm->slots_lock); 1498 down_write(&kvm->slots_lock);
1499 spin_lock(&kvm->mmu_lock);
1498 1500
1499 p = &kvm->arch.aliases[alias->slot]; 1501 p = &kvm->arch.aliases[alias->slot];
1500 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 1502 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1506 break; 1508 break;
1507 kvm->arch.naliases = n; 1509 kvm->arch.naliases = n;
1508 1510
1511 spin_unlock(&kvm->mmu_lock);
1509 kvm_mmu_zap_all(kvm); 1512 kvm_mmu_zap_all(kvm);
1510 1513
1511 up_write(&kvm->slots_lock); 1514 up_write(&kvm->slots_lock);
@@ -3184,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3184 kvm_desct->base |= seg_desc->base2 << 24; 3187 kvm_desct->base |= seg_desc->base2 << 24;
3185 kvm_desct->limit = seg_desc->limit0; 3188 kvm_desct->limit = seg_desc->limit0;
3186 kvm_desct->limit |= seg_desc->limit << 16; 3189 kvm_desct->limit |= seg_desc->limit << 16;
3190 if (seg_desc->g) {
3191 kvm_desct->limit <<= 12;
3192 kvm_desct->limit |= 0xfff;
3193 }
3187 kvm_desct->selector = selector; 3194 kvm_desct->selector = selector;
3188 kvm_desct->type = seg_desc->type; 3195 kvm_desct->type = seg_desc->type;
3189 kvm_desct->present = seg_desc->p; 3196 kvm_desct->present = seg_desc->p;
@@ -3223,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3223static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 3230static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3224 struct desc_struct *seg_desc) 3231 struct desc_struct *seg_desc)
3225{ 3232{
3233 gpa_t gpa;
3226 struct descriptor_table dtable; 3234 struct descriptor_table dtable;
3227 u16 index = selector >> 3; 3235 u16 index = selector >> 3;
3228 3236
@@ -3232,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3232 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 3240 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3233 return 1; 3241 return 1;
3234 } 3242 }
3235 return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); 3243 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
3244 gpa += index * 8;
3245 return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
3236} 3246}
3237 3247
3238/* allowed just for 8 bytes segments */ 3248/* allowed just for 8 bytes segments */
3239static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 3249static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3240 struct desc_struct *seg_desc) 3250 struct desc_struct *seg_desc)
3241{ 3251{
3252 gpa_t gpa;
3242 struct descriptor_table dtable; 3253 struct descriptor_table dtable;
3243 u16 index = selector >> 3; 3254 u16 index = selector >> 3;
3244 3255
@@ -3246,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3246 3257
3247 if (dtable.limit < index * 8 + 7) 3258 if (dtable.limit < index * 8 + 7)
3248 return 1; 3259 return 1;
3249 return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); 3260 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
3261 gpa += index * 8;
3262 return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
3250} 3263}
3251 3264
3252static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, 3265static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
@@ -3258,55 +3271,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3258 base_addr |= (seg_desc->base1 << 16); 3271 base_addr |= (seg_desc->base1 << 16);
3259 base_addr |= (seg_desc->base2 << 24); 3272 base_addr |= (seg_desc->base2 << 24);
3260 3273
3261 return base_addr; 3274 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
3262}
3263
3264static int load_tss_segment32(struct kvm_vcpu *vcpu,
3265 struct desc_struct *seg_desc,
3266 struct tss_segment_32 *tss)
3267{
3268 u32 base_addr;
3269
3270 base_addr = get_tss_base_addr(vcpu, seg_desc);
3271
3272 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3273 sizeof(struct tss_segment_32));
3274}
3275
3276static int save_tss_segment32(struct kvm_vcpu *vcpu,
3277 struct desc_struct *seg_desc,
3278 struct tss_segment_32 *tss)
3279{
3280 u32 base_addr;
3281
3282 base_addr = get_tss_base_addr(vcpu, seg_desc);
3283
3284 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3285 sizeof(struct tss_segment_32));
3286}
3287
3288static int load_tss_segment16(struct kvm_vcpu *vcpu,
3289 struct desc_struct *seg_desc,
3290 struct tss_segment_16 *tss)
3291{
3292 u32 base_addr;
3293
3294 base_addr = get_tss_base_addr(vcpu, seg_desc);
3295
3296 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3297 sizeof(struct tss_segment_16));
3298}
3299
3300static int save_tss_segment16(struct kvm_vcpu *vcpu,
3301 struct desc_struct *seg_desc,
3302 struct tss_segment_16 *tss)
3303{
3304 u32 base_addr;
3305
3306 base_addr = get_tss_base_addr(vcpu, seg_desc);
3307
3308 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3309 sizeof(struct tss_segment_16));
3310} 3275}
3311 3276
3312static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 3277static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -3466,20 +3431,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3466} 3431}
3467 3432
3468static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, 3433static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3469 struct desc_struct *cseg_desc, 3434 u32 old_tss_base,
3470 struct desc_struct *nseg_desc) 3435 struct desc_struct *nseg_desc)
3471{ 3436{
3472 struct tss_segment_16 tss_segment_16; 3437 struct tss_segment_16 tss_segment_16;
3473 int ret = 0; 3438 int ret = 0;
3474 3439
3475 if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) 3440 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
3441 sizeof tss_segment_16))
3476 goto out; 3442 goto out;
3477 3443
3478 save_state_to_tss16(vcpu, &tss_segment_16); 3444 save_state_to_tss16(vcpu, &tss_segment_16);
3479 save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
3480 3445
3481 if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) 3446 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
3447 sizeof tss_segment_16))
3448 goto out;
3449
3450 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
3451 &tss_segment_16, sizeof tss_segment_16))
3482 goto out; 3452 goto out;
3453
3483 if (load_state_from_tss16(vcpu, &tss_segment_16)) 3454 if (load_state_from_tss16(vcpu, &tss_segment_16))
3484 goto out; 3455 goto out;
3485 3456
@@ -3489,20 +3460,26 @@ out:
3489} 3460}
3490 3461
3491static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, 3462static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3492 struct desc_struct *cseg_desc, 3463 u32 old_tss_base,
3493 struct desc_struct *nseg_desc) 3464 struct desc_struct *nseg_desc)
3494{ 3465{
3495 struct tss_segment_32 tss_segment_32; 3466 struct tss_segment_32 tss_segment_32;
3496 int ret = 0; 3467 int ret = 0;
3497 3468
3498 if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) 3469 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
3470 sizeof tss_segment_32))
3499 goto out; 3471 goto out;
3500 3472
3501 save_state_to_tss32(vcpu, &tss_segment_32); 3473 save_state_to_tss32(vcpu, &tss_segment_32);
3502 save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
3503 3474
3504 if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) 3475 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
3476 sizeof tss_segment_32))
3505 goto out; 3477 goto out;
3478
3479 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
3480 &tss_segment_32, sizeof tss_segment_32))
3481 goto out;
3482
3506 if (load_state_from_tss32(vcpu, &tss_segment_32)) 3483 if (load_state_from_tss32(vcpu, &tss_segment_32))
3507 goto out; 3484 goto out;
3508 3485
@@ -3517,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3517 struct desc_struct cseg_desc; 3494 struct desc_struct cseg_desc;
3518 struct desc_struct nseg_desc; 3495 struct desc_struct nseg_desc;
3519 int ret = 0; 3496 int ret = 0;
3497 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
3498 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
3520 3499
3521 kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); 3500 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
3522 3501
3502 /* FIXME: Handle errors. Failure to read either TSS or their
3503 * descriptors should generate a pagefault.
3504 */
3523 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) 3505 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3524 goto out; 3506 goto out;
3525 3507
3526 if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) 3508 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
3527 goto out; 3509 goto out;
3528 3510
3529
3530 if (reason != TASK_SWITCH_IRET) { 3511 if (reason != TASK_SWITCH_IRET) {
3531 int cpl; 3512 int cpl;
3532 3513
@@ -3544,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3544 3525
3545 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 3526 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3546 cseg_desc.type &= ~(1 << 1); //clear the B flag 3527 cseg_desc.type &= ~(1 << 1); //clear the B flag
3547 save_guest_segment_descriptor(vcpu, tr_seg.selector, 3528 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
3548 &cseg_desc);
3549 } 3529 }
3550 3530
3551 if (reason == TASK_SWITCH_IRET) { 3531 if (reason == TASK_SWITCH_IRET) {
@@ -3557,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3557 kvm_x86_ops->cache_regs(vcpu); 3537 kvm_x86_ops->cache_regs(vcpu);
3558 3538
3559 if (nseg_desc.type & 8) 3539 if (nseg_desc.type & 8)
3560 ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, 3540 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
3561 &nseg_desc); 3541 &nseg_desc);
3562 else 3542 else
3563 ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, 3543 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
3564 &nseg_desc); 3544 &nseg_desc);
3565 3545
3566 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 3546 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
@@ -3995,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3995 */ 3975 */
3996 if (!user_alloc) { 3976 if (!user_alloc) {
3997 if (npages && !old.rmap) { 3977 if (npages && !old.rmap) {
3978 unsigned long userspace_addr;
3979
3998 down_write(&current->mm->mmap_sem); 3980 down_write(&current->mm->mmap_sem);
3999 memslot->userspace_addr = do_mmap(NULL, 0, 3981 userspace_addr = do_mmap(NULL, 0,
4000 npages * PAGE_SIZE, 3982 npages * PAGE_SIZE,
4001 PROT_READ | PROT_WRITE, 3983 PROT_READ | PROT_WRITE,
4002 MAP_SHARED | MAP_ANONYMOUS, 3984 MAP_SHARED | MAP_ANONYMOUS,
4003 0); 3985 0);
4004 up_write(&current->mm->mmap_sem); 3986 up_write(&current->mm->mmap_sem);
4005 3987
4006 if (IS_ERR((void *)memslot->userspace_addr)) 3988 if (IS_ERR((void *)userspace_addr))
4007 return PTR_ERR((void *)memslot->userspace_addr); 3989 return PTR_ERR((void *)userspace_addr);
3990
3991 /* set userspace_addr atomically for kvm_hva_to_rmapp */
3992 spin_lock(&kvm->mmu_lock);
3993 memslot->userspace_addr = userspace_addr;
3994 spin_unlock(&kvm->mmu_lock);
4008 } else { 3995 } else {
4009 if (!old.user_alloc && old.rmap) { 3996 if (!old.user_alloc && old.rmap) {
4010 int ret; 3997 int ret;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 0313a5eec412..d9249a882aa5 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1014,6 +1014,9 @@ __init void lguest_init(void)
1014 init_pg_tables_start = __pa(pg0); 1014 init_pg_tables_start = __pa(pg0);
1015 init_pg_tables_end = __pa(pg0); 1015 init_pg_tables_end = __pa(pg0);
1016 1016
1017 /* As described in head_32.S, we map the first 128M of memory. */
1018 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1019
1017 /* Load the %fs segment register (the per-cpu segment register) with 1020 /* Load the %fs segment register (the per-cpu segment register) with
1018 * the normal data segment to get through booting. */ 1021 * the normal data segment to get through booting. */
1019 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); 1022 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dfdf428975c0..f118c110af32 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -52,7 +52,7 @@
52 jnz 100b 52 jnz 100b
53102: 53102:
54 .section .fixup,"ax" 54 .section .fixup,"ax"
55103: addl %r8d,%edx /* ecx is zerorest also */ 55103: addl %ecx,%edx /* ecx is zerorest also */
56 jmp copy_user_handle_tail 56 jmp copy_user_handle_tail
57 .previous 57 .previous
58 58
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 40e0e309d27e..cb0c112386fb 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -32,7 +32,7 @@
32 jnz 100b 32 jnz 100b
33102: 33102:
34 .section .fixup,"ax" 34 .section .fixup,"ax"
35103: addl %r8d,%edx /* ecx is zerorest also */ 35103: addl %ecx,%edx /* ecx is zerorest also */
36 jmp copy_user_handle_tail 36 jmp copy_user_handle_tail
37 .previous 37 .previous
38 38
@@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache)
108 jmp 60f 108 jmp 60f
10950: movl %ecx,%edx 10950: movl %ecx,%edx
11060: sfence 11060: sfence
111 movl %r8d,%ecx
112 jmp copy_user_handle_tail 111 jmp copy_user_handle_tail
113 .previous 112 .previous
114 113
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1fbb844c3d7a..dfb932dcf136 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_X86_32) += pgtable_32.o 4obj-$(CONFIG_X86_32) += pgtable_32.o
5 5
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
new file mode 100644
index 000000000000..007bb06c7504
--- /dev/null
+++ b/arch/x86/mm/gup.c
@@ -0,0 +1,298 @@
1/*
2 * Lockless get_user_pages_fast for x86
3 *
4 * Copyright (C) 2008 Nick Piggin
5 * Copyright (C) 2008 Novell Inc.
6 */
7#include <linux/sched.h>
8#include <linux/mm.h>
9#include <linux/vmstat.h>
10#include <linux/highmem.h>
11
12#include <asm/pgtable.h>
13
14static inline pte_t gup_get_pte(pte_t *ptep)
15{
16#ifndef CONFIG_X86_PAE
17 return *ptep;
18#else
19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking
21 * any locks. For this we would like to load the pointers atomically,
22 * but that is not possible (without expensive cmpxchg8b) on PAE. What
23 * we do have is the guarantee that a pte will only either go from not
24 * present to present, or present to not present or both -- it will not
25 * switch to a completely different present page without a TLB flush in
26 * between; something that we are blocking by holding interrupts off.
27 *
28 * Setting ptes from not present to present goes:
29 * ptep->pte_high = h;
30 * smp_wmb();
31 * ptep->pte_low = l;
32 *
33 * And present to not present goes:
34 * ptep->pte_low = 0;
35 * smp_wmb();
36 * ptep->pte_high = 0;
37 *
38 * We must ensure here that the load of pte_low sees l iff pte_high
39 * sees h. We load pte_high *after* loading pte_low, which ensures we
40 * don't see an older value of pte_high. *Then* we recheck pte_low,
41 * which ensures that we haven't picked up a changed pte high. We might
42 * have got rubbish values from pte_low and pte_high, but we are
43 * guaranteed that pte_low will not have the present bit set *unless*
44 * it is 'l'. And get_user_pages_fast only operates on present ptes, so
45 * we're safe.
46 *
47 * gup_get_pte should not be used or copied outside gup.c without being
48 * very careful -- it does not atomically load the pte or anything that
49 * is likely to be useful for you.
50 */
51 pte_t pte;
52
53retry:
54 pte.pte_low = ptep->pte_low;
55 smp_rmb();
56 pte.pte_high = ptep->pte_high;
57 smp_rmb();
58 if (unlikely(pte.pte_low != ptep->pte_low))
59 goto retry;
60
61 return pte;
62#endif
63}
64
65/*
66 * The performance critical leaf functions are made noinline otherwise gcc
67 * inlines everything into a single function which results in too much
68 * register pressure.
69 */
70static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
71 unsigned long end, int write, struct page **pages, int *nr)
72{
73 unsigned long mask;
74 pte_t *ptep;
75
76 mask = _PAGE_PRESENT|_PAGE_USER;
77 if (write)
78 mask |= _PAGE_RW;
79
80 ptep = pte_offset_map(&pmd, addr);
81 do {
82 pte_t pte = gup_get_pte(ptep);
83 struct page *page;
84
85 if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
86 pte_unmap(ptep);
87 return 0;
88 }
89 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
90 page = pte_page(pte);
91 get_page(page);
92 pages[*nr] = page;
93 (*nr)++;
94
95 } while (ptep++, addr += PAGE_SIZE, addr != end);
96 pte_unmap(ptep - 1);
97
98 return 1;
99}
100
101static inline void get_head_page_multiple(struct page *page, int nr)
102{
103 VM_BUG_ON(page != compound_head(page));
104 VM_BUG_ON(page_count(page) == 0);
105 atomic_add(nr, &page->_count);
106}
107
108static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
109 unsigned long end, int write, struct page **pages, int *nr)
110{
111 unsigned long mask;
112 pte_t pte = *(pte_t *)&pmd;
113 struct page *head, *page;
114 int refs;
115
116 mask = _PAGE_PRESENT|_PAGE_USER;
117 if (write)
118 mask |= _PAGE_RW;
119 if ((pte_val(pte) & mask) != mask)
120 return 0;
121 /* hugepages are never "special" */
122 VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
123 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
124
125 refs = 0;
126 head = pte_page(pte);
127 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
128 do {
129 VM_BUG_ON(compound_head(page) != head);
130 pages[*nr] = page;
131 (*nr)++;
132 page++;
133 refs++;
134 } while (addr += PAGE_SIZE, addr != end);
135 get_head_page_multiple(head, refs);
136
137 return 1;
138}
139
140static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
141 int write, struct page **pages, int *nr)
142{
143 unsigned long next;
144 pmd_t *pmdp;
145
146 pmdp = pmd_offset(&pud, addr);
147 do {
148 pmd_t pmd = *pmdp;
149
150 next = pmd_addr_end(addr, end);
151 if (pmd_none(pmd))
152 return 0;
153 if (unlikely(pmd_large(pmd))) {
154 if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
155 return 0;
156 } else {
157 if (!gup_pte_range(pmd, addr, next, write, pages, nr))
158 return 0;
159 }
160 } while (pmdp++, addr = next, addr != end);
161
162 return 1;
163}
164
165static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
166 unsigned long end, int write, struct page **pages, int *nr)
167{
168 unsigned long mask;
169 pte_t pte = *(pte_t *)&pud;
170 struct page *head, *page;
171 int refs;
172
173 mask = _PAGE_PRESENT|_PAGE_USER;
174 if (write)
175 mask |= _PAGE_RW;
176 if ((pte_val(pte) & mask) != mask)
177 return 0;
178 /* hugepages are never "special" */
179 VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
180 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
181
182 refs = 0;
183 head = pte_page(pte);
184 page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
185 do {
186 VM_BUG_ON(compound_head(page) != head);
187 pages[*nr] = page;
188 (*nr)++;
189 page++;
190 refs++;
191 } while (addr += PAGE_SIZE, addr != end);
192 get_head_page_multiple(head, refs);
193
194 return 1;
195}
196
197static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
198 int write, struct page **pages, int *nr)
199{
200 unsigned long next;
201 pud_t *pudp;
202
203 pudp = pud_offset(&pgd, addr);
204 do {
205 pud_t pud = *pudp;
206
207 next = pud_addr_end(addr, end);
208 if (pud_none(pud))
209 return 0;
210 if (unlikely(pud_large(pud))) {
211 if (!gup_huge_pud(pud, addr, next, write, pages, nr))
212 return 0;
213 } else {
214 if (!gup_pmd_range(pud, addr, next, write, pages, nr))
215 return 0;
216 }
217 } while (pudp++, addr = next, addr != end);
218
219 return 1;
220}
221
222int get_user_pages_fast(unsigned long start, int nr_pages, int write,
223 struct page **pages)
224{
225 struct mm_struct *mm = current->mm;
226 unsigned long addr, len, end;
227 unsigned long next;
228 pgd_t *pgdp;
229 int nr = 0;
230
231 start &= PAGE_MASK;
232 addr = start;
233 len = (unsigned long) nr_pages << PAGE_SHIFT;
234 end = start + len;
235 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
236 start, len)))
237 goto slow_irqon;
238
239 /*
240 * XXX: batch / limit 'nr', to avoid large irq off latency
241 * needs some instrumenting to determine the common sizes used by
242 * important workloads (eg. DB2), and whether limiting the batch size
243 * will decrease performance.
244 *
245 * It seems like we're in the clear for the moment. Direct-IO is
246 * the main guy that batches up lots of get_user_pages, and even
247 * they are limited to 64-at-a-time which is not so many.
248 */
249 /*
250 * This doesn't prevent pagetable teardown, but does prevent
251 * the pagetables and pages from being freed on x86.
252 *
253 * So long as we atomically load page table pointers versus teardown
254 * (which we do on x86, with the above PAE exception), we can follow the
255	 * address down to the page and take a ref on it.
256 */
257 local_irq_disable();
258 pgdp = pgd_offset(mm, addr);
259 do {
260 pgd_t pgd = *pgdp;
261
262 next = pgd_addr_end(addr, end);
263 if (pgd_none(pgd))
264 goto slow;
265 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
266 goto slow;
267 } while (pgdp++, addr = next, addr != end);
268 local_irq_enable();
269
270 VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
271 return nr;
272
273 {
274 int ret;
275
276slow:
277 local_irq_enable();
278slow_irqon:
279 /* Try to get the remaining pages with get_user_pages */
280 start += nr << PAGE_SHIFT;
281 pages += nr;
282
283 down_read(&mm->mmap_sem);
284 ret = get_user_pages(current, mm, start,
285 (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
286 up_read(&mm->mmap_sem);
287
288 /* Have to be a bit careful with return values */
289 if (nr > 0) {
290 if (ret < 0)
291 ret = nr;
292 else
293 ret += nr;
294 }
295
296 return ret;
297 }
298}
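A hypothetical caller of the fast path above (an illustration, not part of this patch): pin a user buffer with get_user_pages_fast(), and drop the references again with put_page() if fewer pages than requested were pinned. The fast path already falls back to the slow get_user_pages() internally, so the caller only has to deal with a short count or an error:

	/* Hypothetical usage sketch; pin_user_buffer() is not a kernel symbol. */
	#include <linux/mm.h>
	#include <linux/errno.h>

	static int pin_user_buffer(unsigned long uaddr, size_t len, int write,
				   struct page **pages)
	{
		int nr_pages = (PAGE_ALIGN(uaddr + len) - (uaddr & PAGE_MASK))
				>> PAGE_SHIFT;
		int pinned = get_user_pages_fast(uaddr, nr_pages, write, pages);

		if (pinned < nr_pages) {
			/* drop any partial pins; a negative return skips the loop */
			while (pinned > 0)
				put_page(pages[--pinned]);
			return -EFAULT;
		}
		return nr_pages;
	}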
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ec37121f6709..a87ea0e4b3dc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata;
60 60
61DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 61DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
62 62
63int direct_gbpages __meminitdata 63int direct_gbpages
64#ifdef CONFIG_DIRECT_GBPAGES 64#ifdef CONFIG_DIRECT_GBPAGES
65 = 1 65 = 1
66#endif 66#endif
@@ -86,46 +86,13 @@ early_param("gbpages", parse_direct_gbpages_on);
86 * around without checking the pgd every time. 86 * around without checking the pgd every time.
87 */ 87 */
88 88
89void show_mem(void)
90{
91 long i, total = 0, reserved = 0;
92 long shared = 0, cached = 0;
93 struct page *page;
94 pg_data_t *pgdat;
95
96 printk(KERN_INFO "Mem-info:\n");
97 show_free_areas();
98 for_each_online_pgdat(pgdat) {
99 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
100 /*
101 * This loop can take a while with 256 GB and
102 * 4k pages so defer the NMI watchdog:
103 */
104 if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
105 touch_nmi_watchdog();
106
107 if (!pfn_valid(pgdat->node_start_pfn + i))
108 continue;
109
110 page = pfn_to_page(pgdat->node_start_pfn + i);
111 total++;
112 if (PageReserved(page))
113 reserved++;
114 else if (PageSwapCache(page))
115 cached++;
116 else if (page_count(page))
117 shared += page_count(page) - 1;
118 }
119 }
120 printk(KERN_INFO "%lu pages of RAM\n", total);
121 printk(KERN_INFO "%lu reserved pages\n", reserved);
122 printk(KERN_INFO "%lu pages shared\n", shared);
123 printk(KERN_INFO "%lu pages swap cached\n", cached);
124}
125
126int after_bootmem; 89int after_bootmem;
127 90
128static __init void *spp_getpage(void) 91/*
92 * NOTE: This function is marked __ref because it calls __init function
93 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
94 */
95static __ref void *spp_getpage(void)
129{ 96{
130 void *ptr; 97 void *ptr;
131 98
@@ -351,6 +318,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
351{ 318{
352 unsigned long pages = 0; 319 unsigned long pages = 0;
353 unsigned long last_map_addr = end; 320 unsigned long last_map_addr = end;
321 unsigned long start = address;
354 322
355 int i = pmd_index(address); 323 int i = pmd_index(address);
356 324
@@ -371,6 +339,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
371 if (!pmd_large(*pmd)) 339 if (!pmd_large(*pmd))
372 last_map_addr = phys_pte_update(pmd, address, 340 last_map_addr = phys_pte_update(pmd, address,
373 end); 341 end);
342 /* Count entries we're using from level2_ident_pgt */
343 if (start == 0)
344 pages++;
374 continue; 345 continue;
375 } 346 }
376 347
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 016f335bbeea..6ba6f889c79d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,7 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
170 phys_addr &= PAGE_MASK; 170 phys_addr &= PAGE_MASK;
171 size = PAGE_ALIGN(last_addr+1) - phys_addr; 171 size = PAGE_ALIGN(last_addr+1) - phys_addr;
172 172
173 retval = reserve_memtype(phys_addr, phys_addr + size, 173 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
174 prot_val, &new_prot_val); 174 prot_val, &new_prot_val);
175 if (retval) { 175 if (retval) {
176 pr_debug("Warning: reserve_memtype returned %d\n", retval); 176 pr_debug("Warning: reserve_memtype returned %d\n", retval);
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 0dcd42eb94e6..d4aa503caaa2 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -221,8 +221,7 @@ static int pageattr_test(void)
221 failed += print_split(&sc); 221 failed += print_split(&sc);
222 222
223 if (failed) { 223 if (failed) {
224 printk(KERN_ERR "NOT PASSED. Please report.\n"); 224 WARN(1, KERN_ERR "NOT PASSED. Please report.\n");
225 WARN_ON(1);
226 return -EINVAL; 225 return -EINVAL;
227 } else { 226 } else {
228 if (print) 227 if (print)
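This hunk (and the pageattr.c change below) folds a printk() plus WARN_ON(1) pair into a single WARN(1, ...), which prints the message and the backtrace in one call. Since WARN() also returns its condition, it can guard the error path directly; a small sketch of the idiom with a hypothetical helper, not code from this patch:

	#include <linux/kernel.h>
	#include <linux/errno.h>

	/* Hypothetical helper: message, stack dump and error return in one place. */
	static int report_test_result(int failed)
	{
		if (WARN(failed, KERN_ERR "NOT PASSED. Please report.\n"))
			return -EINVAL;
		return 0;
	}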
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf059..f5f5154ea11e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -55,13 +55,19 @@ static void split_page_count(int level)
55 55
56int arch_report_meminfo(char *page) 56int arch_report_meminfo(char *page)
57{ 57{
58 int n = sprintf(page, "DirectMap4k: %8lu\n" 58 int n = sprintf(page, "DirectMap4k: %8lu kB\n",
59 "DirectMap2M: %8lu\n", 59 direct_pages_count[PG_LEVEL_4K] << 2);
60 direct_pages_count[PG_LEVEL_4K], 60#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
61 direct_pages_count[PG_LEVEL_2M]); 61 n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
62 direct_pages_count[PG_LEVEL_2M] << 11);
63#else
64 n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
65 direct_pages_count[PG_LEVEL_2M] << 12);
66#endif
62#ifdef CONFIG_X86_64 67#ifdef CONFIG_X86_64
63 n += sprintf(page + n, "DirectMap1G: %8lu\n", 68 if (direct_gbpages)
64 direct_pages_count[PG_LEVEL_1G]); 69 n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
70 direct_pages_count[PG_LEVEL_1G] << 20);
65#endif 71#endif
66 return n; 72 return n;
67} 73}
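The counters above are now reported in kB, so each shift encodes the size of one direct-mapping entry: a 4k entry is 4 kB (<< 2), a 2M entry is 2048 kB (<< 11), a 4M entry on 32-bit non-PAE is 4096 kB (<< 12), and a 1G entry is 1048576 kB (<< 20). A standalone arithmetic check (plain user-space C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned long count = 3;	/* pretend 3 entries at each level */

		printf("DirectMap4k: %8lu kB\n", count << 2);	/* 12 kB */
		printf("DirectMap2M: %8lu kB\n", count << 11);	/* 6144 kB */
		printf("DirectMap4M: %8lu kB\n", count << 12);	/* 12288 kB */
		printf("DirectMap1G: %8lu kB\n", count << 20);	/* 3145728 kB */
		return 0;
	}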
@@ -592,10 +598,9 @@ repeat:
592 if (!pte_val(old_pte)) { 598 if (!pte_val(old_pte)) {
593 if (!primary) 599 if (!primary)
594 return 0; 600 return 0;
595 printk(KERN_WARNING "CPA: called for zero pte. " 601 WARN(1, KERN_WARNING "CPA: called for zero pte. "
596 "vaddr = %lx cpa->vaddr = %lx\n", address, 602 "vaddr = %lx cpa->vaddr = %lx\n", address,
597 cpa->vaddr); 603 cpa->vaddr);
598 WARN_ON(1);
599 return -EINVAL; 604 return -EINVAL;
600 } 605 }
601 606
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 557b2abceef8..d50302774fe2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
207 unsigned long addr; 207 unsigned long addr;
208 int i; 208 int i;
209 209
210 if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
211 return;
212
210 pud = pud_offset(pgd, 0); 213 pud = pud_offset(pgd, 0);
211 214
212 for (addr = i = 0; i < PREALLOCATED_PMDS; 215 for (addr = i = 0; i < PREALLOCATED_PMDS;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b4becbf8c570..cab0abbd1ebe 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -20,53 +20,6 @@
20#include <asm/tlb.h> 20#include <asm/tlb.h>
21#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
22 22
23void show_mem(void)
24{
25 int total = 0, reserved = 0;
26 int shared = 0, cached = 0;
27 int highmem = 0;
28 struct page *page;
29 pg_data_t *pgdat;
30 unsigned long i;
31 unsigned long flags;
32
33 printk(KERN_INFO "Mem-info:\n");
34 show_free_areas();
35 for_each_online_pgdat(pgdat) {
36 pgdat_resize_lock(pgdat, &flags);
37 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
38 if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
39 touch_nmi_watchdog();
40 page = pgdat_page_nr(pgdat, i);
41 total++;
42 if (PageHighMem(page))
43 highmem++;
44 if (PageReserved(page))
45 reserved++;
46 else if (PageSwapCache(page))
47 cached++;
48 else if (page_count(page))
49 shared += page_count(page) - 1;
50 }
51 pgdat_resize_unlock(pgdat, &flags);
52 }
53 printk(KERN_INFO "%d pages of RAM\n", total);
54 printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
55 printk(KERN_INFO "%d reserved pages\n", reserved);
56 printk(KERN_INFO "%d pages shared\n", shared);
57 printk(KERN_INFO "%d pages swap cached\n", cached);
58
59 printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
60 printk(KERN_INFO "%lu pages writeback\n",
61 global_page_state(NR_WRITEBACK));
62 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
63 printk(KERN_INFO "%lu pages slab\n",
64 global_page_state(NR_SLAB_RECLAIMABLE) +
65 global_page_state(NR_SLAB_UNRECLAIMABLE));
66 printk(KERN_INFO "%lu pages pagetables\n",
67 global_page_state(NR_PAGETABLE));
68}
69
70/* 23/*
71 * Associate a virtual page frame with a given physical page frame 24 * Associate a virtual page frame with a given physical page frame
72 * and protection flags for that frame. 25 * and protection flags for that frame.
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 1eb2973a301c..16ae70fc57e7 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void)
178 * start of the node, and that the current "end" address is after 178 * start of the node, and that the current "end" address is after
179 * the previous one. 179 * the previous one.
180 */ 180 */
181static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) 181static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
182{ 182{
183 /* 183 /*
184 * Only add present memory as told by the e820. 184 * Only add present memory as told by the e820.
@@ -189,10 +189,10 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
189 if (memory_chunk->start_pfn >= max_pfn) { 189 if (memory_chunk->start_pfn >= max_pfn) {
190 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", 190 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
191 memory_chunk->start_pfn, memory_chunk->end_pfn); 191 memory_chunk->start_pfn, memory_chunk->end_pfn);
192 return; 192 return -1;
193 } 193 }
194 if (memory_chunk->nid != nid) 194 if (memory_chunk->nid != nid)
195 return; 195 return -1;
196 196
197 if (!node_has_online_mem(nid)) 197 if (!node_has_online_mem(nid))
198 node_start_pfn[nid] = memory_chunk->start_pfn; 198 node_start_pfn[nid] = memory_chunk->start_pfn;
@@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
202 202
203 if (node_end_pfn[nid] < memory_chunk->end_pfn) 203 if (node_end_pfn[nid] < memory_chunk->end_pfn)
204 node_end_pfn[nid] = memory_chunk->end_pfn; 204 node_end_pfn[nid] = memory_chunk->end_pfn;
205
206 return 0;
205} 207}
206 208
207int __init get_memcfg_from_srat(void) 209int __init get_memcfg_from_srat(void)
@@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void)
259 printk(KERN_DEBUG 261 printk(KERN_DEBUG
260 "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", 262 "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
261 j, chunk->nid, chunk->start_pfn, chunk->end_pfn); 263 j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
262 node_read_chunk(chunk->nid, chunk); 264 if (node_read_chunk(chunk->nid, chunk))
265 continue;
266
263 e820_register_active_regions(chunk->nid, chunk->start_pfn, 267 e820_register_active_regions(chunk->nid, chunk->start_pfn,
264 min(chunk->end_pfn, max_pfn)); 268 min(chunk->end_pfn, max_pfn));
265 } 269 }
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index e641545d4796..cacba61ffbac 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -10,11 +10,12 @@
10 10
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/ptrace.h>
14#include <linux/nmi.h>
13#include <asm/msr.h> 15#include <asm/msr.h>
14#include <asm/ptrace.h>
15#include <asm/fixmap.h> 16#include <asm/fixmap.h>
16#include <asm/apic.h> 17#include <asm/apic.h>
17#include <asm/nmi.h> 18
18 19
19#include "op_x86_model.h" 20#include "op_x86_model.h"
20#include "op_counter.h" 21#include "op_counter.h"
@@ -40,7 +41,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT;
40static inline void setup_num_counters(void) 41static inline void setup_num_counters(void)
41{ 42{
42#ifdef CONFIG_SMP 43#ifdef CONFIG_SMP
43 if (smp_num_siblings == 2){ 44 if (smp_num_siblings == 2) {
44 num_counters = NUM_COUNTERS_HT2; 45 num_counters = NUM_COUNTERS_HT2;
45 num_controls = NUM_CONTROLS_HT2; 46 num_controls = NUM_CONTROLS_HT2;
46 } 47 }
@@ -86,7 +87,7 @@ struct p4_event_binding {
86#define CTR_FLAME_2 (1 << 6) 87#define CTR_FLAME_2 (1 << 6)
87#define CTR_IQ_5 (1 << 7) 88#define CTR_IQ_5 (1 << 7)
88 89
89static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { 90static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
90 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, 91 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
91 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, 92 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
92 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, 93 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
@@ -97,32 +98,32 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
97 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } 98 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
98}; 99};
99 100
100#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT 101#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
101 102
102/* p4 event codes in libop/op_event.h are indices into this table. */ 103/* p4 event codes in libop/op_event.h are indices into this table. */
103 104
104static struct p4_event_binding p4_events[NUM_EVENTS] = { 105static struct p4_event_binding p4_events[NUM_EVENTS] = {
105 106
106 { /* BRANCH_RETIRED */ 107 { /* BRANCH_RETIRED */
107 0x05, 0x06, 108 0x05, 0x06,
108 { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, 109 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
109 {CTR_IQ_5, MSR_P4_CRU_ESCR3} } 110 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
110 }, 111 },
111 112
112 { /* MISPRED_BRANCH_RETIRED */ 113 { /* MISPRED_BRANCH_RETIRED */
113 0x04, 0x03, 114 0x04, 0x03,
114 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 115 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
115 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 116 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
116 }, 117 },
117 118
118 { /* TC_DELIVER_MODE */ 119 { /* TC_DELIVER_MODE */
119 0x01, 0x01, 120 0x01, 0x01,
120 { { CTR_MS_0, MSR_P4_TC_ESCR0}, 121 { { CTR_MS_0, MSR_P4_TC_ESCR0},
121 { CTR_MS_2, MSR_P4_TC_ESCR1} } 122 { CTR_MS_2, MSR_P4_TC_ESCR1} }
122 }, 123 },
123 124
124 { /* BPU_FETCH_REQUEST */ 125 { /* BPU_FETCH_REQUEST */
125 0x00, 0x03, 126 0x00, 0x03,
126 { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, 127 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
127 { CTR_BPU_2, MSR_P4_BPU_ESCR1} } 128 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
128 }, 129 },
@@ -146,7 +147,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
146 }, 147 },
147 148
148 { /* LOAD_PORT_REPLAY */ 149 { /* LOAD_PORT_REPLAY */
149 0x02, 0x04, 150 0x02, 0x04,
150 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 151 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
151 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 152 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
152 }, 153 },
@@ -170,43 +171,43 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
170 }, 171 },
171 172
172 { /* BSQ_CACHE_REFERENCE */ 173 { /* BSQ_CACHE_REFERENCE */
173 0x07, 0x0c, 174 0x07, 0x0c,
174 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 175 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
175 { CTR_BPU_2, MSR_P4_BSU_ESCR1} } 176 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
176 }, 177 },
177 178
178 { /* IOQ_ALLOCATION */ 179 { /* IOQ_ALLOCATION */
179 0x06, 0x03, 180 0x06, 0x03,
180 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 181 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
181 { 0, 0 } } 182 { 0, 0 } }
182 }, 183 },
183 184
184 { /* IOQ_ACTIVE_ENTRIES */ 185 { /* IOQ_ACTIVE_ENTRIES */
185 0x06, 0x1a, 186 0x06, 0x1a,
186 { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, 187 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
187 { 0, 0 } } 188 { 0, 0 } }
188 }, 189 },
189 190
190 { /* FSB_DATA_ACTIVITY */ 191 { /* FSB_DATA_ACTIVITY */
191 0x06, 0x17, 192 0x06, 0x17,
192 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 193 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
193 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 194 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
194 }, 195 },
195 196
196 { /* BSQ_ALLOCATION */ 197 { /* BSQ_ALLOCATION */
197 0x07, 0x05, 198 0x07, 0x05,
198 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 199 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
199 { 0, 0 } } 200 { 0, 0 } }
200 }, 201 },
201 202
202 { /* BSQ_ACTIVE_ENTRIES */ 203 { /* BSQ_ACTIVE_ENTRIES */
203 0x07, 0x06, 204 0x07, 0x06,
204 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, 205 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
205 { 0, 0 } } 206 { 0, 0 } }
206 }, 207 },
207 208
208 { /* X87_ASSIST */ 209 { /* X87_ASSIST */
209 0x05, 0x03, 210 0x05, 0x03,
210 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 211 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
211 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 212 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
212 }, 213 },
@@ -216,21 +217,21 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
216 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 217 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
217 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 218 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
218 }, 219 },
219 220
220 { /* PACKED_SP_UOP */ 221 { /* PACKED_SP_UOP */
221 0x01, 0x08, 222 0x01, 0x08,
222 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 223 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
223 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 224 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
224 }, 225 },
225 226
226 { /* PACKED_DP_UOP */ 227 { /* PACKED_DP_UOP */
227 0x01, 0x0c, 228 0x01, 0x0c,
228 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 229 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
229 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 230 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
230 }, 231 },
231 232
232 { /* SCALAR_SP_UOP */ 233 { /* SCALAR_SP_UOP */
233 0x01, 0x0a, 234 0x01, 0x0a,
234 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 235 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
235 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 236 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
236 }, 237 },
@@ -242,31 +243,31 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
242 }, 243 },
243 244
244 { /* 64BIT_MMX_UOP */ 245 { /* 64BIT_MMX_UOP */
245 0x01, 0x02, 246 0x01, 0x02,
246 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 247 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
247 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 248 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
248 }, 249 },
249 250
250 { /* 128BIT_MMX_UOP */ 251 { /* 128BIT_MMX_UOP */
251 0x01, 0x1a, 252 0x01, 0x1a,
252 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 253 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
253 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 254 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
254 }, 255 },
255 256
256 { /* X87_FP_UOP */ 257 { /* X87_FP_UOP */
257 0x01, 0x04, 258 0x01, 0x04,
258 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 259 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
259 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 260 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
260 }, 261 },
261 262
262 { /* X87_SIMD_MOVES_UOP */ 263 { /* X87_SIMD_MOVES_UOP */
263 0x01, 0x2e, 264 0x01, 0x2e,
264 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 265 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
265 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 266 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
266 }, 267 },
267 268
268 { /* MACHINE_CLEAR */ 269 { /* MACHINE_CLEAR */
269 0x05, 0x02, 270 0x05, 0x02,
270 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 271 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
271 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 272 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
272 }, 273 },
@@ -276,9 +277,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
276 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 277 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
277 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 278 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
278 }, 279 },
279 280
280 { /* TC_MS_XFER */ 281 { /* TC_MS_XFER */
281 0x00, 0x05, 282 0x00, 0x05,
282 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 283 { { CTR_MS_0, MSR_P4_MS_ESCR0},
283 { CTR_MS_2, MSR_P4_MS_ESCR1} } 284 { CTR_MS_2, MSR_P4_MS_ESCR1} }
284 }, 285 },
@@ -308,7 +309,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
308 }, 309 },
309 310
310 { /* INSTR_RETIRED */ 311 { /* INSTR_RETIRED */
311 0x04, 0x02, 312 0x04, 0x02,
312 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 313 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
313 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 314 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
314 }, 315 },
@@ -319,14 +320,14 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
319 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 320 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
320 }, 321 },
321 322
322 { /* UOP_TYPE */ 323 { /* UOP_TYPE */
323 0x02, 0x02, 324 0x02, 0x02,
324 { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, 325 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
325 { CTR_IQ_5, MSR_P4_RAT_ESCR1} } 326 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
326 }, 327 },
327 328
328 { /* RETIRED_MISPRED_BRANCH_TYPE */ 329 { /* RETIRED_MISPRED_BRANCH_TYPE */
329 0x02, 0x05, 330 0x02, 0x05,
330 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 331 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
331 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 332 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
332 }, 333 },
@@ -349,8 +350,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
349#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) 350#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
350#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) 351#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
351#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) 352#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
352#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 353#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
353#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 354#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
354 355
355#define CCCR_RESERVED_BITS 0x38030FFF 356#define CCCR_RESERVED_BITS 0x38030FFF
356#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) 357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -360,15 +361,15 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
360#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) 361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
361#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) 362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
362#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) 363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
363#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 364#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
364#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 365#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
365#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 366#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
366#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 367#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
367 368
368#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) 369#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
369#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) 370#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
370#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) 371#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
371#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) 372#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
372#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) 373#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
373 374
374 375
@@ -380,7 +381,7 @@ static unsigned int get_stagger(void)
380#ifdef CONFIG_SMP 381#ifdef CONFIG_SMP
381 int cpu = smp_processor_id(); 382 int cpu = smp_processor_id();
382 return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); 383 return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
383#endif 384#endif
384 return 0; 385 return 0;
385} 386}
386 387
@@ -395,25 +396,23 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
395 396
396static void p4_fill_in_addresses(struct op_msrs * const msrs) 397static void p4_fill_in_addresses(struct op_msrs * const msrs)
397{ 398{
398 unsigned int i; 399 unsigned int i;
399 unsigned int addr, cccraddr, stag; 400 unsigned int addr, cccraddr, stag;
400 401
401 setup_num_counters(); 402 setup_num_counters();
402 stag = get_stagger(); 403 stag = get_stagger();
403 404
404 /* initialize some registers */ 405 /* initialize some registers */
405 for (i = 0; i < num_counters; ++i) { 406 for (i = 0; i < num_counters; ++i)
406 msrs->counters[i].addr = 0; 407 msrs->counters[i].addr = 0;
407 } 408 for (i = 0; i < num_controls; ++i)
408 for (i = 0; i < num_controls; ++i) {
409 msrs->controls[i].addr = 0; 409 msrs->controls[i].addr = 0;
410 } 410
411
412 /* the counter & cccr registers we pay attention to */ 411 /* the counter & cccr registers we pay attention to */
413 for (i = 0; i < num_counters; ++i) { 412 for (i = 0; i < num_counters; ++i) {
414 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 413 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
415 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; 414 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
416 if (reserve_perfctr_nmi(addr)){ 415 if (reserve_perfctr_nmi(addr)) {
417 msrs->counters[i].addr = addr; 416 msrs->counters[i].addr = addr;
418 msrs->controls[i].addr = cccraddr; 417 msrs->controls[i].addr = cccraddr;
419 } 418 }
@@ -447,22 +446,22 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
447 if (reserve_evntsel_nmi(addr)) 446 if (reserve_evntsel_nmi(addr))
448 msrs->controls[i].addr = addr; 447 msrs->controls[i].addr = addr;
449 } 448 }
450 449
451 for (addr = MSR_P4_MS_ESCR0 + stag; 450 for (addr = MSR_P4_MS_ESCR0 + stag;
452 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 451 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
453 if (reserve_evntsel_nmi(addr)) 452 if (reserve_evntsel_nmi(addr))
454 msrs->controls[i].addr = addr; 453 msrs->controls[i].addr = addr;
455 } 454 }
456 455
457 for (addr = MSR_P4_IX_ESCR0 + stag; 456 for (addr = MSR_P4_IX_ESCR0 + stag;
458 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 457 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
459 if (reserve_evntsel_nmi(addr)) 458 if (reserve_evntsel_nmi(addr))
460 msrs->controls[i].addr = addr; 459 msrs->controls[i].addr = addr;
461 } 460 }
462 461
463 /* there are 2 remaining non-contiguously located ESCRs */ 462 /* there are 2 remaining non-contiguously located ESCRs */
464 463
465 if (num_counters == NUM_COUNTERS_NON_HT) { 464 if (num_counters == NUM_COUNTERS_NON_HT) {
466 /* standard non-HT CPUs handle both remaining ESCRs*/ 465 /* standard non-HT CPUs handle both remaining ESCRs*/
467 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) 466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
@@ -498,20 +497,20 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
498 unsigned int stag; 497 unsigned int stag;
499 498
500 stag = get_stagger(); 499 stag = get_stagger();
501 500
502 /* convert from counter *number* to counter *bit* */ 501 /* convert from counter *number* to counter *bit* */
503 counter_bit = 1 << VIRT_CTR(stag, ctr); 502 counter_bit = 1 << VIRT_CTR(stag, ctr);
504 503
505 /* find our event binding structure. */ 504 /* find our event binding structure. */
506 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { 505 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
507 printk(KERN_ERR 506 printk(KERN_ERR
508 "oprofile: P4 event code 0x%lx out of range\n", 507 "oprofile: P4 event code 0x%lx out of range\n",
509 counter_config[ctr].event); 508 counter_config[ctr].event);
510 return; 509 return;
511 } 510 }
512 511
513 ev = &(p4_events[counter_config[ctr].event - 1]); 512 ev = &(p4_events[counter_config[ctr].event - 1]);
514 513
515 for (i = 0; i < maxbind; i++) { 514 for (i = 0; i < maxbind; i++) {
516 if (ev->bindings[i].virt_counter & counter_bit) { 515 if (ev->bindings[i].virt_counter & counter_bit) {
517 516
@@ -526,25 +525,24 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
526 ESCR_SET_OS_1(escr, counter_config[ctr].kernel); 525 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
527 } 526 }
528 ESCR_SET_EVENT_SELECT(escr, ev->event_select); 527 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
529 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); 528 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
530 ESCR_WRITE(escr, high, ev, i); 529 ESCR_WRITE(escr, high, ev, i);
531 530
532 /* modify CCCR */ 531 /* modify CCCR */
533 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); 532 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
534 CCCR_CLEAR(cccr); 533 CCCR_CLEAR(cccr);
535 CCCR_SET_REQUIRED_BITS(cccr); 534 CCCR_SET_REQUIRED_BITS(cccr);
536 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); 535 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
537 if (stag == 0) { 536 if (stag == 0)
538 CCCR_SET_PMI_OVF_0(cccr); 537 CCCR_SET_PMI_OVF_0(cccr);
539 } else { 538 else
540 CCCR_SET_PMI_OVF_1(cccr); 539 CCCR_SET_PMI_OVF_1(cccr);
541 }
542 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); 540 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
543 return; 541 return;
544 } 542 }
545 } 543 }
546 544
547 printk(KERN_ERR 545 printk(KERN_ERR
548 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", 546 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
549 counter_config[ctr].event, stag, ctr); 547 counter_config[ctr].event, stag, ctr);
550} 548}
@@ -559,14 +557,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
559 stag = get_stagger(); 557 stag = get_stagger();
560 558
561 rdmsr(MSR_IA32_MISC_ENABLE, low, high); 559 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
562 if (! MISC_PMC_ENABLED_P(low)) { 560 if (!MISC_PMC_ENABLED_P(low)) {
563 printk(KERN_ERR "oprofile: P4 PMC not available\n"); 561 printk(KERN_ERR "oprofile: P4 PMC not available\n");
564 return; 562 return;
565 } 563 }
566 564
567 /* clear the cccrs we will use */ 565 /* clear the cccrs we will use */
568 for (i = 0 ; i < num_counters ; i++) { 566 for (i = 0 ; i < num_counters ; i++) {
569 if (unlikely(!CTRL_IS_RESERVED(msrs,i))) 567 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
570 continue; 568 continue;
571 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 569 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
572 CCCR_CLEAR(low); 570 CCCR_CLEAR(low);
@@ -576,14 +574,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
576 574
577 /* clear all escrs (including those outside our concern) */ 575 /* clear all escrs (including those outside our concern) */
578 for (i = num_counters; i < num_controls; i++) { 576 for (i = num_counters; i < num_controls; i++) {
579 if (unlikely(!CTRL_IS_RESERVED(msrs,i))) 577 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
580 continue; 578 continue;
581 wrmsr(msrs->controls[i].addr, 0, 0); 579 wrmsr(msrs->controls[i].addr, 0, 0);
582 } 580 }
583 581
584 /* setup all counters */ 582 /* setup all counters */
585 for (i = 0 ; i < num_counters ; ++i) { 583 for (i = 0 ; i < num_counters ; ++i) {
586 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { 584 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
587 reset_value[i] = counter_config[i].count; 585 reset_value[i] = counter_config[i].count;
588 pmc_setup_one_p4_counter(i); 586 pmc_setup_one_p4_counter(i);
589 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); 587 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -603,11 +601,11 @@ static int p4_check_ctrs(struct pt_regs * const regs,
603 stag = get_stagger(); 601 stag = get_stagger();
604 602
605 for (i = 0; i < num_counters; ++i) { 603 for (i = 0; i < num_counters; ++i) {
606 604
607 if (!reset_value[i]) 605 if (!reset_value[i])
608 continue; 606 continue;
609 607
610 /* 608 /*
611 * there is some eccentricity in the hardware which 609 * there is some eccentricity in the hardware which
612 * requires that we perform 2 extra corrections: 610 * requires that we perform 2 extra corrections:
613 * 611 *
@@ -616,24 +614,24 @@ static int p4_check_ctrs(struct pt_regs * const regs,
616 * 614 *
617 * - write the counter back twice to ensure it gets 615 * - write the counter back twice to ensure it gets
618 * updated properly. 616 * updated properly.
619 * 617 *
620 * the former seems to be related to extra NMIs happening 618 * the former seems to be related to extra NMIs happening
621 * during the current NMI; the latter is reported as errata 619 * during the current NMI; the latter is reported as errata
622 * N15 in intel doc 249199-029, pentium 4 specification 620 * N15 in intel doc 249199-029, pentium 4 specification
623 * update, though their suggested work-around does not 621 * update, though their suggested work-around does not
624 * appear to solve the problem. 622 * appear to solve the problem.
625 */ 623 */
626 624
627 real = VIRT_CTR(stag, i); 625 real = VIRT_CTR(stag, i);
628 626
629 CCCR_READ(low, high, real); 627 CCCR_READ(low, high, real);
630 CTR_READ(ctr, high, real); 628 CTR_READ(ctr, high, real);
631 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { 629 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
632 oprofile_add_sample(regs, i); 630 oprofile_add_sample(regs, i);
633 CTR_WRITE(reset_value[i], real); 631 CTR_WRITE(reset_value[i], real);
634 CCCR_CLEAR_OVF(low); 632 CCCR_CLEAR_OVF(low);
635 CCCR_WRITE(low, high, real); 633 CCCR_WRITE(low, high, real);
636 CTR_WRITE(reset_value[i], real); 634 CTR_WRITE(reset_value[i], real);
637 } 635 }
638 } 636 }
639 637
@@ -683,15 +681,16 @@ static void p4_shutdown(struct op_msrs const * const msrs)
683 int i; 681 int i;
684 682
685 for (i = 0 ; i < num_counters ; ++i) { 683 for (i = 0 ; i < num_counters ; ++i) {
686 if (CTR_IS_RESERVED(msrs,i)) 684 if (CTR_IS_RESERVED(msrs, i))
687 release_perfctr_nmi(msrs->counters[i].addr); 685 release_perfctr_nmi(msrs->counters[i].addr);
688 } 686 }
689 /* some of the control registers are specially reserved in 687 /*
688 * some of the control registers are specially reserved in
690 * conjunction with the counter registers (hence the starting offset). 689 * conjunction with the counter registers (hence the starting offset).
691 * This saves a few bits. 690 * This saves a few bits.
692 */ 691 */
693 for (i = num_counters ; i < num_controls ; ++i) { 692 for (i = num_counters ; i < num_controls ; ++i) {
694 if (CTRL_IS_RESERVED(msrs,i)) 693 if (CTRL_IS_RESERVED(msrs, i))
695 release_evntsel_nmi(msrs->controls[i].addr); 694 release_evntsel_nmi(msrs->controls[i].addr);
696 } 695 }
697} 696}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index ff3a6a336342..4bdaa590375d 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
23 pci_read_config_byte(d, reg++, &busno); 23 pci_read_config_byte(d, reg++, &busno);
24 pci_read_config_byte(d, reg++, &suba); 24 pci_read_config_byte(d, reg++, &suba);
25 pci_read_config_byte(d, reg++, &subb); 25 pci_read_config_byte(d, reg++, &subb);
26 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); 26 dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno,
27 suba, subb);
27 if (busno) 28 if (busno)
28 pci_scan_bus_with_sysdata(busno); /* Bus A */ 29 pci_scan_bus_with_sysdata(busno); /* Bus A */
29 if (suba < subb) 30 if (suba < subb)
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index a09505806b82..5807d1bc73f7 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -128,10 +128,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
128 pr = pci_find_parent_resource(dev, r); 128 pr = pci_find_parent_resource(dev, r);
129 if (!r->start || !pr || 129 if (!r->start || !pr ||
130 request_resource(pr, r) < 0) { 130 request_resource(pr, r) < 0) {
131 printk(KERN_ERR "PCI: Cannot allocate " 131 dev_err(&dev->dev, "BAR %d: can't "
132 "resource region %d " 132 "allocate resource\n", idx);
133 "of bridge %s\n",
134 idx, pci_name(dev));
135 /* 133 /*
136 * Something is wrong with the region. 134 * Something is wrong with the region.
137 * Invalidate the resource to prevent 135 * Invalidate the resource to prevent
@@ -166,15 +164,15 @@ static void __init pcibios_allocate_resources(int pass)
166 else 164 else
167 disabled = !(command & PCI_COMMAND_MEMORY); 165 disabled = !(command & PCI_COMMAND_MEMORY);
168 if (pass == disabled) { 166 if (pass == disabled) {
169 DBG("PCI: Resource %08lx-%08lx " 167 dev_dbg(&dev->dev, "resource %#08llx-%#08llx "
170 "(f=%lx, d=%d, p=%d)\n", 168 "(f=%lx, d=%d, p=%d)\n",
171 r->start, r->end, r->flags, disabled, pass); 169 (unsigned long long) r->start,
170 (unsigned long long) r->end,
171 r->flags, disabled, pass);
172 pr = pci_find_parent_resource(dev, r); 172 pr = pci_find_parent_resource(dev, r);
173 if (!pr || request_resource(pr, r) < 0) { 173 if (!pr || request_resource(pr, r) < 0) {
174 printk(KERN_ERR "PCI: Cannot allocate " 174 dev_err(&dev->dev, "BAR %d: can't "
175 "resource region %d " 175 "allocate resource\n", idx);
176 "of device %s\n",
177 idx, pci_name(dev));
178 /* We'll assign a new address later */ 176 /* We'll assign a new address later */
179 r->end -= r->start; 177 r->end -= r->start;
180 r->start = 0; 178 r->start = 0;
@@ -187,8 +185,7 @@ static void __init pcibios_allocate_resources(int pass)
187 /* Turn the ROM off, leave the resource region, 185 /* Turn the ROM off, leave the resource region,
188 * but keep it unregistered. */ 186 * but keep it unregistered. */
189 u32 reg; 187 u32 reg;
190 DBG("PCI: Switching off ROM of %s\n", 188 dev_dbg(&dev->dev, "disabling ROM\n");
191 pci_name(dev));
192 r->flags &= ~IORESOURCE_ROM_ENABLE; 189 r->flags &= ~IORESOURCE_ROM_ENABLE;
193 pci_read_config_dword(dev, 190 pci_read_config_dword(dev,
194 dev->rom_base_reg, &reg); 191 dev->rom_base_reg, &reg);
@@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev)
257 lat = pcibios_max_latency; 254 lat = pcibios_max_latency;
258 else 255 else
259 return; 256 return;
260 printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", 257 dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat);
261 pci_name(dev), lat);
262 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); 258 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
263} 259}
264 260
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 6a06a2eb0597..fec0123b33a9 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
436{ 436{
437 WARN_ON_ONCE(pirq >= 9); 437 WARN_ON_ONCE(pirq >= 9);
438 if (pirq > 8) { 438 if (pirq > 8) {
439 printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); 439 dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
440 return 0; 440 return 0;
441 } 441 }
442 return read_config_nybble(router, 0x74, pirq-1); 442 return read_config_nybble(router, 0x74, pirq-1);
@@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
446{ 446{
447 WARN_ON_ONCE(pirq >= 9); 447 WARN_ON_ONCE(pirq >= 9);
448 if (pirq > 8) { 448 if (pirq > 8) {
449 printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); 449 dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
450 return 0; 450 return 0;
451 } 451 }
452 write_config_nybble(router, 0x74, pirq-1, irq); 452 write_config_nybble(router, 0x74, pirq-1, irq);
@@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
492 irq = 0; 492 irq = 0;
493 if (pirq <= 4) 493 if (pirq <= 4)
494 irq = read_config_nybble(router, 0x56, pirq - 1); 494 irq = read_config_nybble(router, 0x56, pirq - 1);
495 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", 495 dev_info(&dev->dev,
496 dev->vendor, dev->device, pirq, irq); 496 "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
497 dev->vendor, dev->device, pirq, irq);
497 return irq; 498 return irq;
498} 499}
499 500
500static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) 501static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
501{ 502{
502 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", 503 dev_info(&dev->dev,
503 dev->vendor, dev->device, pirq, irq); 504 "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
505 dev->vendor, dev->device, pirq, irq);
504 if (pirq <= 4) 506 if (pirq <= 4)
505 write_config_nybble(router, 0x56, pirq - 1, irq); 507 write_config_nybble(router, 0x56, pirq - 1, irq);
506 return 1; 508 return 1;
@@ -730,7 +732,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router,
730 switch (device) { 732 switch (device) {
731 case PCI_DEVICE_ID_AL_M1533: 733 case PCI_DEVICE_ID_AL_M1533:
732 case PCI_DEVICE_ID_AL_M1563: 734 case PCI_DEVICE_ID_AL_M1563:
733 printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n");
734 r->name = "ALI"; 735 r->name = "ALI";
735 r->get = pirq_ali_get; 736 r->get = pirq_ali_get;
736 r->set = pirq_ali_set; 737 r->set = pirq_ali_set;
@@ -840,11 +841,9 @@ static void __init pirq_find_router(struct irq_router *r)
840 h->probe(r, pirq_router_dev, pirq_router_dev->device)) 841 h->probe(r, pirq_router_dev, pirq_router_dev->device))
841 break; 842 break;
842 } 843 }
843 printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", 844 dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
844 pirq_router.name, 845 pirq_router.name,
845 pirq_router_dev->vendor, 846 pirq_router_dev->vendor, pirq_router_dev->device);
846 pirq_router_dev->device,
847 pci_name(pirq_router_dev));
848 847
849 /* The device remains referenced for the kernel lifetime */ 848 /* The device remains referenced for the kernel lifetime */
850} 849}
@@ -877,7 +876,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
877 /* Find IRQ pin */ 876 /* Find IRQ pin */
878 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); 877 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
879 if (!pin) { 878 if (!pin) {
880 DBG(KERN_DEBUG " -> no interrupt pin\n"); 879 dev_dbg(&dev->dev, "no interrupt pin\n");
881 return 0; 880 return 0;
882 } 881 }
883 pin = pin - 1; 882 pin = pin - 1;
@@ -887,20 +886,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
887 if (!pirq_table) 886 if (!pirq_table)
888 return 0; 887 return 0;
889 888
890 DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin);
891 info = pirq_get_info(dev); 889 info = pirq_get_info(dev);
892 if (!info) { 890 if (!info) {
893 DBG(" -> not found in routing table\n" KERN_DEBUG); 891 dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
892 'A' + pin);
894 return 0; 893 return 0;
895 } 894 }
896 pirq = info->irq[pin].link; 895 pirq = info->irq[pin].link;
897 mask = info->irq[pin].bitmap; 896 mask = info->irq[pin].bitmap;
898 if (!pirq) { 897 if (!pirq) {
899 DBG(" -> not routed\n" KERN_DEBUG); 898 dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin);
900 return 0; 899 return 0;
901 } 900 }
902 DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, 901 dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
903 pirq_table->exclusive_irqs); 902 'A' + pin, pirq, mask, pirq_table->exclusive_irqs);
904 mask &= pcibios_irq_mask; 903 mask &= pcibios_irq_mask;
905 904
906 /* Work around broken HP Pavilion Notebooks which assign USB to 905 /* Work around broken HP Pavilion Notebooks which assign USB to
@@ -930,10 +929,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
930 if (pci_probe & PCI_USE_PIRQ_MASK) 929 if (pci_probe & PCI_USE_PIRQ_MASK)
931 newirq = 0; 930 newirq = 0;
932 else 931 else
933 printk("\n" KERN_WARNING 932 dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask "
934 "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" 933 "%#x; try pci=usepirqmask\n", newirq, mask);
935 KERN_DEBUG, newirq,
936 pci_name(dev));
937 } 934 }
938 if (!newirq && assign) { 935 if (!newirq && assign) {
939 for (i = 0; i < 16; i++) { 936 for (i = 0; i < 16; i++) {
@@ -944,39 +941,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
944 newirq = i; 941 newirq = i;
945 } 942 }
946 } 943 }
947 DBG(" -> newirq=%d", newirq); 944 dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq);
948 945
949 /* Check if it is hardcoded */ 946 /* Check if it is hardcoded */
950 if ((pirq & 0xf0) == 0xf0) { 947 if ((pirq & 0xf0) == 0xf0) {
951 irq = pirq & 0xf; 948 irq = pirq & 0xf;
952 DBG(" -> hardcoded IRQ %d\n", irq); 949 msg = "hardcoded";
953 msg = "Hardcoded";
954 } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ 950 } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
955 ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { 951 ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
956 DBG(" -> got IRQ %d\n", irq); 952 msg = "found";
957 msg = "Found";
958 eisa_set_level_irq(irq); 953 eisa_set_level_irq(irq);
959 } else if (newirq && r->set && 954 } else if (newirq && r->set &&
960 (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { 955 (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
961 DBG(" -> assigning IRQ %d", newirq);
962 if (r->set(pirq_router_dev, dev, pirq, newirq)) { 956 if (r->set(pirq_router_dev, dev, pirq, newirq)) {
963 eisa_set_level_irq(newirq); 957 eisa_set_level_irq(newirq);
964 DBG(" ... OK\n"); 958 msg = "assigned";
965 msg = "Assigned";
966 irq = newirq; 959 irq = newirq;
967 } 960 }
968 } 961 }
969 962
970 if (!irq) { 963 if (!irq) {
971 DBG(" ... failed\n");
972 if (newirq && mask == (1 << newirq)) { 964 if (newirq && mask == (1 << newirq)) {
973 msg = "Guessed"; 965 msg = "guessed";
974 irq = newirq; 966 irq = newirq;
975 } else 967 } else {
968 dev_dbg(&dev->dev, "can't route interrupt\n");
976 return 0; 969 return 0;
970 }
977 } 971 }
978 printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, 972 dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq);
979 pci_name(dev));
980 973
981 /* Update IRQ for all devices with the same pirq value */ 974 /* Update IRQ for all devices with the same pirq value */
982 while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { 975 while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
@@ -996,17 +989,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
996 (!(pci_probe & PCI_USE_PIRQ_MASK) || \ 989 (!(pci_probe & PCI_USE_PIRQ_MASK) || \
997 ((1 << dev2->irq) & mask))) { 990 ((1 << dev2->irq) & mask))) {
998#ifndef CONFIG_PCI_MSI 991#ifndef CONFIG_PCI_MSI
999 printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", 992 dev_info(&dev2->dev, "IRQ routing conflict: "
1000 pci_name(dev2), dev2->irq, irq); 993 "have IRQ %d, want IRQ %d\n",
994 dev2->irq, irq);
1001#endif 995#endif
1002 continue; 996 continue;
1003 } 997 }
1004 dev2->irq = irq; 998 dev2->irq = irq;
1005 pirq_penalty[irq]++; 999 pirq_penalty[irq]++;
1006 if (dev != dev2) 1000 if (dev != dev2)
1007 printk(KERN_INFO 1001 dev_info(&dev->dev, "sharing IRQ %d with %s\n",
1008 "PCI: Sharing IRQ %d with %s\n", 1002 irq, pci_name(dev2));
1009 irq, pci_name(dev2));
1010 } 1003 }
1011 } 1004 }
1012 return 1; 1005 return 1;
@@ -1025,8 +1018,7 @@ static void __init pcibios_fixup_irqs(void)
1025 * already in use. 1018 * already in use.
1026 */ 1019 */
1027 if (dev->irq >= 16) { 1020 if (dev->irq >= 16) {
1028 DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", 1021 dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq);
1029 pci_name(dev), dev->irq);
1030 dev->irq = 0; 1022 dev->irq = 0;
1031 } 1023 }
1032 /* 1024 /*
@@ -1070,12 +1062,12 @@ static void __init pcibios_fixup_irqs(void)
1070 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 1062 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
1071 PCI_SLOT(bridge->devfn), pin); 1063 PCI_SLOT(bridge->devfn), pin);
1072 if (irq >= 0) 1064 if (irq >= 0)
1073 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", 1065 dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
1074 pci_name(bridge), 'A' + pin, irq); 1066 pci_name(bridge),
1067 'A' + pin, irq);
1075 } 1068 }
1076 if (irq >= 0) { 1069 if (irq >= 0) {
1077 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", 1070 dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
1078 pci_name(dev), 'A' + pin, irq);
1079 dev->irq = irq; 1071 dev->irq = irq;
1080 } 1072 }
1081 } 1073 }
@@ -1231,25 +1223,24 @@ static int pirq_enable_irq(struct pci_dev *dev)
1231 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 1223 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
1232 PCI_SLOT(bridge->devfn), pin); 1224 PCI_SLOT(bridge->devfn), pin);
1233 if (irq >= 0) 1225 if (irq >= 0)
1234 printk(KERN_WARNING 1226 dev_warn(&dev->dev, "using bridge %s "
1235 "PCI: using PPB %s[%c] to get irq %d\n", 1227 "INT %c to get IRQ %d\n",
1236 pci_name(bridge), 1228 pci_name(bridge), 'A' + pin,
1237 'A' + pin, irq); 1229 irq);
1238 dev = bridge; 1230 dev = bridge;
1239 } 1231 }
1240 dev = temp_dev; 1232 dev = temp_dev;
1241 if (irq >= 0) { 1233 if (irq >= 0) {
1242 printk(KERN_INFO 1234 dev_info(&dev->dev, "PCI->APIC IRQ transform: "
1243 "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", 1235 "INT %c -> IRQ %d\n", 'A' + pin, irq);
1244 pci_name(dev), 'A' + pin, irq);
1245 dev->irq = irq; 1236 dev->irq = irq;
1246 return 0; 1237 return 0;
1247 } else 1238 } else
1248 msg = " Probably buggy MP table."; 1239 msg = "; probably buggy MP table";
1249 } else if (pci_probe & PCI_BIOS_IRQ_SCAN) 1240 } else if (pci_probe & PCI_BIOS_IRQ_SCAN)
1250 msg = ""; 1241 msg = "";
1251 else 1242 else
1252 msg = " Please try using pci=biosirq."; 1243 msg = "; please try using pci=biosirq";
1253 1244
1254 /* 1245 /*
1255 * With IDE legacy devices the IRQ lookup failure is not 1246 * With IDE legacy devices the IRQ lookup failure is not
@@ -1259,9 +1250,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
1259 !(dev->class & 0x5)) 1250 !(dev->class & 0x5))
1260 return 0; 1251 return 0;
1261 1252
1262 printk(KERN_WARNING 1253 dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n",
1263 "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", 1254 'A' + pin, msg);
1264 'A' + pin, pci_name(dev), msg);
1265 } 1255 }
1266 return 0; 1256 return 0;
1267} 1257}
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 23faaa890ffc..2bd5c53f6386 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -365,7 +365,7 @@ static void __init pci_mmcfg_reject_broken(int early)
365 return; 365 return;
366 366
367reject: 367reject:
368 printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); 368 printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
369 pci_mmcfg_arch_free(); 369 pci_mmcfg_arch_free();
370 kfree(pci_mmcfg_config); 370 kfree(pci_mmcfg_config);
371 pci_mmcfg_config = NULL; 371 pci_mmcfg_config = NULL;
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index f4b16dc11dad..1177845d3186 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
131 u8 busno, suba, subb; 131 u8 busno, suba, subb;
132 int quad = BUS2QUAD(d->bus->number); 132 int quad = BUS2QUAD(d->bus->number);
133 133
134 printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); 134 dev_info(&d->dev, "searching for i450NX host bridges\n");
135 reg = 0xd0; 135 reg = 0xd0;
136 for(pxb=0; pxb<2; pxb++) { 136 for(pxb=0; pxb<2; pxb++) {
137 pci_read_config_byte(d, reg++, &busno); 137 pci_read_config_byte(d, reg++, &busno);
138 pci_read_config_byte(d, reg++, &suba); 138 pci_read_config_byte(d, reg++, &suba);
139 pci_read_config_byte(d, reg++, &subb); 139 pci_read_config_byte(d, reg++, &subb);
140 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); 140 dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n",
141 pxb, busno, suba, subb);
141 if (busno) { 142 if (busno) {
142 /* Bus A */ 143 /* Bus A */
143 pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); 144 pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 7dc5d5cf50a2..d3e083dea720 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -45,7 +45,7 @@ static void __save_processor_state(struct saved_context *ctxt)
45 ctxt->cr0 = read_cr0(); 45 ctxt->cr0 = read_cr0();
46 ctxt->cr2 = read_cr2(); 46 ctxt->cr2 = read_cr2();
47 ctxt->cr3 = read_cr3(); 47 ctxt->cr3 = read_cr3();
48 ctxt->cr4 = read_cr4(); 48 ctxt->cr4 = read_cr4_safe();
49} 49}
50 50
51/* Needed by apm.c */ 51/* Needed by apm.c */
@@ -98,7 +98,9 @@ static void __restore_processor_state(struct saved_context *ctxt)
98 /* 98 /*
99 * control registers 99 * control registers
100 */ 100 */
101 write_cr4(ctxt->cr4); 101 /* cr4 was introduced in the Pentium CPU */
102 if (ctxt->cr4)
103 write_cr4(ctxt->cr4);
102 write_cr3(ctxt->cr3); 104 write_cr3(ctxt->cr3);
103 write_cr2(ctxt->cr2); 105 write_cr2(ctxt->cr2);
104 write_cr0(ctxt->cr0); 106 write_cr0(ctxt->cr0);
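The hunk above makes 32-bit suspend/resume safe on CPUs without %cr4 (i486 and earlier): read_cr4_safe() returns 0 when the register does not exist, and the restore path only writes %cr4 back when the saved value is non-zero. A minimal sketch of that convention (hypothetical helpers; only read_cr4_safe() and write_cr4() are names used by this file, and the asm/system.h location is an assumption):

	#include <asm/system.h>

	static unsigned long saved_cr4;

	static void sketch_save_cr4(void)
	{
		saved_cr4 = read_cr4_safe();	/* 0 if the CPU has no %cr4 */
	}

	static void sketch_restore_cr4(void)
	{
		if (saved_cr4)			/* %cr4 exists since the Pentium */
			write_cr4(saved_cr4);
	}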
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index b95aa6cfe3cb..4fc7e872c85e 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -28,9 +28,9 @@ ENTRY(swsusp_arch_suspend)
28 ret 28 ret
29 29
30ENTRY(restore_image) 30ENTRY(restore_image)
31 movl resume_pg_dir, %ecx 31 movl resume_pg_dir, %eax
32 subl $__PAGE_OFFSET, %ecx 32 subl $__PAGE_OFFSET, %eax
33 movl %ecx, %cr3 33 movl %eax, %cr3
34 34
35 movl restore_pblist, %edx 35 movl restore_pblist, %edx
36 .p2align 4,,7 36 .p2align 4,,7
@@ -52,17 +52,21 @@ copy_loop:
52 52
53done: 53done:
54 /* go back to the original page tables */ 54 /* go back to the original page tables */
55 movl $swapper_pg_dir, %ecx 55 movl $swapper_pg_dir, %eax
56 subl $__PAGE_OFFSET, %ecx 56 subl $__PAGE_OFFSET, %eax
57 movl %ecx, %cr3 57 movl %eax, %cr3
58 /* Flush TLB, including "global" things (vmalloc) */ 58 /* Flush TLB, including "global" things (vmalloc) */
59 movl mmu_cr4_features, %eax 59 movl mmu_cr4_features, %ecx
60 movl %eax, %edx 60 jecxz 1f # cr4 Pentium and higher, skip if zero
61 movl %ecx, %edx
61 andl $~(1<<7), %edx; # PGE 62 andl $~(1<<7), %edx; # PGE
62 movl %edx, %cr4; # turn off PGE 63 movl %edx, %cr4; # turn off PGE
63 movl %cr3, %ecx; # flush TLB 641:
64 movl %ecx, %cr3 65 movl %cr3, %eax; # flush TLB
65 movl %eax, %cr4; # turn PGE back on 66 movl %eax, %cr3
67 jecxz 1f # cr4 Pentium and higher, skip if zero
68 movl %ecx, %cr4; # turn PGE back on
691:
66 70
67 movl saved_context_esp, %esp 71 movl saved_context_esp, %esp
68 movl saved_context_ebp, %ebp 72 movl saved_context_ebp, %ebp