aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig11
-rw-r--r--arch/x86/Kconfig.cpu28
-rw-r--r--arch/x86/Kconfig.debug24
-rw-r--r--arch/x86/ia32/ia32entry.S8
-rw-r--r--arch/x86/include/asm/a.out-core.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/fixmap_64.h4
-rw-r--r--arch/x86/include/asm/i387.h8
-rw-r--r--arch/x86/include/asm/iomap.h3
-rw-r--r--arch/x86/include/asm/kvm.h7
-rw-r--r--arch/x86/include/asm/math_emu.h29
-rw-r--r--arch/x86/include/asm/mmzone_32.h2
-rw-r--r--arch/x86/include/asm/mmzone_64.h2
-rw-r--r--arch/x86/include/asm/mpspec.h6
-rw-r--r--arch/x86/include/asm/page.h1
-rw-r--r--arch/x86/include/asm/paravirt.h18
-rw-r--r--arch/x86/include/asm/pgtable.h26
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/seccomp_32.h6
-rw-r--r--arch/x86/include/asm/seccomp_64.h8
-rw-r--r--arch/x86/include/asm/spinlock.h1
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/include/asm/xen/page.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c23
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S30
-rw-r--r--arch/x86/kernel/apic.c2
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig11
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c40
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c4
-rw-r--r--arch/x86/kernel/ds.c3
-rw-r--r--arch/x86/kernel/efi.c7
-rw-r--r--arch/x86/kernel/efi_64.c21
-rw-r--r--arch/x86/kernel/ftrace.c17
-rw-r--r--arch/x86/kernel/hpet.c14
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/i8237.c17
-rw-r--r--arch/x86/kernel/io_apic.c20
-rw-r--r--arch/x86/kernel/kprobes.c3
-rw-r--r--arch/x86/kernel/olpc.c2
-rw-r--r--arch/x86/kernel/paravirt.c26
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/process_64.c9
-rw-r--r--arch/x86/kernel/ptrace.c18
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c7
-rw-r--r--arch/x86/kernel/time_64.c2
-rw-r--r--arch/x86/kernel/traps.c25
-rw-r--r--arch/x86/kernel/tsc.c110
-rw-r--r--arch/x86/kernel/vmi_32.c11
-rw-r--r--arch/x86/kernel/vmiclock_32.c7
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/irq.c7
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/lapic.c66
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c9
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/lguest/boot.c21
-rw-r--r--arch/x86/mach-default/setup.c2
-rw-r--r--arch/x86/mach-voyager/setup.c2
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c24
-rw-r--r--arch/x86/math-emu/fpu_aux.c31
-rw-r--r--arch/x86/math-emu/fpu_entry.c6
-rw-r--r--arch/x86/math-emu/fpu_proto.h4
-rw-r--r--arch/x86/math-emu/fpu_system.h16
-rw-r--r--arch/x86/math-emu/get_address.c69
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/iomap_32.c11
-rw-r--r--arch/x86/mm/ioremap.c19
-rw-r--r--arch/x86/mm/kmmio.c164
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/pageattr.c39
-rw-r--r--arch/x86/mm/pat.c85
-rw-r--r--arch/x86/mm/testmmiotrace.c70
-rw-r--r--arch/x86/oprofile/op_model_ppro.c14
-rw-r--r--arch/x86/xen/enlighten.c3
87 files changed, 791 insertions, 580 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73f7fe8fd4d..bc2fbadff9f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1802,6 +1802,17 @@ config DMAR
1802 and include PCI device scope covered by these DMA 1802 and include PCI device scope covered by these DMA
1803 remapping devices. 1803 remapping devices.
1804 1804
1805config DMAR_DEFAULT_ON
1806 def_bool y
1807 prompt "Enable DMA Remapping Devices by default"
1808 depends on DMAR
1809 help
1810 Selecting this option will enable a DMAR device at boot time if
1811 one is found. If this option is not selected, DMAR support can
1812 be enabled by passing intel_iommu=on to the kernel. It is
1813 recommended you say N here while the DMAR code remains
1814 experimental.
1815
1805config DMAR_GFX_WA 1816config DMAR_GFX_WA
1806 def_bool y 1817 def_bool y
1807 prompt "Support for Graphics workaround" 1818 prompt "Support for Graphics workaround"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8078955845a..c98d52e8296 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -167,9 +167,9 @@ config MK7
167config MK8 167config MK8
168 bool "Opteron/Athlon64/Hammer/K8" 168 bool "Opteron/Athlon64/Hammer/K8"
169 help 169 help
170 Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables 170 Select this for an AMD Opteron or Athlon64 Hammer-family processor.
171 use of some extended instructions, and passes appropriate optimization 171 Enables use of some extended instructions, and passes appropriate
172 flags to GCC. 172 optimization flags to GCC.
173 173
174config MCRUSOE 174config MCRUSOE
175 bool "Crusoe" 175 bool "Crusoe"
@@ -256,9 +256,11 @@ config MPSC
256config MCORE2 256config MCORE2
257 bool "Core 2/newer Xeon" 257 bool "Core 2/newer Xeon"
258 help 258 help
259 Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) 259
260 CPUs. You can distinguish newer from older Xeons by the CPU family 260 Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
261 in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) 261 53xx) CPUs. You can distinguish newer from older Xeons by the CPU
262 family in /proc/cpuinfo. Newer ones have 6 and older ones 15
263 (not a typo)
262 264
263config GENERIC_CPU 265config GENERIC_CPU
264 bool "Generic-x86-64" 266 bool "Generic-x86-64"
@@ -320,14 +322,14 @@ config X86_PPRO_FENCE
320 bool "PentiumPro memory ordering errata workaround" 322 bool "PentiumPro memory ordering errata workaround"
321 depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 323 depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
322 help 324 help
323 Old PentiumPro multiprocessor systems had errata that could cause memory 325 Old PentiumPro multiprocessor systems had errata that could cause
324 operations to violate the x86 ordering standard in rare cases. Enabling this 326 memory operations to violate the x86 ordering standard in rare cases.
325 option will attempt to work around some (but not all) occurrences of 327 Enabling this option will attempt to work around some (but not all)
326 this problem, at the cost of much heavier spinlock and memory barrier 328 occurrences of this problem, at the cost of much heavier spinlock and
327 operations. 329 memory barrier operations.
328 330
329 If unsure, say n here. Even distro kernels should think twice before enabling 331 If unsure, say n here. Even distro kernels should think twice before
330 this: there are few systems, and an unlikely bug. 332 enabling this: there are few systems, and an unlikely bug.
331 333
332config X86_F00F_BUG 334config X86_F00F_BUG
333 def_bool y 335 def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd05..e1983fa025d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,28 +174,8 @@ config IOMMU_LEAK
174 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
175 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
176 176
177config MMIOTRACE 177config HAVE_MMIOTRACE_SUPPORT
178 bool "Memory mapped IO tracing" 178 def_bool y
179 depends on DEBUG_KERNEL && PCI
180 select TRACING
181 help
182 Mmiotrace traces Memory Mapped I/O access and is meant for
183 debugging and reverse engineering. It is called from the ioremap
184 implementation and works via page faults. Tracing is disabled by
185 default and can be enabled at run-time.
186
187 See Documentation/tracers/mmiotrace.txt.
188 If you are not helping to develop drivers, say N.
189
190config MMIOTRACE_TEST
191 tristate "Test module for mmiotrace"
192 depends on MMIOTRACE && m
193 help
194 This is a dumb module for testing mmiotrace. It is very dangerous
195 as it will write garbage to IO memory starting at a given address.
196 However, it should be safe to use on e.g. unused portion of VRAM.
197
198 Say N, unless you absolutely know what you are doing.
199 179
200# 180#
201# IO delay types: 181# IO delay types:
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b6189..5a0d76dc56a 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -418,9 +418,9 @@ ENTRY(ia32_syscall)
418 orl $TS_COMPAT,TI_status(%r10) 418 orl $TS_COMPAT,TI_status(%r10)
419 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) 419 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
420 jnz ia32_tracesys 420 jnz ia32_tracesys
421ia32_do_syscall:
422 cmpl $(IA32_NR_syscalls-1),%eax 421 cmpl $(IA32_NR_syscalls-1),%eax
423 ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ 422 ja ia32_badsys
423ia32_do_call:
424 IA32_ARG_FIXUP 424 IA32_ARG_FIXUP
425 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative 425 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
426ia32_sysret: 426ia32_sysret:
@@ -435,7 +435,9 @@ ia32_tracesys:
435 call syscall_trace_enter 435 call syscall_trace_enter
436 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 436 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
437 RESTORE_REST 437 RESTORE_REST
438 jmp ia32_do_syscall 438 cmpl $(IA32_NR_syscalls-1),%eax
439 ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
440 jmp ia32_do_call
439END(ia32_syscall) 441END(ia32_syscall)
440 442
441ia32_badsys: 443ia32_badsys:
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index 37822206083..3c601f8224b 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -23,8 +23,6 @@
23 */ 23 */
24static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) 24static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
25{ 25{
26 u16 gs;
27
28/* changed the size calculations - should hopefully work better. lbt */ 26/* changed the size calculations - should hopefully work better. lbt */
29 dump->magic = CMAGIC; 27 dump->magic = CMAGIC;
30 dump->start_code = 0; 28 dump->start_code = 0;
@@ -57,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
57 dump->regs.ds = (u16)regs->ds; 55 dump->regs.ds = (u16)regs->ds;
58 dump->regs.es = (u16)regs->es; 56 dump->regs.es = (u16)regs->es;
59 dump->regs.fs = (u16)regs->fs; 57 dump->regs.fs = (u16)regs->fs;
60 savesegment(gs, gs); 58 savesegment(gs, dump->regs.gs);
61 dump->regs.orig_ax = regs->orig_ax; 59 dump->regs.orig_ax = regs->orig_ax;
62 dump->regs.ip = regs->ip; 60 dump->regs.ip = regs->ip;
63 dump->regs.cs = (u16)regs->cs; 61 dump->regs.cs = (u16)regs->cs;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ea408dcba51..7301e60dc4a 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -93,6 +93,7 @@
93#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ 93#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
94#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ 94#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
95#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ 95#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
96#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
96 97
97/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 98/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
98#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 99#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ca5ffb2856b..edc90f23e70 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -37,8 +37,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
37 37
38#else /* !CONFIG_X86_32 */ 38#else /* !CONFIG_X86_32 */
39 39
40#define MAX_EFI_IO_PAGES 100
41
42extern u64 efi_call0(void *fp); 40extern u64 efi_call0(void *fp);
43extern u64 efi_call1(void *fp, u64 arg1); 41extern u64 efi_call1(void *fp, u64 arg1);
44extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); 42extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h
index 00a30ab9b1a..8be740977db 100644
--- a/arch/x86/include/asm/fixmap_64.h
+++ b/arch/x86/include/asm/fixmap_64.h
@@ -16,7 +16,6 @@
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/page.h> 17#include <asm/page.h>
18#include <asm/vsyscall.h> 18#include <asm/vsyscall.h>
19#include <asm/efi.h>
20 19
21/* 20/*
22 * Here we define all the compile-time 'special' virtual 21 * Here we define all the compile-time 'special' virtual
@@ -43,9 +42,6 @@ enum fixed_addresses {
43 FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */ 42 FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
44 FIX_IO_APIC_BASE_0, 43 FIX_IO_APIC_BASE_0,
45 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, 44 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
46 FIX_EFI_IO_MAP_LAST_PAGE,
47 FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
48 + MAX_EFI_IO_PAGES - 1,
49#ifdef CONFIG_PARAVIRT 45#ifdef CONFIG_PARAVIRT
50 FIX_PARAVIRT_BOOTMAP, 46 FIX_PARAVIRT_BOOTMAP,
51#endif 47#endif
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 48f0004db8c..71c9e518398 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -172,7 +172,13 @@ static inline void __save_init_fpu(struct task_struct *tsk)
172 172
173#else /* CONFIG_X86_32 */ 173#else /* CONFIG_X86_32 */
174 174
175extern void finit(void); 175#ifdef CONFIG_MATH_EMULATION
176extern void finit_task(struct task_struct *tsk);
177#else
178static inline void finit_task(struct task_struct *tsk)
179{
180}
181#endif
176 182
177static inline void tolerant_fwait(void) 183static inline void tolerant_fwait(void)
178{ 184{
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index c1f06289b14..86af26091d6 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -23,6 +23,9 @@
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25 25
26int
27is_io_mapping_possible(resource_size_t base, unsigned long size);
28
26void * 29void *
27iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); 30iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
28 31
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3608a..886c9402ec4 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,13 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/ioctl.h> 10#include <linux/ioctl.h>
11 11
12/* Select x86 specific features in <linux/kvm.h> */
13#define __KVM_HAVE_PIT
14#define __KVM_HAVE_IOAPIC
15#define __KVM_HAVE_DEVICE_ASSIGNMENT
16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI
18
12/* Architectural interrupt line count. */ 19/* Architectural interrupt line count. */
13#define KVM_NR_INTERRUPTS 256 20#define KVM_NR_INTERRUPTS 256
14 21
diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h
index 5a65b107ad5..031f6266f42 100644
--- a/arch/x86/include/asm/math_emu.h
+++ b/arch/x86/include/asm/math_emu.h
@@ -1,31 +1,18 @@
1#ifndef _ASM_X86_MATH_EMU_H 1#ifndef _ASM_X86_MATH_EMU_H
2#define _ASM_X86_MATH_EMU_H 2#define _ASM_X86_MATH_EMU_H
3 3
4#include <asm/ptrace.h>
5#include <asm/vm86.h>
6
4/* This structure matches the layout of the data saved to the stack 7/* This structure matches the layout of the data saved to the stack
5 following a device-not-present interrupt, part of it saved 8 following a device-not-present interrupt, part of it saved
6 automatically by the 80386/80486. 9 automatically by the 80386/80486.
7 */ 10 */
8struct info { 11struct math_emu_info {
9 long ___orig_eip; 12 long ___orig_eip;
10 long ___ebx; 13 union {
11 long ___ecx; 14 struct pt_regs *regs;
12 long ___edx; 15 struct kernel_vm86_regs *vm86;
13 long ___esi; 16 };
14 long ___edi;
15 long ___ebp;
16 long ___eax;
17 long ___ds;
18 long ___es;
19 long ___fs;
20 long ___orig_eax;
21 long ___eip;
22 long ___cs;
23 long ___eflags;
24 long ___esp;
25 long ___ss;
26 long ___vm86_es; /* This and the following only in vm86 mode */
27 long ___vm86_ds;
28 long ___vm86_fs;
29 long ___vm86_gs;
30}; 17};
31#endif /* _ASM_X86_MATH_EMU_H */ 18#endif /* _ASM_X86_MATH_EMU_H */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 07f1af494ca..105fb90a063 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void)
32 get_memcfg_numa_flat(); 32 get_memcfg_numa_flat();
33} 33}
34 34
35extern int early_pfn_to_nid(unsigned long pfn);
36
37extern void resume_map_numa_kva(pgd_t *pgd); 35extern void resume_map_numa_kva(pgd_t *pgd);
38 36
39#else /* !CONFIG_NUMA */ 37#else /* !CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a5b3817d4b9..a29f48c2a32 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ 40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
41 NODE_DATA(nid)->node_spanned_pages) 41 NODE_DATA(nid)->node_spanned_pages)
42 42
43extern int early_pfn_to_nid(unsigned long pfn);
44
45#ifdef CONFIG_NUMA_EMU 43#ifdef CONFIG_NUMA_EMU
46#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) 44#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
47#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) 45#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 62d14ce3cd0..bd22f2a3713 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
60 u32 gsi); 60 u32 gsi);
61extern void mp_config_acpi_legacy_irqs(void); 61extern void mp_config_acpi_legacy_irqs(void);
62extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); 62extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
63extern int acpi_probe_gsi(void);
63#ifdef CONFIG_X86_IO_APIC 64#ifdef CONFIG_X86_IO_APIC
64extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, 65extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
65 u32 gsi, int triggering, int polarity); 66 u32 gsi, int triggering, int polarity);
@@ -71,6 +72,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
71 return 0; 72 return 0;
72} 73}
73#endif 74#endif
75#else /* !CONFIG_ACPI: */
76static inline int acpi_probe_gsi(void)
77{
78 return 0;
79}
74#endif /* CONFIG_ACPI */ 80#endif /* CONFIG_ACPI */
75 81
76#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) 82#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e869..776579119a0 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
57typedef struct { pgprotval_t pgprot; } pgprot_t; 57typedef struct { pgprotval_t pgprot; } pgprot_t;
58 58
59extern int page_is_ram(unsigned long pagenr); 59extern int page_is_ram(unsigned long pagenr);
60extern int pagerange_is_ram(unsigned long start, unsigned long end);
61extern int devmem_is_allowed(unsigned long pagenr); 60extern int devmem_is_allowed(unsigned long pagenr);
62extern void map_devmem(unsigned long pfn, unsigned long size, 61extern void map_devmem(unsigned long pfn, unsigned long size,
63 pgprot_t vma_prot); 62 pgprot_t vma_prot);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ba3e2ff6aed..e299287e8e3 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
1352 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); 1352 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
1353} 1353}
1354 1354
1355static inline void arch_flush_lazy_cpu_mode(void) 1355void arch_flush_lazy_cpu_mode(void);
1356{
1357 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
1358 arch_leave_lazy_cpu_mode();
1359 arch_enter_lazy_cpu_mode();
1360 }
1361}
1362
1363 1356
1364#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 1357#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1365static inline void arch_enter_lazy_mmu_mode(void) 1358static inline void arch_enter_lazy_mmu_mode(void)
@@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
1372 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); 1365 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
1373} 1366}
1374 1367
1375static inline void arch_flush_lazy_mmu_mode(void) 1368void arch_flush_lazy_mmu_mode(void);
1376{
1377 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
1378 arch_leave_lazy_mmu_mode();
1379 arch_enter_lazy_mmu_mode();
1380 }
1381}
1382 1369
1383static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, 1370static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1384 unsigned long phys, pgprot_t flags) 1371 unsigned long phys, pgprot_t flags)
@@ -1402,6 +1389,7 @@ static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
1402{ 1389{
1403 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); 1390 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
1404} 1391}
1392#define __raw_spin_is_contended __raw_spin_is_contended
1405 1393
1406static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) 1394static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
1407{ 1395{
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 06bbcbd66e9..4f5af8447d5 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -302,16 +302,30 @@ static inline pte_t pte_mkspecial(pte_t pte)
302 302
303extern pteval_t __supported_pte_mask; 303extern pteval_t __supported_pte_mask;
304 304
305/*
306 * Mask out unsupported bits in a present pgprot. Non-present pgprots
307 * can use those bits for other purposes, so leave them be.
308 */
309static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
310{
311 pgprotval_t protval = pgprot_val(pgprot);
312
313 if (protval & _PAGE_PRESENT)
314 protval &= __supported_pte_mask;
315
316 return protval;
317}
318
305static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) 319static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
306{ 320{
307 return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | 321 return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
308 pgprot_val(pgprot)) & __supported_pte_mask); 322 massage_pgprot(pgprot));
309} 323}
310 324
311static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) 325static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
312{ 326{
313 return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | 327 return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
314 pgprot_val(pgprot)) & __supported_pte_mask); 328 massage_pgprot(pgprot));
315} 329}
316 330
317static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 331static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -323,7 +337,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
323 * the newprot (if present): 337 * the newprot (if present):
324 */ 338 */
325 val &= _PAGE_CHG_MASK; 339 val &= _PAGE_CHG_MASK;
326 val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; 340 val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
327 341
328 return __pte(val); 342 return __pte(val);
329} 343}
@@ -339,7 +353,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
339 353
340#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) 354#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
341 355
342#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) 356#define canon_pgprot(p) __pgprot(massage_pgprot(p))
343 357
344static inline int is_new_memtype_allowed(unsigned long flags, 358static inline int is_new_memtype_allowed(unsigned long flags,
345 unsigned long new_flags) 359 unsigned long new_flags)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd8855f2..3bfd5235a9e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -353,7 +353,7 @@ struct i387_soft_struct {
353 u8 no_update; 353 u8 no_update;
354 u8 rm; 354 u8 rm;
355 u8 alimit; 355 u8 alimit;
356 struct info *info; 356 struct math_emu_info *info;
357 u32 entry_eip; 357 u32 entry_eip;
358}; 358};
359 359
diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h
index a6ad87b352c..b811d6f5780 100644
--- a/arch/x86/include/asm/seccomp_32.h
+++ b/arch/x86/include/asm/seccomp_32.h
@@ -1,12 +1,6 @@
1#ifndef _ASM_X86_SECCOMP_32_H 1#ifndef _ASM_X86_SECCOMP_32_H
2#define _ASM_X86_SECCOMP_32_H 2#define _ASM_X86_SECCOMP_32_H
3 3
4#include <linux/thread_info.h>
5
6#ifdef TIF_32BIT
7#error "unexpected TIF_32BIT on i386"
8#endif
9
10#include <linux/unistd.h> 4#include <linux/unistd.h>
11 5
12#define __NR_seccomp_read __NR_read 6#define __NR_seccomp_read __NR_read
diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h
index 4171bb794e9..84ec1bd161a 100644
--- a/arch/x86/include/asm/seccomp_64.h
+++ b/arch/x86/include/asm/seccomp_64.h
@@ -1,14 +1,6 @@
1#ifndef _ASM_X86_SECCOMP_64_H 1#ifndef _ASM_X86_SECCOMP_64_H
2#define _ASM_X86_SECCOMP_64_H 2#define _ASM_X86_SECCOMP_64_H
3 3
4#include <linux/thread_info.h>
5
6#ifdef TIF_32BIT
7#error "unexpected TIF_32BIT on x86_64"
8#else
9#define TIF_32BIT TIF_IA32
10#endif
11
12#include <linux/unistd.h> 4#include <linux/unistd.h>
13#include <asm/ia32_unistd.h> 5#include <asm/ia32_unistd.h>
14 6
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index d17c91981da..8247e94ac6b 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -245,6 +245,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
245{ 245{
246 return __ticket_spin_is_contended(lock); 246 return __ticket_spin_is_contended(lock);
247} 247}
248#define __raw_spin_is_contended __raw_spin_is_contended
248 249
249static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) 250static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
250{ 251{
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 2ee0a3bceed..cf3bb053da0 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long);
41dotraplinkage void do_overflow(struct pt_regs *, long); 41dotraplinkage void do_overflow(struct pt_regs *, long);
42dotraplinkage void do_bounds(struct pt_regs *, long); 42dotraplinkage void do_bounds(struct pt_regs *, long);
43dotraplinkage void do_invalid_op(struct pt_regs *, long); 43dotraplinkage void do_invalid_op(struct pt_regs *, long);
44dotraplinkage void do_device_not_available(struct pt_regs *, long); 44dotraplinkage void do_device_not_available(struct pt_regs);
45dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); 45dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
46dotraplinkage void do_invalid_TSS(struct pt_regs *, long); 46dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
47dotraplinkage void do_segment_not_present(struct pt_regs *, long); 47dotraplinkage void do_segment_not_present(struct pt_regs *, long);
@@ -77,7 +77,7 @@ extern int panic_on_unrecovered_nmi;
77extern int kstack_depth_to_print; 77extern int kstack_depth_to_print;
78 78
79void math_error(void __user *); 79void math_error(void __user *);
80asmlinkage void math_emulate(long); 80void math_emulate(struct math_emu_info *);
81#ifdef CONFIG_X86_32 81#ifdef CONFIG_X86_32
82unsigned long patch_espfix_desc(unsigned long, unsigned long); 82unsigned long patch_espfix_desc(unsigned long, unsigned long);
83#else 83#else
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 7ef617ef1df..4bd990ee43d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -137,7 +137,7 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
137 pte_t pte; 137 pte_t pte;
138 138
139 pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | 139 pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) |
140 (pgprot_val(pgprot) & __supported_pte_mask); 140 massage_pgprot(pgprot);
141 141
142 return pte; 142 return pte;
143} 143}
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f43..7678f10c456 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
973 nr_ioapics++; 973 nr_ioapics++;
974} 974}
975 975
976int __init acpi_probe_gsi(void)
977{
978 int idx;
979 int gsi;
980 int max_gsi = 0;
981
982 if (acpi_disabled)
983 return 0;
984
985 if (!acpi_ioapic)
986 return 0;
987
988 max_gsi = 0;
989 for (idx = 0; idx < nr_ioapics; idx++) {
990 gsi = mp_ioapic_routing[idx].gsi_end;
991
992 if (gsi > max_gsi)
993 max_gsi = gsi;
994 }
995
996 return max_gsi + 1;
997}
998
976static void assign_to_mp_irq(struct mp_config_intsrc *m, 999static void assign_to_mp_irq(struct mp_config_intsrc *m,
977 struct mp_config_intsrc *mp_irq) 1000 struct mp_config_intsrc *mp_irq)
978{ 1001{
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95f..a60c1f3bcb8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -156,11 +156,11 @@ static int __init acpi_sleep_setup(char *str)
156#ifdef CONFIG_HIBERNATION 156#ifdef CONFIG_HIBERNATION
157 if (strncmp(str, "s4_nohwsig", 10) == 0) 157 if (strncmp(str, "s4_nohwsig", 10) == 0)
158 acpi_no_s4_hw_signature(); 158 acpi_no_s4_hw_signature();
159 if (strncmp(str, "s4_nonvs", 8) == 0)
160 acpi_s4_no_nvs();
159#endif 161#endif
160 if (strncmp(str, "old_ordering", 12) == 0) 162 if (strncmp(str, "old_ordering", 12) == 0)
161 acpi_old_suspend_ordering(); 163 acpi_old_suspend_ordering();
162 if (strncmp(str, "s4_nonvs", 8) == 0)
163 acpi_s4_no_nvs();
164 str = strchr(str, ','); 164 str = strchr(str, ',');
165 if (str != NULL) 165 if (str != NULL)
166 str += strspn(str, ", \t"); 166 str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index bcc293423a7..96258d9dc97 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -13,7 +13,6 @@
13 * Hooray, we are in Long 64-bit mode (but still running in low memory) 13 * Hooray, we are in Long 64-bit mode (but still running in low memory)
14 */ 14 */
15ENTRY(wakeup_long64) 15ENTRY(wakeup_long64)
16wakeup_long64:
17 movq saved_magic, %rax 16 movq saved_magic, %rax
18 movq $0x123456789abcdef0, %rdx 17 movq $0x123456789abcdef0, %rdx
19 cmpq %rdx, %rax 18 cmpq %rdx, %rax
@@ -34,16 +33,12 @@ wakeup_long64:
34 33
35 movq saved_rip, %rax 34 movq saved_rip, %rax
36 jmp *%rax 35 jmp *%rax
36ENDPROC(wakeup_long64)
37 37
38bogus_64_magic: 38bogus_64_magic:
39 jmp bogus_64_magic 39 jmp bogus_64_magic
40 40
41 .align 2 41ENTRY(do_suspend_lowlevel)
42 .p2align 4,,15
43.globl do_suspend_lowlevel
44 .type do_suspend_lowlevel,@function
45do_suspend_lowlevel:
46.LFB5:
47 subq $8, %rsp 42 subq $8, %rsp
48 xorl %eax, %eax 43 xorl %eax, %eax
49 call save_processor_state 44 call save_processor_state
@@ -67,7 +62,7 @@ do_suspend_lowlevel:
67 pushfq 62 pushfq
68 popq pt_regs_flags(%rax) 63 popq pt_regs_flags(%rax)
69 64
70 movq $.L97, saved_rip(%rip) 65 movq $resume_point, saved_rip(%rip)
71 66
72 movq %rsp, saved_rsp 67 movq %rsp, saved_rsp
73 movq %rbp, saved_rbp 68 movq %rbp, saved_rbp
@@ -78,14 +73,12 @@ do_suspend_lowlevel:
78 addq $8, %rsp 73 addq $8, %rsp
79 movl $3, %edi 74 movl $3, %edi
80 xorl %eax, %eax 75 xorl %eax, %eax
81 jmp acpi_enter_sleep_state 76 call acpi_enter_sleep_state
82.L97: 77 /* in case something went wrong, restore the machine status and go on */
83 .p2align 4,,7 78 jmp resume_point
84.L99:
85 .align 4
86 movl $24, %eax
87 movw %ax, %ds
88 79
80 .align 4
81resume_point:
89 /* We don't restore %rax, it must be 0 anyway */ 82 /* We don't restore %rax, it must be 0 anyway */
90 movq $saved_context, %rax 83 movq $saved_context, %rax
91 movq saved_context_cr4(%rax), %rbx 84 movq saved_context_cr4(%rax), %rbx
@@ -117,12 +110,9 @@ do_suspend_lowlevel:
117 xorl %eax, %eax 110 xorl %eax, %eax
118 addq $8, %rsp 111 addq $8, %rsp
119 jmp restore_processor_state 112 jmp restore_processor_state
120.LFE5: 113ENDPROC(do_suspend_lowlevel)
121.Lfe5: 114
122 .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel
123
124.data 115.data
125ALIGN
126ENTRY(saved_rbp) .quad 0 116ENTRY(saved_rbp) .quad 0
127ENTRY(saved_rsi) .quad 0 117ENTRY(saved_rsi) .quad 0
128ENTRY(saved_rdi) .quad 0 118ENTRY(saved_rdi) .quad 0
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 115449f869e..570f36e44e5 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -862,7 +862,7 @@ void clear_local_APIC(void)
862 } 862 }
863 863
864 /* lets not touch this if we didn't frob it */ 864 /* lets not touch this if we didn't frob it */
865#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) 865#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
866 if (maxlvt >= 5) { 866 if (maxlvt >= 5) {
867 v = apic_read(APIC_LVTTHMR); 867 v = apic_read(APIC_LVTTHMR);
868 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 868 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 98807bb095a..266ec6c18b6 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1192,6 +1192,7 @@ static int suspend(int vetoable)
1192 device_suspend(PMSG_SUSPEND); 1192 device_suspend(PMSG_SUSPEND);
1193 local_irq_disable(); 1193 local_irq_disable();
1194 device_power_down(PMSG_SUSPEND); 1194 device_power_down(PMSG_SUSPEND);
1195 sysdev_suspend(PMSG_SUSPEND);
1195 1196
1196 local_irq_enable(); 1197 local_irq_enable();
1197 1198
@@ -1208,6 +1209,7 @@ static int suspend(int vetoable)
1208 if (err != APM_SUCCESS) 1209 if (err != APM_SUCCESS)
1209 apm_error("suspend", err); 1210 apm_error("suspend", err);
1210 err = (err == APM_SUCCESS) ? 0 : -EIO; 1211 err = (err == APM_SUCCESS) ? 0 : -EIO;
1212 sysdev_resume();
1211 device_power_up(PMSG_RESUME); 1213 device_power_up(PMSG_RESUME);
1212 local_irq_enable(); 1214 local_irq_enable();
1213 device_resume(PMSG_RESUME); 1215 device_resume(PMSG_RESUME);
@@ -1228,6 +1230,7 @@ static void standby(void)
1228 1230
1229 local_irq_disable(); 1231 local_irq_disable();
1230 device_power_down(PMSG_SUSPEND); 1232 device_power_down(PMSG_SUSPEND);
1233 sysdev_suspend(PMSG_SUSPEND);
1231 local_irq_enable(); 1234 local_irq_enable();
1232 1235
1233 err = set_system_power_state(APM_STATE_STANDBY); 1236 err = set_system_power_state(APM_STATE_STANDBY);
@@ -1235,6 +1238,7 @@ static void standby(void)
1235 apm_error("standby", err); 1238 apm_error("standby", err);
1236 1239
1237 local_irq_disable(); 1240 local_irq_disable();
1241 sysdev_resume();
1238 device_power_up(PMSG_RESUME); 1242 device_power_up(PMSG_RESUME);
1239 local_irq_enable(); 1243 local_irq_enable();
1240} 1244}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index efae3b22a0f..65792c2cc46 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -245,17 +245,6 @@ config X86_E_POWERSAVER
245 245
246comment "shared options" 246comment "shared options"
247 247
248config X86_ACPI_CPUFREQ_PROC_INTF
249 bool "/proc/acpi/processor/../performance interface (deprecated)"
250 depends on PROC_FS
251 depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
252 help
253 This enables the deprecated /proc/acpi/processor/../performance
254 interface. While it is helpful for debugging, the generic,
255 cross-architecture cpufreq interfaces should be used.
256
257 If in doubt, say N.
258
259config X86_SPEEDSTEP_LIB 248config X86_SPEEDSTEP_LIB
260 tristate 249 tristate
261 default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) 250 default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9..3178c3acd97 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = {
277 .name = "p4-clockmod", 277 .name = "p4-clockmod",
278 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
279 .attr = p4clockmod_attr, 279 .attr = p4clockmod_attr,
280 .hide_interface = 1,
281}; 280};
282 281
283 282
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 5c28b37dea1..6428aa17b40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
939 free_cpumask_var(data->acpi_data.shared_cpu_map); 939 free_cpumask_var(data->acpi_data.shared_cpu_map);
940} 940}
941 941
942static int get_transition_latency(struct powernow_k8_data *data)
943{
944 int max_latency = 0;
945 int i;
946 for (i = 0; i < data->acpi_data.state_count; i++) {
947 int cur_latency = data->acpi_data.states[i].transition_latency
948 + data->acpi_data.states[i].bus_master_latency;
949 if (cur_latency > max_latency)
950 max_latency = cur_latency;
951 }
952 /* value in usecs, needs to be in nanoseconds */
953 return 1000 * max_latency;
954}
955
942#else 956#else
943static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } 957static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
944static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } 958static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
945static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } 959static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
960static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
946#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ 961#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
947 962
948/* Take a frequency, and issue the fid/vid transition command */ 963/* Take a frequency, and issue the fid/vid transition command */
@@ -1142,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1142 data->cpu = pol->cpu; 1157 data->cpu = pol->cpu;
1143 data->currpstate = HW_PSTATE_INVALID; 1158 data->currpstate = HW_PSTATE_INVALID;
1144 1159
1145 rc = powernow_k8_cpu_init_acpi(data); 1160 if (powernow_k8_cpu_init_acpi(data)) {
1146 if (rc) {
1147 /* 1161 /*
1148 * Use the PSB BIOS structure. This is only availabe on 1162 * Use the PSB BIOS structure. This is only availabe on
1149 * an UP version, and is deprecated by AMD. 1163 * an UP version, and is deprecated by AMD.
@@ -1161,19 +1175,28 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1161 "ACPI maintainers and complain to your BIOS " 1175 "ACPI maintainers and complain to your BIOS "
1162 "vendor.\n"); 1176 "vendor.\n");
1163#endif 1177#endif
1164 goto err_out; 1178 kfree(data);
1179 return -ENODEV;
1165 } 1180 }
1166 if (pol->cpu != 0) { 1181 if (pol->cpu != 0) {
1167 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1182 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1168 "CPU other than CPU0. Complain to your BIOS " 1183 "CPU other than CPU0. Complain to your BIOS "
1169 "vendor.\n"); 1184 "vendor.\n");
1170 goto err_out; 1185 kfree(data);
1186 return -ENODEV;
1171 } 1187 }
1172 rc = find_psb_table(data); 1188 rc = find_psb_table(data);
1173 if (rc) { 1189 if (rc) {
1174 goto err_out; 1190 kfree(data);
1191 return -ENODEV;
1175 } 1192 }
1176 } 1193 /* Take a crude guess here.
1194 * That guess was in microseconds, so multiply with 1000 */
1195 pol->cpuinfo.transition_latency = (
1196 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
1197 ((1 << data->irt) * 30)) * 1000;
1198 } else /* ACPI _PSS objects available */
1199 pol->cpuinfo.transition_latency = get_transition_latency(data);
1177 1200
1178 /* only run on specific CPU from here on */ 1201 /* only run on specific CPU from here on */
1179 oldmask = current->cpus_allowed; 1202 oldmask = current->cpus_allowed;
@@ -1204,11 +1227,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1204 cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); 1227 cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
1205 data->available_cores = pol->cpus; 1228 data->available_cores = pol->cpus;
1206 1229
1207 /* Take a crude guess here.
1208 * That guess was in microseconds, so multiply with 1000 */
1209 pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
1210 + (3 * (1 << data->irt) * 10)) * 1000;
1211
1212 if (cpu_family == CPU_HW_PSTATE) 1230 if (cpu_family == CPU_HW_PSTATE)
1213 pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); 1231 pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
1214 else 1232 else
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 430e5c38a54..24ff26a38ad 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
291 ds_init_intel(c); 291 ds_init_intel(c);
292 } 292 }
293 293
294 if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
295 set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
296
294#ifdef CONFIG_X86_64 297#ifdef CONFIG_X86_64
295 if (c->x86 == 15) 298 if (c->x86 == 15)
296 c->x86_cache_alignment = c->x86_clflush_size * 2; 299 c->x86_cache_alignment = c->x86_clflush_size * 2;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1c838032fd3..fe79985ce0f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
295 * If we know that the error was in user space, send a 295 * If we know that the error was in user space, send a
296 * SIGBUS. Otherwise, panic if tolerance is low. 296 * SIGBUS. Otherwise, panic if tolerance is low.
297 * 297 *
298 * do_exit() takes an awful lot of locks and has a slight 298 * force_sig() takes an awful lot of locks and has a slight
299 * risk of deadlocking. 299 * risk of deadlocking.
300 */ 300 */
301 if (user_space) { 301 if (user_space) {
302 do_exit(SIGBUS); 302 force_sig(SIGBUS, current);
303 } else if (panic_on_oops || tolerant < 2) { 303 } else if (panic_on_oops || tolerant < 2) {
304 mce_panic("Uncorrected machine check", 304 mce_panic("Uncorrected machine check",
305 &panicm, mcestart); 305 &panicm, mcestart);
@@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
490 490
491} 491}
492 492
493static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 493static void mce_cpu_features(struct cpuinfo_x86 *c)
494{ 494{
495 switch (c->x86_vendor) { 495 switch (c->x86_vendor) {
496 case X86_VENDOR_INTEL: 496 case X86_VENDOR_INTEL:
@@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable);
734static int mce_resume(struct sys_device *dev) 734static int mce_resume(struct sys_device *dev)
735{ 735{
736 mce_init(NULL); 736 mce_init(NULL);
737 mce_cpu_features(&current_cpu_data);
737 return 0; 738 return 0;
738} 739}
739 740
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094..f2ee0ae29bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr)
121} 121}
122 122
123/* cpu init entry point, called from mce.c with preempt off */ 123/* cpu init entry point, called from mce.c with preempt off */
124void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) 124void mce_amd_feature_init(struct cpuinfo_x86 *c)
125{ 125{
126 unsigned int bank, block; 126 unsigned int bank, block;
127 unsigned int cpu = smp_processor_id(); 127 unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd3..f44c3662436 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -30,7 +30,7 @@ asmlinkage void smp_thermal_interrupt(void)
30 irq_exit(); 30 irq_exit();
31} 31}
32 32
33static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) 33static void intel_init_thermal(struct cpuinfo_x86 *c)
34{ 34{
35 u32 l, h; 35 u32 l, h;
36 int tm2 = 0; 36 int tm2 = 0;
@@ -84,7 +84,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
84 return; 84 return;
85} 85}
86 86
87void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) 87void mce_intel_feature_init(struct cpuinfo_x86 *c)
88{ 88{
89 intel_init_thermal(c); 89 intel_init_thermal(c);
90} 90}
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 169a120587b..87b67e3a765 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
729 729
730 spin_unlock_irqrestore(&ds_lock, irq); 730 spin_unlock_irqrestore(&ds_lock, irq);
731 731
732 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); 732 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
733 ds_resume_pebs(tracer); 733 ds_resume_pebs(tracer);
734 734
735 return tracer; 735 return tracer;
@@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
1029 1029
1030void ds_exit_thread(struct task_struct *tsk) 1030void ds_exit_thread(struct task_struct *tsk)
1031{ 1031{
1032 WARN_ON(tsk->thread.ds_ctx);
1033} 1032}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe1..eb1ef3b67dd 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -467,7 +467,7 @@ void __init efi_enter_virtual_mode(void)
467 efi_memory_desc_t *md; 467 efi_memory_desc_t *md;
468 efi_status_t status; 468 efi_status_t status;
469 unsigned long size; 469 unsigned long size;
470 u64 end, systab, addr, npages; 470 u64 end, systab, addr, npages, end_pfn;
471 void *p, *va; 471 void *p, *va;
472 472
473 efi.systab = NULL; 473 efi.systab = NULL;
@@ -479,7 +479,10 @@ void __init efi_enter_virtual_mode(void)
479 size = md->num_pages << EFI_PAGE_SHIFT; 479 size = md->num_pages << EFI_PAGE_SHIFT;
480 end = md->phys_addr + size; 480 end = md->phys_addr + size;
481 481
482 if (PFN_UP(end) <= max_low_pfn_mapped) 482 end_pfn = PFN_UP(end);
483 if (end_pfn <= max_low_pfn_mapped
484 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
485 && end_pfn <= max_pfn_mapped))
483 va = __va(md->phys_addr); 486 va = __va(md->phys_addr);
484 else 487 else
485 va = efi_ioremap(md->phys_addr, size); 488 va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215..cb783b92c50 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -99,24 +99,11 @@ void __init efi_call_phys_epilog(void)
99 99
100void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) 100void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
101{ 101{
102 static unsigned pages_mapped __initdata; 102 unsigned long last_map_pfn;
103 unsigned i, pages;
104 unsigned long offset;
105 103
106 pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); 104 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
107 offset = phys_addr & ~PAGE_MASK; 105 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
108 phys_addr &= PAGE_MASK;
109
110 if (pages_mapped + pages > MAX_EFI_IO_PAGES)
111 return NULL; 106 return NULL;
112 107
113 for (i = 0; i < pages; i++) { 108 return (void __iomem *)__va(phys_addr);
114 __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
115 phys_addr, PAGE_KERNEL);
116 phys_addr += PAGE_SIZE;
117 pages_mapped++;
118 }
119
120 return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
121 (pages_mapped - pages)) + offset;
122} 109}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b43086b097..231bdd3c5b1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -488,20 +488,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
488 * ignore such a protection. 488 * ignore such a protection.
489 */ 489 */
490 asm volatile( 490 asm volatile(
491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n" 491 "1: " _ASM_MOV " (%[parent]), %[old]\n"
492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" 492 "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
493 " movl $0, %[faulted]\n" 493 " movl $0, %[faulted]\n"
494 "3:\n"
494 495
495 ".section .fixup, \"ax\"\n" 496 ".section .fixup, \"ax\"\n"
496 "3: movl $1, %[faulted]\n" 497 "4: movl $1, %[faulted]\n"
498 " jmp 3b\n"
497 ".previous\n" 499 ".previous\n"
498 500
499 _ASM_EXTABLE(1b, 3b) 501 _ASM_EXTABLE(1b, 4b)
500 _ASM_EXTABLE(2b, 3b) 502 _ASM_EXTABLE(2b, 4b)
501 503
502 : [parent_replaced] "=r" (parent), [old] "=r" (old), 504 : [old] "=r" (old), [faulted] "=r" (faulted)
503 [faulted] "=r" (faulted) 505 : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
504 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
505 : "memory" 506 : "memory"
506 ); 507 );
507 508
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 64d5ad0b8ad..a00545fe5cd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
269 now = hpet_readl(HPET_COUNTER); 269 now = hpet_readl(HPET_COUNTER);
270 cmp = now + (unsigned long) delta; 270 cmp = now + (unsigned long) delta;
271 cfg = hpet_readl(HPET_Tn_CFG(timer)); 271 cfg = hpet_readl(HPET_Tn_CFG(timer));
272 /* Make sure we use edge triggered interrupts */
273 cfg &= ~HPET_TN_LEVEL;
272 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 274 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
273 HPET_TN_SETVAL | HPET_TN_32BIT; 275 HPET_TN_SETVAL | HPET_TN_32BIT;
274 hpet_writel(cfg, HPET_Tn_CFG(timer)); 276 hpet_writel(cfg, HPET_Tn_CFG(timer));
@@ -897,7 +899,7 @@ static unsigned long hpet_rtc_flags;
897static int hpet_prev_update_sec; 899static int hpet_prev_update_sec;
898static struct rtc_time hpet_alarm_time; 900static struct rtc_time hpet_alarm_time;
899static unsigned long hpet_pie_count; 901static unsigned long hpet_pie_count;
900static unsigned long hpet_t1_cmp; 902static u32 hpet_t1_cmp;
901static unsigned long hpet_default_delta; 903static unsigned long hpet_default_delta;
902static unsigned long hpet_pie_delta; 904static unsigned long hpet_pie_delta;
903static unsigned long hpet_pie_limit; 905static unsigned long hpet_pie_limit;
@@ -905,6 +907,14 @@ static unsigned long hpet_pie_limit;
905static rtc_irq_handler irq_handler; 907static rtc_irq_handler irq_handler;
906 908
907/* 909/*
910 * Check that the hpet counter c1 is ahead of the c2
911 */
912static inline int hpet_cnt_ahead(u32 c1, u32 c2)
913{
914 return (s32)(c2 - c1) < 0;
915}
916
917/*
908 * Registers a IRQ handler. 918 * Registers a IRQ handler.
909 */ 919 */
910int hpet_register_irq_handler(rtc_irq_handler handler) 920int hpet_register_irq_handler(rtc_irq_handler handler)
@@ -1075,7 +1085,7 @@ static void hpet_rtc_timer_reinit(void)
1075 hpet_t1_cmp += delta; 1085 hpet_t1_cmp += delta;
1076 hpet_writel(hpet_t1_cmp, HPET_T1_CMP); 1086 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
1077 lost_ints++; 1087 lost_ints++;
1078 } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); 1088 } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER)));
1079 1089
1080 if (lost_ints) { 1090 if (lost_ints) {
1081 if (hpet_rtc_flags & RTC_PIE) 1091 if (hpet_rtc_flags & RTC_PIE)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index b0f61f0dcd0..f2f8540a7f3 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk)
136#ifdef CONFIG_X86_32 136#ifdef CONFIG_X86_32
137 if (!HAVE_HWFP) { 137 if (!HAVE_HWFP) {
138 memset(tsk->thread.xstate, 0, xstate_size); 138 memset(tsk->thread.xstate, 0, xstate_size);
139 finit(); 139 finit_task(tsk);
140 set_stopped_child_used_math(tsk); 140 set_stopped_child_used_math(tsk);
141 return 0; 141 return 0;
142 } 142 }
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index dbd6c1d1b63..b42ca694dc6 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev)
28 28
29 flags = claim_dma_lock(); 29 flags = claim_dma_lock();
30 30
31 dma_outb(DMA1_RESET_REG, 0); 31 dma_outb(0, DMA1_RESET_REG);
32 dma_outb(DMA2_RESET_REG, 0); 32 dma_outb(0, DMA2_RESET_REG);
33 33
34 for (i = 0;i < 8;i++) { 34 for (i = 0; i < 8; i++) {
35 set_dma_addr(i, 0x000000); 35 set_dma_addr(i, 0x000000);
36 /* DMA count is a bit weird so this is not 0 */ 36 /* DMA count is a bit weird so this is not 0 */
37 set_dma_count(i, 1); 37 set_dma_count(i, 1);
@@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
51} 51}
52 52
53static struct sysdev_class i8237_sysdev_class = { 53static struct sysdev_class i8237_sysdev_class = {
54 .name = "i8237", 54 .name = "i8237",
55 .suspend = i8237A_suspend, 55 .suspend = i8237A_suspend,
56 .resume = i8237A_resume, 56 .resume = i8237A_resume,
57}; 57};
58 58
59static struct sys_device device_i8237A = { 59static struct sys_device device_i8237A = {
60 .id = 0, 60 .id = 0,
61 .cls = &i8237_sysdev_class, 61 .cls = &i8237_sysdev_class,
62}; 62};
63 63
64static int __init i8237A_init_sysfs(void) 64static int __init i8237A_init_sysfs(void)
@@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void)
68 error = sysdev_register(&device_i8237A); 68 error = sysdev_register(&device_i8237A);
69 return error; 69 return error;
70} 70}
71
72device_initcall(i8237A_init_sysfs); 71device_initcall(i8237A_init_sysfs);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9b0c480c383..bc7ac4da90d 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic)
3841 3841
3842void __init probe_nr_irqs_gsi(void) 3842void __init probe_nr_irqs_gsi(void)
3843{ 3843{
3844 int idx;
3845 int nr = 0; 3844 int nr = 0;
3846 3845
3847 for (idx = 0; idx < nr_ioapics; idx++) 3846 nr = acpi_probe_gsi();
3848 nr += io_apic_get_redir_entries(idx) + 1; 3847 if (nr > nr_irqs_gsi) {
3849
3850 if (nr > nr_irqs_gsi)
3851 nr_irqs_gsi = nr; 3848 nr_irqs_gsi = nr;
3849 } else {
3850 /* for acpi=off or acpi is not compiled in */
3851 int idx;
3852
3853 nr = 0;
3854 for (idx = 0; idx < nr_ioapics; idx++)
3855 nr += io_apic_get_redir_entries(idx) + 1;
3856
3857 if (nr > nr_irqs_gsi)
3858 nr_irqs_gsi = nr;
3859 }
3860
3861 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3852} 3862}
3853 3863
3854/* -------------------------------------------------------------------------- 3864/* --------------------------------------------------------------------------
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e948b28a5a9..4558dd3918c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
193 kprobe_opcode_t opcode; 193 kprobe_opcode_t opcode;
194 kprobe_opcode_t *orig_opcodes = opcodes; 194 kprobe_opcode_t *orig_opcodes = opcodes;
195 195
196 if (search_exception_tables(opcodes))
197 return 0; /* Page fault may occur on this address. */
198
196retry: 199retry:
197 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) 200 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
198 return 0; 201 return 0;
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1..4006c522adc 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
203static void __init platform_detect(void) 203static void __init platform_detect(void)
204{ 204{
205 /* stopgap until OFW support is added to the kernel */ 205 /* stopgap until OFW support is added to the kernel */
206 olpc_platform_info.boardrev = 0xc2; 206 olpc_platform_info.boardrev = olpc_board(0xc2);
207} 207}
208#endif 208#endif
209 209
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb60887..c6520a4e85d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
268 return __get_cpu_var(paravirt_lazy_mode); 268 return __get_cpu_var(paravirt_lazy_mode);
269} 269}
270 270
271void arch_flush_lazy_mmu_mode(void)
272{
273 preempt_disable();
274
275 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
276 WARN_ON(preempt_count() == 1);
277 arch_leave_lazy_mmu_mode();
278 arch_enter_lazy_mmu_mode();
279 }
280
281 preempt_enable();
282}
283
284void arch_flush_lazy_cpu_mode(void)
285{
286 preempt_disable();
287
288 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
289 WARN_ON(preempt_count() == 1);
290 arch_leave_lazy_cpu_mode();
291 arch_enter_lazy_cpu_mode();
292 }
293
294 preempt_enable();
295}
296
271struct pv_info pv_info = { 297struct pv_info pv_info = {
272 .name = "bare hardware", 298 .name = "bare hardware",
273 .paravirt_enabled = 0, 299 .paravirt_enabled = 0,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e68bb9e3086..6d12f7e37f8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
180 180
181 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); 181 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
182 if (!need_resched()) { 182 if (!need_resched()) {
183 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
184 clflush((void *)&current_thread_info()->flags);
185
183 __monitor((void *)&current_thread_info()->flags, 0, 0); 186 __monitor((void *)&current_thread_info()->flags, 0, 0);
184 smp_mb(); 187 smp_mb();
185 if (!need_resched()) 188 if (!need_resched())
@@ -194,6 +197,9 @@ static void mwait_idle(void)
194 struct power_trace it; 197 struct power_trace it;
195 if (!need_resched()) { 198 if (!need_resched()) {
196 trace_power_start(&it, POWER_CSTATE, 1); 199 trace_power_start(&it, POWER_CSTATE, 1);
200 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
201 clflush((void *)&current_thread_info()->flags);
202
197 __monitor((void *)&current_thread_info()->flags, 0, 0); 203 __monitor((void *)&current_thread_info()->flags, 0, 0);
198 smp_mb(); 204 smp_mb();
199 if (!need_resched()) 205 if (!need_resched())
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b..bd4da2af08a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -104,9 +104,6 @@ void cpu_idle(void)
104 check_pgt_cache(); 104 check_pgt_cache();
105 rmb(); 105 rmb();
106 106
107 if (rcu_pending(cpu))
108 rcu_check_callbacks(cpu, 0);
109
110 if (cpu_is_offline(cpu)) 107 if (cpu_is_offline(cpu))
111 play_dead(); 108 play_dead();
112 109
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4..85b4cb5c198 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -40,6 +40,7 @@
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41#include <linux/io.h> 41#include <linux/io.h>
42#include <linux/ftrace.h> 42#include <linux/ftrace.h>
43#include <linux/dmi.h>
43 44
44#include <asm/pgtable.h> 45#include <asm/pgtable.h>
45#include <asm/system.h> 46#include <asm/system.h>
@@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all)
151 unsigned long d0, d1, d2, d3, d6, d7; 152 unsigned long d0, d1, d2, d3, d6, d7;
152 unsigned int fsindex, gsindex; 153 unsigned int fsindex, gsindex;
153 unsigned int ds, cs, es; 154 unsigned int ds, cs, es;
155 const char *board;
154 156
155 printk("\n"); 157 printk("\n");
156 print_modules(); 158 print_modules();
157 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", 159 board = dmi_get_system_info(DMI_PRODUCT_NAME);
160 if (!board)
161 board = "";
162 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
158 current->pid, current->comm, print_tainted(), 163 current->pid, current->comm, print_tainted(),
159 init_utsname()->release, 164 init_utsname()->release,
160 (int)strcspn(init_utsname()->version, " "), 165 (int)strcspn(init_utsname()->version, " "),
161 init_utsname()->version); 166 init_utsname()->version, board);
162 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); 167 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
163 printk_address(regs->ip, 1); 168 printk_address(regs->ip, 1);
164 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, 169 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb..06ca07f6ad8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
810 810
811static void ptrace_bts_detach(struct task_struct *child) 811static void ptrace_bts_detach(struct task_struct *child)
812{ 812{
813 if (unlikely(child->bts)) { 813 /*
814 ds_release_bts(child->bts); 814 * Ptrace_detach() races with ptrace_untrace() in case
815 child->bts = NULL; 815 * the child dies and is reaped by another thread.
816 816 *
817 ptrace_bts_free_buffer(child); 817 * We only do the memory accounting at this point and
818 } 818 * leave the buffer deallocation and the bts tracer
819 * release to ptrace_bts_untrace() which will be called
820 * later on with tasklist_lock held.
821 */
822 release_locked_buffer(child->bts_buffer, child->bts_size);
819} 823}
820#else 824#else
821static inline void ptrace_bts_fork(struct task_struct *tsk) {} 825static inline void ptrace_bts_fork(struct task_struct *tsk) {}
@@ -1384,7 +1388,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
1384#ifdef CONFIG_X86_32 1388#ifdef CONFIG_X86_32
1385# define IS_IA32 1 1389# define IS_IA32 1
1386#elif defined CONFIG_IA32_EMULATION 1390#elif defined CONFIG_IA32_EMULATION
1387# define IS_IA32 test_thread_flag(TIF_IA32) 1391# define IS_IA32 is_compat_task()
1388#else 1392#else
1389# define IS_IA32 0 1393# define IS_IA32 0
1390#endif 1394#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643..4526b3a75ed 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -217,6 +217,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
217 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), 217 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
218 }, 218 },
219 }, 219 },
220 { /* Handle problems with rebooting on Dell XPS710 */
221 .callback = set_bios_reboot,
222 .ident = "Dell XPS710",
223 .matches = {
224 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
225 DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
226 },
227 },
220 { } 228 { }
221}; 229};
222 230
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf6..6a8811a6932 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
607static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) 607static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
608{ 608{
609 printk(KERN_NOTICE 609 printk(KERN_NOTICE
610 "%s detected: BIOS may corrupt low RAM, working it around.\n", 610 "%s detected: BIOS may corrupt low RAM, working around it.\n",
611 d->ident); 611 d->ident);
612 612
613 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); 613 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
@@ -770,6 +770,9 @@ void __init setup_arch(char **cmdline_p)
770 770
771 finish_e820_parsing(); 771 finish_e820_parsing();
772 772
773 if (efi_enabled)
774 efi_init();
775
773 dmi_scan_machine(); 776 dmi_scan_machine();
774 777
775 dmi_check_system(bad_bios_dmi_table); 778 dmi_check_system(bad_bios_dmi_table);
@@ -789,8 +792,6 @@ void __init setup_arch(char **cmdline_p)
789 insert_resource(&iomem_resource, &data_resource); 792 insert_resource(&iomem_resource, &data_resource);
790 insert_resource(&iomem_resource, &bss_resource); 793 insert_resource(&iomem_resource, &bss_resource);
791 794
792 if (efi_enabled)
793 efi_init();
794 795
795#ifdef CONFIG_X86_32 796#ifdef CONFIG_X86_32
796 if (ppro_with_ram_bug()) { 797 if (ppro_with_ram_bug()) {
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e6e695acd72..241ec3923f6 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void)
115 115
116static struct irqaction irq0 = { 116static struct irqaction irq0 = {
117 .handler = timer_interrupt, 117 .handler = timer_interrupt,
118 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, 118 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
119 .mask = CPU_MASK_NONE, 119 .mask = CPU_MASK_NONE,
120 .name = "timer" 120 .name = "timer"
121}; 121};
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284..a9e7548e179 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
99 local_irq_enable(); 99 local_irq_enable();
100} 100}
101 101
102static inline void conditional_cli(struct pt_regs *regs)
103{
104 if (regs->flags & X86_EFLAGS_IF)
105 local_irq_disable();
106}
107
102static inline void preempt_conditional_cli(struct pt_regs *regs) 108static inline void preempt_conditional_cli(struct pt_regs *regs)
103{ 109{
104 if (regs->flags & X86_EFLAGS_IF) 110 if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
626 632
627#ifdef CONFIG_X86_32 633#ifdef CONFIG_X86_32
628debug_vm86: 634debug_vm86:
635 /* reenable preemption: handle_vm86_trap() might sleep */
636 dec_preempt_count();
629 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 637 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
630 preempt_conditional_cli(regs); 638 conditional_cli(regs);
631 return; 639 return;
632#endif 640#endif
633 641
@@ -896,7 +904,7 @@ asmlinkage void math_state_restore(void)
896EXPORT_SYMBOL_GPL(math_state_restore); 904EXPORT_SYMBOL_GPL(math_state_restore);
897 905
898#ifndef CONFIG_MATH_EMULATION 906#ifndef CONFIG_MATH_EMULATION
899asmlinkage void math_emulate(long arg) 907void math_emulate(struct math_emu_info *info)
900{ 908{
901 printk(KERN_EMERG 909 printk(KERN_EMERG
902 "math-emulation not enabled and no coprocessor found.\n"); 910 "math-emulation not enabled and no coprocessor found.\n");
@@ -906,16 +914,19 @@ asmlinkage void math_emulate(long arg)
906} 914}
907#endif /* CONFIG_MATH_EMULATION */ 915#endif /* CONFIG_MATH_EMULATION */
908 916
909dotraplinkage void __kprobes 917dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
910do_device_not_available(struct pt_regs *regs, long error)
911{ 918{
912#ifdef CONFIG_X86_32 919#ifdef CONFIG_X86_32
913 if (read_cr0() & X86_CR0_EM) { 920 if (read_cr0() & X86_CR0_EM) {
914 conditional_sti(regs); 921 struct math_emu_info info = { };
915 math_emulate(0); 922
923 conditional_sti(&regs);
924
925 info.regs = &regs;
926 math_emulate(&info);
916 } else { 927 } else {
917 math_state_restore(); /* interrupts still off */ 928 math_state_restore(); /* interrupts still off */
918 conditional_sti(regs); 929 conditional_sti(&regs);
919 } 930 }
920#else 931#else
921 math_state_restore(); 932 math_state_restore();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 599e5816863..d5cebb52d45 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -273,30 +273,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
273 * use the TSC value at the transitions to calculate a pretty 273 * use the TSC value at the transitions to calculate a pretty
274 * good value for the TSC frequencty. 274 * good value for the TSC frequencty.
275 */ 275 */
276static inline int pit_expect_msb(unsigned char val) 276static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
277{ 277{
278 int count = 0; 278 int count;
279 u64 tsc = 0;
279 280
280 for (count = 0; count < 50000; count++) { 281 for (count = 0; count < 50000; count++) {
281 /* Ignore LSB */ 282 /* Ignore LSB */
282 inb(0x42); 283 inb(0x42);
283 if (inb(0x42) != val) 284 if (inb(0x42) != val)
284 break; 285 break;
286 tsc = get_cycles();
285 } 287 }
286 return count > 50; 288 *deltap = get_cycles() - tsc;
289 *tscp = tsc;
290
291 /*
292 * We require _some_ success, but the quality control
293 * will be based on the error terms on the TSC values.
294 */
295 return count > 5;
287} 296}
288 297
289/* 298/*
290 * How many MSB values do we want to see? We aim for a 299 * How many MSB values do we want to see? We aim for
291 * 15ms calibration, which assuming a 2us counter read 300 * a maximum error rate of 500ppm (in practice the
292 * error should give us roughly 150 ppm precision for 301 * real error is much smaller), but refuse to spend
293 * the calibration. 302 * more than 25ms on it.
294 */ 303 */
295#define QUICK_PIT_MS 15 304#define MAX_QUICK_PIT_MS 25
296#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 305#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
297 306
298static unsigned long quick_pit_calibrate(void) 307static unsigned long quick_pit_calibrate(void)
299{ 308{
309 int i;
310 u64 tsc, delta;
311 unsigned long d1, d2;
312
300 /* Set the Gate high, disable speaker */ 313 /* Set the Gate high, disable speaker */
301 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 314 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
302 315
@@ -315,45 +328,52 @@ static unsigned long quick_pit_calibrate(void)
315 outb(0xff, 0x42); 328 outb(0xff, 0x42);
316 outb(0xff, 0x42); 329 outb(0xff, 0x42);
317 330
318 if (pit_expect_msb(0xff)) { 331 /*
319 int i; 332 * The PIT starts counting at the next edge, so we
320 u64 t1, t2, delta; 333 * need to delay for a microsecond. The easiest way
321 unsigned char expect = 0xfe; 334 * to do that is to just read back the 16-bit counter
322 335 * once from the PIT.
323 t1 = get_cycles(); 336 */
324 for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { 337 inb(0x42);
325 if (!pit_expect_msb(expect)) 338 inb(0x42);
326 goto failed; 339
340 if (pit_expect_msb(0xff, &tsc, &d1)) {
341 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
342 if (!pit_expect_msb(0xff-i, &delta, &d2))
343 break;
344
345 /*
346 * Iterate until the error is less than 500 ppm
347 */
348 delta -= tsc;
349 if (d1+d2 < delta >> 11)
350 goto success;
327 } 351 }
328 t2 = get_cycles();
329
330 /*
331 * Make sure we can rely on the second TSC timestamp:
332 */
333 if (!pit_expect_msb(expect))
334 goto failed;
335
336 /*
337 * Ok, if we get here, then we've seen the
338 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
339 * times, and each MSB had many hits, so we never
340 * had any sudden jumps.
341 *
342 * As a result, we can depend on there not being
343 * any odd delays anywhere, and the TSC reads are
344 * reliable.
345 *
346 * kHz = ticks / time-in-seconds / 1000;
347 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
348 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
349 */
350 delta = (t2 - t1)*PIT_TICK_RATE;
351 do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
352 printk("Fast TSC calibration using PIT\n");
353 return delta;
354 } 352 }
355failed: 353 printk("Fast TSC calibration failed\n");
356 return 0; 354 return 0;
355
356success:
357 /*
358 * Ok, if we get here, then we've seen the
359 * MSB of the PIT decrement 'i' times, and the
360 * error has shrunk to less than 500 ppm.
361 *
362 * As a result, we can depend on there not being
363 * any odd delays anywhere, and the TSC reads are
364 * reliable (within the error). We also adjust the
365 * delta to the middle of the error bars, just
366 * because it looks nicer.
367 *
368 * kHz = ticks / time-in-seconds / 1000;
369 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
370 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
371 */
372 delta += (long)(d2 - d1)/2;
373 delta *= PIT_TICK_RATE;
374 do_div(delta, i*256*1000);
375 printk("Fast TSC calibration using PIT\n");
376 return delta;
357} 377}
358 378
359/** 379/**
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 1d3302cc2dd..bef58b4982d 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -321,6 +321,16 @@ static void vmi_release_pmd(unsigned long pfn)
321} 321}
322 322
323/* 323/*
324 * We use the pgd_free hook for releasing the pgd page:
325 */
326static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
327{
328 unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
329
330 vmi_ops.release_page(pfn, VMI_PAGE_L2);
331}
332
333/*
324 * Helper macros for MMU update flags. We can defer updates until a flush 334 * Helper macros for MMU update flags. We can defer updates until a flush
325 * or page invalidation only if the update is to the current address space 335 * or page invalidation only if the update is to the current address space
326 * (otherwise, there is no flush). We must check against init_mm, since 336 * (otherwise, there is no flush). We must check against init_mm, since
@@ -762,6 +772,7 @@ static inline int __init activate_vmi(void)
762 if (vmi_ops.release_page) { 772 if (vmi_ops.release_page) {
763 pv_mmu_ops.release_pte = vmi_release_pte; 773 pv_mmu_ops.release_pte = vmi_release_pte;
764 pv_mmu_ops.release_pmd = vmi_release_pmd; 774 pv_mmu_ops.release_pmd = vmi_release_pmd;
775 pv_mmu_ops.pgd_free = vmi_pgd_free;
765 } 776 }
766 777
767 /* Set linear is needed in all cases */ 778 /* Set linear is needed in all cases */
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index c4c1f9e0940..e5b088fffa4 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -202,7 +202,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
202static struct irqaction vmi_clock_action = { 202static struct irqaction vmi_clock_action = {
203 .name = "vmi-timer", 203 .name = "vmi-timer",
204 .handler = vmi_timer_interrupt, 204 .handler = vmi_timer_interrupt,
205 .flags = IRQF_DISABLED | IRQF_NOBALANCING, 205 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
206 .mask = CPU_MASK_ALL, 206 .mask = CPU_MASK_ALL,
207}; 207};
208 208
@@ -283,10 +283,13 @@ void __devinit vmi_time_ap_init(void)
283#endif 283#endif
284 284
285/** vmi clocksource */ 285/** vmi clocksource */
286static struct clocksource clocksource_vmi;
286 287
287static cycle_t read_real_cycles(void) 288static cycle_t read_real_cycles(void)
288{ 289{
289 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); 290 cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
291 return ret >= clocksource_vmi.cycle_last ?
292 ret : clocksource_vmi.cycle_last;
290} 293}
291 294
292static struct clocksource clocksource_vmi = { 295static struct clocksource clocksource_vmi = {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623c..72bd275a9b5 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
207 hrtimer_add_expires_ns(&pt->timer, pt->period); 207 hrtimer_add_expires_ns(&pt->timer, pt->period);
208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer); 208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
209 if (pt->period) 209 if (pt->period)
210 ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); 210 ps->channels[0].count_load_time = ktime_get();
211 211
212 return (pt->period == 0 ? 0 : 1); 212 return (pt->period == 0 ? 0 : 1);
213} 213}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8edcdb..cf17ed52f6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
87} 87}
88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); 88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
89 89
90void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
91{
92 kvm_apic_timer_intr_post(vcpu, vec);
93 /* TODO: PIT, RTC etc. */
94}
95EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
96
97void __kvm_migrate_timers(struct kvm_vcpu *vcpu) 90void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
98{ 91{
99 __kvm_migrate_apic_timer(vcpu); 92 __kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a03cee..82579ee538d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
89 89
90void kvm_pic_reset(struct kvm_kpic_state *s); 90void kvm_pic_reset(struct kvm_kpic_state *s);
91 91
92void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
93void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 92void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
94void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 93void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
95void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); 94void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815..f0b67f2cdd6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,12 @@
35#include "kvm_cache_regs.h" 35#include "kvm_cache_regs.h"
36#include "irq.h" 36#include "irq.h"
37 37
38#ifndef CONFIG_X86_64
39#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
40#else
41#define mod_64(x, y) ((x) % (y))
42#endif
43
38#define PRId64 "d" 44#define PRId64 "d"
39#define PRIx64 "llx" 45#define PRIx64 "llx"
40#define PRIu64 "u" 46#define PRIu64 "u"
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
511 517
512static u32 apic_get_tmcct(struct kvm_lapic *apic) 518static u32 apic_get_tmcct(struct kvm_lapic *apic)
513{ 519{
514 u64 counter_passed; 520 ktime_t remaining;
515 ktime_t passed, now; 521 s64 ns;
516 u32 tmcct; 522 u32 tmcct;
517 523
518 ASSERT(apic != NULL); 524 ASSERT(apic != NULL);
519 525
520 now = apic->timer.dev.base->get_time();
521 tmcct = apic_get_reg(apic, APIC_TMICT);
522
523 /* if initial count is 0, current count should also be 0 */ 526 /* if initial count is 0, current count should also be 0 */
524 if (tmcct == 0) 527 if (apic_get_reg(apic, APIC_TMICT) == 0)
525 return 0; 528 return 0;
526 529
527 if (unlikely(ktime_to_ns(now) <= 530 remaining = hrtimer_expires_remaining(&apic->timer.dev);
528 ktime_to_ns(apic->timer.last_update))) { 531 if (ktime_to_ns(remaining) < 0)
529 /* Wrap around */ 532 remaining = ktime_set(0, 0);
530 passed = ktime_add(( { 533
531 (ktime_t) { 534 ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
532 .tv64 = KTIME_MAX - 535 tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
533 (apic->timer.last_update).tv64}; }
534 ), now);
535 apic_debug("time elapsed\n");
536 } else
537 passed = ktime_sub(now, apic->timer.last_update);
538
539 counter_passed = div64_u64(ktime_to_ns(passed),
540 (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
541
542 if (counter_passed > tmcct) {
543 if (unlikely(!apic_lvtt_period(apic))) {
544 /* one-shot timers stick at 0 until reset */
545 tmcct = 0;
546 } else {
547 /*
548 * periodic timers reset to APIC_TMICT when they
549 * hit 0. The while loop simulates this happening N
550 * times. (counter_passed %= tmcct) would also work,
551 * but might be slower or not work on 32-bit??
552 */
553 while (counter_passed > tmcct)
554 counter_passed -= tmcct;
555 tmcct -= counter_passed;
556 }
557 } else {
558 tmcct -= counter_passed;
559 }
560 536
561 return tmcct; 537 return tmcct;
562} 538}
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
653{ 629{
654 ktime_t now = apic->timer.dev.base->get_time(); 630 ktime_t now = apic->timer.dev.base->get_time();
655 631
656 apic->timer.last_update = now;
657
658 apic->timer.period = apic_get_reg(apic, APIC_TMICT) * 632 apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
659 APIC_BUS_CYCLE_NS * apic->timer.divide_count; 633 APIC_BUS_CYCLE_NS * apic->timer.divide_count;
660 atomic_set(&apic->timer.pending, 0); 634 atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1110 } 1084 }
1111} 1085}
1112 1086
1113void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
1114{
1115 struct kvm_lapic *apic = vcpu->arch.apic;
1116
1117 if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
1118 apic->timer.last_update = ktime_add_ns(
1119 apic->timer.last_update,
1120 apic->timer.period);
1121}
1122
1123int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 1087int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1124{ 1088{
1125 int vector = kvm_apic_has_interrupt(vcpu); 1089 int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 81858881287..45ab6ee7120 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@ struct kvm_lapic {
12 atomic_t pending; 12 atomic_t pending;
13 s64 period; /* unit: ns */ 13 s64 period; /* unit: ns */
14 u32 divide_count; 14 u32 divide_count;
15 ktime_t last_update;
16 struct hrtimer dev; 15 struct hrtimer dev;
17 } timer; 16 } timer;
18 struct kvm_vcpu *vcpu; 17 struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
42void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); 41void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
43int kvm_lapic_enabled(struct kvm_vcpu *vcpu); 42int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
44int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); 43int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
45void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
46 44
47void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 45void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
48void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 46void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a..2d4477c7147 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1698 if (largepage) 1698 if (largepage)
1699 spte |= PT_PAGE_SIZE_MASK; 1699 spte |= PT_PAGE_SIZE_MASK;
1700 if (mt_mask) { 1700 if (mt_mask) {
1701 mt_mask = get_memory_type(vcpu, gfn) << 1701 if (!kvm_is_mmio_pfn(pfn)) {
1702 kvm_x86_ops->get_mt_mask_shift(); 1702 mt_mask = get_memory_type(vcpu, gfn) <<
1703 kvm_x86_ops->get_mt_mask_shift();
1704 mt_mask |= VMX_EPT_IGMT_BIT;
1705 } else
1706 mt_mask = MTRR_TYPE_UNCACHABLE <<
1707 kvm_x86_ops->get_mt_mask_shift();
1703 spte |= mt_mask; 1708 spte |= mt_mask;
1704 } 1709 }
1705 1710
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae25..a9e769e4e25 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1600 /* Okay, we can deliver the interrupt: grab it and update PIC state. */ 1600 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
1601 intr_vector = kvm_cpu_get_interrupt(vcpu); 1601 intr_vector = kvm_cpu_get_interrupt(vcpu);
1602 svm_inject_irq(svm, intr_vector); 1602 svm_inject_irq(svm, intr_vector);
1603 kvm_timer_intr_post(vcpu, intr_vector);
1604out: 1603out:
1605 update_cr8_intercept(vcpu); 1604 update_cr8_intercept(vcpu);
1606} 1605}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d746764..7611af57682 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
903 data = vmcs_readl(GUEST_SYSENTER_ESP); 903 data = vmcs_readl(GUEST_SYSENTER_ESP);
904 break; 904 break;
905 default: 905 default:
906 vmx_load_host_state(to_vmx(vcpu));
906 msr = find_msr_entry(to_vmx(vcpu), msr_index); 907 msr = find_msr_entry(to_vmx(vcpu), msr_index);
907 if (msr) { 908 if (msr) {
908 data = msr->data; 909 data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3285 } 3286 }
3286 if (vcpu->arch.interrupt.pending) { 3287 if (vcpu->arch.interrupt.pending) {
3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3288 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
3289 if (kvm_cpu_has_interrupt(vcpu)) 3289 if (kvm_cpu_has_interrupt(vcpu))
3290 enable_irq_window(vcpu); 3290 enable_irq_window(vcpu);
3291 } 3291 }
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
3687 if (vm_need_ept()) { 3687 if (vm_need_ept()) {
3688 bypass_guest_pf = 0; 3688 bypass_guest_pf = 0;
3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3690 VMX_EPT_WRITABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK);
3691 VMX_EPT_IGMT_BIT);
3692 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3691 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3693 VMX_EPT_EXECUTABLE_MASK, 3692 VMX_EPT_EXECUTABLE_MASK,
3694 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3693 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a240..758b7a155ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
968 case KVM_CAP_SET_TSS_ADDR: 968 case KVM_CAP_SET_TSS_ADDR:
969 case KVM_CAP_EXT_CPUID: 969 case KVM_CAP_EXT_CPUID:
970 case KVM_CAP_CLOCKSOURCE:
971 case KVM_CAP_PIT: 970 case KVM_CAP_PIT:
972 case KVM_CAP_NOP_IO_DELAY: 971 case KVM_CAP_NOP_IO_DELAY:
973 case KVM_CAP_MP_STATE: 972 case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
992 case KVM_CAP_IOMMU: 991 case KVM_CAP_IOMMU:
993 r = iommu_found(); 992 r = iommu_found();
994 break; 993 break;
994 case KVM_CAP_CLOCKSOURCE:
995 r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
996 break;
995 default: 997 default:
996 r = 0; 998 r = 0;
997 break; 999 break;
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
4127 4129
4128} 4130}
4129 4131
4130void kvm_arch_destroy_vm(struct kvm *kvm) 4132void kvm_arch_sync_events(struct kvm *kvm)
4131{ 4133{
4132 kvm_free_all_assigned_devices(kvm); 4134 kvm_free_all_assigned_devices(kvm);
4135}
4136
4137void kvm_arch_destroy_vm(struct kvm *kvm)
4138{
4133 kvm_iommu_unmap_guest(kvm); 4139 kvm_iommu_unmap_guest(kvm);
4134 kvm_free_pit(kvm); 4140 kvm_free_pit(kvm);
4135 kfree(kvm->arch.vpic); 4141 kfree(kvm->arch.vpic);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 92f1c6f3e19..960a8d9c049 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -343,6 +343,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
343 * flush_tlb_user() for both user and kernel mappings unless 343 * flush_tlb_user() for both user and kernel mappings unless
344 * the Page Global Enable (PGE) feature bit is set. */ 344 * the Page Global Enable (PGE) feature bit is set. */
345 *dx |= 0x00002000; 345 *dx |= 0x00002000;
346 /* We also lie, and say we're family id 5. 6 or greater
347 * leads to a rdmsr in early_init_intel which we can't handle.
348 * Family ID is returned as bits 8-12 in ax. */
349 *ax &= 0xFFFFF0FF;
350 *ax |= 0x00000500;
346 break; 351 break;
347 case 0x80000000: 352 case 0x80000000:
348 /* Futureproof this a little: if they ask how much extended 353 /* Futureproof this a little: if they ask how much extended
@@ -589,19 +594,21 @@ static void __init lguest_init_IRQ(void)
589 /* Some systems map "vectors" to interrupts weirdly. Lguest has 594 /* Some systems map "vectors" to interrupts weirdly. Lguest has
590 * a straightforward 1 to 1 mapping, so force that here. */ 595 * a straightforward 1 to 1 mapping, so force that here. */
591 __get_cpu_var(vector_irq)[vector] = i; 596 __get_cpu_var(vector_irq)[vector] = i;
592 if (vector != SYSCALL_VECTOR) { 597 if (vector != SYSCALL_VECTOR)
593 set_intr_gate(vector, 598 set_intr_gate(vector, interrupt[i]);
594 interrupt[vector-FIRST_EXTERNAL_VECTOR]);
595 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
596 handle_level_irq,
597 "level");
598 }
599 } 599 }
600 /* This call is required to set up for 4k stacks, where we have 600 /* This call is required to set up for 4k stacks, where we have
601 * separate stacks for hard and soft interrupts. */ 601 * separate stacks for hard and soft interrupts. */
602 irq_ctx_init(smp_processor_id()); 602 irq_ctx_init(smp_processor_id());
603} 603}
604 604
605void lguest_setup_irq(unsigned int irq)
606{
607 irq_to_desc_alloc_cpu(irq, 0);
608 set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
609 handle_level_irq, "level");
610}
611
605/* 612/*
606 * Time. 613 * Time.
607 * 614 *
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index a265a7c6319..50b59187112 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -96,7 +96,7 @@ void __init trap_init_hook(void)
96 96
97static struct irqaction irq0 = { 97static struct irqaction irq0 = {
98 .handler = timer_interrupt, 98 .handler = timer_interrupt,
99 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, 99 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
100 .mask = CPU_MASK_NONE, 100 .mask = CPU_MASK_NONE,
101 .name = "timer" 101 .name = "timer"
102}; 102};
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index d914a7996a6..8e5118371f0 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -56,7 +56,7 @@ void __init trap_init_hook(void)
56 56
57static struct irqaction irq0 = { 57static struct irqaction irq0 = {
58 .handler = timer_interrupt, 58 .handler = timer_interrupt,
59 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, 59 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
60 .mask = CPU_MASK_NONE, 60 .mask = CPU_MASK_NONE,
61 .name = "timer" 61 .name = "timer"
62}; 62};
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 7ffcdeec463..b9cc84a2a4f 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -65,7 +65,7 @@ static volatile unsigned long smp_invalidate_needed;
65 65
66/* Bitmask of CPUs present in the system - exported by i386_syms.c, used 66/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
67 * by scheduler but indexed physically */ 67 * by scheduler but indexed physically */
68cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 68static cpumask_t voyager_phys_cpu_present_map = CPU_MASK_NONE;
69 69
70/* The internal functions */ 70/* The internal functions */
71static void send_CPI(__u32 cpuset, __u8 cpi); 71static void send_CPI(__u32 cpuset, __u8 cpi);
@@ -366,19 +366,19 @@ void __init find_smp_config(void)
366 /* set up everything for just this CPU, we can alter 366 /* set up everything for just this CPU, we can alter
367 * this as we start the other CPUs later */ 367 * this as we start the other CPUs later */
368 /* now get the CPU disposition from the extended CMOS */ 368 /* now get the CPU disposition from the extended CMOS */
369 cpus_addr(phys_cpu_present_map)[0] = 369 cpus_addr(voyager_phys_cpu_present_map)[0] =
370 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK); 370 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
371 cpus_addr(phys_cpu_present_map)[0] |= 371 cpus_addr(voyager_phys_cpu_present_map)[0] |=
372 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; 372 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
373 cpus_addr(phys_cpu_present_map)[0] |= 373 cpus_addr(voyager_phys_cpu_present_map)[0] |=
374 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 374 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
375 2) << 16; 375 2) << 16;
376 cpus_addr(phys_cpu_present_map)[0] |= 376 cpus_addr(voyager_phys_cpu_present_map)[0] |=
377 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 377 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
378 3) << 24; 378 3) << 24;
379 init_cpu_possible(&phys_cpu_present_map); 379 init_cpu_possible(&voyager_phys_cpu_present_map);
380 printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", 380 printk("VOYAGER SMP: voyager_phys_cpu_present_map = 0x%lx\n",
381 cpus_addr(phys_cpu_present_map)[0]); 381 cpus_addr(voyager_phys_cpu_present_map)[0]);
382 /* Here we set up the VIC to enable SMP */ 382 /* Here we set up the VIC to enable SMP */
383 /* enable the CPIs by writing the base vector to their register */ 383 /* enable the CPIs by writing the base vector to their register */
384 outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER); 384 outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
@@ -628,15 +628,15 @@ void __init smp_boot_cpus(void)
628 /* now that the cat has probed the Voyager System Bus, sanity 628 /* now that the cat has probed the Voyager System Bus, sanity
629 * check the cpu map */ 629 * check the cpu map */
630 if (((voyager_quad_processors | voyager_extended_vic_processors) 630 if (((voyager_quad_processors | voyager_extended_vic_processors)
631 & cpus_addr(phys_cpu_present_map)[0]) != 631 & cpus_addr(voyager_phys_cpu_present_map)[0]) !=
632 cpus_addr(phys_cpu_present_map)[0]) { 632 cpus_addr(voyager_phys_cpu_present_map)[0]) {
633 /* should panic */ 633 /* should panic */
634 printk("\n\n***WARNING*** " 634 printk("\n\n***WARNING*** "
635 "Sanity check of CPU present map FAILED\n"); 635 "Sanity check of CPU present map FAILED\n");
636 } 636 }
637 } else if (voyager_level == 4) 637 } else if (voyager_level == 4)
638 voyager_extended_vic_processors = 638 voyager_extended_vic_processors =
639 cpus_addr(phys_cpu_present_map)[0]; 639 cpus_addr(voyager_phys_cpu_present_map)[0];
640 640
641 /* this sets up the idle task to run on the current cpu */ 641 /* this sets up the idle task to run on the current cpu */
642 voyager_extended_cpus = 1; 642 voyager_extended_cpus = 1;
@@ -670,7 +670,7 @@ void __init smp_boot_cpus(void)
670 /* loop over all the extended VIC CPUs and boot them. The 670 /* loop over all the extended VIC CPUs and boot them. The
671 * Quad CPUs must be bootstrapped by their extended VIC cpu */ 671 * Quad CPUs must be bootstrapped by their extended VIC cpu */
672 for (i = 0; i < nr_cpu_ids; i++) { 672 for (i = 0; i < nr_cpu_ids; i++) {
673 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) 673 if (i == boot_cpu_id || !cpu_isset(i, voyager_phys_cpu_present_map))
674 continue; 674 continue;
675 do_boot_cpu(i); 675 do_boot_cpu(i);
676 /* This udelay seems to be needed for the Quad boots 676 /* This udelay seems to be needed for the Quad boots
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 491e737ce54..aa098708877 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -30,20 +30,29 @@ static void fclex(void)
30} 30}
31 31
32/* Needs to be externally visible */ 32/* Needs to be externally visible */
33void finit(void) 33void finit_task(struct task_struct *tsk)
34{ 34{
35 control_word = 0x037f; 35 struct i387_soft_struct *soft = &tsk->thread.xstate->soft;
36 partial_status = 0; 36 struct address *oaddr, *iaddr;
37 top = 0; /* We don't keep top in the status word internally. */ 37 soft->cwd = 0x037f;
38 fpu_tag_word = 0xffff; 38 soft->swd = 0;
39 soft->ftop = 0; /* We don't keep top in the status word internally. */
40 soft->twd = 0xffff;
39 /* The behaviour is different from that detailed in 41 /* The behaviour is different from that detailed in
40 Section 15.1.6 of the Intel manual */ 42 Section 15.1.6 of the Intel manual */
41 operand_address.offset = 0; 43 oaddr = (struct address *)&soft->foo;
42 operand_address.selector = 0; 44 oaddr->offset = 0;
43 instruction_address.offset = 0; 45 oaddr->selector = 0;
44 instruction_address.selector = 0; 46 iaddr = (struct address *)&soft->fip;
45 instruction_address.opcode = 0; 47 iaddr->offset = 0;
46 no_ip_update = 1; 48 iaddr->selector = 0;
49 iaddr->opcode = 0;
50 soft->no_update = 1;
51}
52
53void finit(void)
54{
55 finit_task(current);
47} 56}
48 57
49/* 58/*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index c7b06feb139..5d87f586f8d 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -131,7 +131,7 @@ u_char emulating = 0;
131static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip, 131static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip,
132 overrides * override); 132 overrides * override);
133 133
134asmlinkage void math_emulate(long arg) 134void math_emulate(struct math_emu_info *info)
135{ 135{
136 u_char FPU_modrm, byte1; 136 u_char FPU_modrm, byte1;
137 unsigned short code; 137 unsigned short code;
@@ -161,7 +161,7 @@ asmlinkage void math_emulate(long arg)
161 RE_ENTRANT_CHECK_ON; 161 RE_ENTRANT_CHECK_ON;
162#endif /* RE_ENTRANT_CHECKING */ 162#endif /* RE_ENTRANT_CHECKING */
163 163
164 SETUP_DATA_AREA(arg); 164 FPU_info = info;
165 165
166 FPU_ORIG_EIP = FPU_EIP; 166 FPU_ORIG_EIP = FPU_EIP;
167 167
@@ -659,7 +659,7 @@ static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
659 } 659 }
660} 660}
661 661
662void math_abort(struct info *info, unsigned int signal) 662void math_abort(struct math_emu_info *info, unsigned int signal)
663{ 663{
664 FPU_EIP = FPU_ORIG_EIP; 664 FPU_EIP = FPU_ORIG_EIP;
665 current->thread.trap_no = 16; 665 current->thread.trap_no = 16;
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h
index aa49b6a0d85..9779df436b7 100644
--- a/arch/x86/math-emu/fpu_proto.h
+++ b/arch/x86/math-emu/fpu_proto.h
@@ -51,8 +51,8 @@ extern void ffreep(void);
51extern void fst_i_(void); 51extern void fst_i_(void);
52extern void fstp_i(void); 52extern void fstp_i(void);
53/* fpu_entry.c */ 53/* fpu_entry.c */
54asmlinkage extern void math_emulate(long arg); 54extern void math_emulate(struct math_emu_info *info);
55extern void math_abort(struct info *info, unsigned int signal); 55extern void math_abort(struct math_emu_info *info, unsigned int signal);
56/* fpu_etc.c */ 56/* fpu_etc.c */
57extern void FPU_etc(void); 57extern void FPU_etc(void);
58/* fpu_tags.c */ 58/* fpu_tags.c */
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 13488fa153e..50fa0ec2c8a 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -16,10 +16,6 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18 18
19/* This sets the pointer FPU_info to point to the argument part
20 of the stack frame of math_emulate() */
21#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg
22
23/* s is always from a cpu register, and the cpu does bounds checking 19/* s is always from a cpu register, and the cpu does bounds checking
24 * during register load --> no further bounds checks needed */ 20 * during register load --> no further bounds checks needed */
25#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) 21#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
@@ -38,12 +34,12 @@
38#define I387 (current->thread.xstate) 34#define I387 (current->thread.xstate)
39#define FPU_info (I387->soft.info) 35#define FPU_info (I387->soft.info)
40 36
41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) 37#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))
42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) 38#define FPU_SS (*(unsigned short *) &(FPU_info->regs->ss))
43#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) 39#define FPU_DS (*(unsigned short *) &(FPU_info->regs->ds))
44#define FPU_EAX (FPU_info->___eax) 40#define FPU_EAX (FPU_info->regs->ax)
45#define FPU_EFLAGS (FPU_info->___eflags) 41#define FPU_EFLAGS (FPU_info->regs->flags)
46#define FPU_EIP (FPU_info->___eip) 42#define FPU_EIP (FPU_info->regs->ip)
47#define FPU_ORIG_EIP (FPU_info->___orig_eip) 43#define FPU_ORIG_EIP (FPU_info->___orig_eip)
48 44
49#define FPU_lookahead (I387->soft.lookahead) 45#define FPU_lookahead (I387->soft.lookahead)
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index d701e2b39e4..420b3b6e391 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -29,46 +29,43 @@
29#define FPU_WRITE_BIT 0x10 29#define FPU_WRITE_BIT 0x10
30 30
31static int reg_offset[] = { 31static int reg_offset[] = {
32 offsetof(struct info, ___eax), 32 offsetof(struct pt_regs, ax),
33 offsetof(struct info, ___ecx), 33 offsetof(struct pt_regs, cx),
34 offsetof(struct info, ___edx), 34 offsetof(struct pt_regs, dx),
35 offsetof(struct info, ___ebx), 35 offsetof(struct pt_regs, bx),
36 offsetof(struct info, ___esp), 36 offsetof(struct pt_regs, sp),
37 offsetof(struct info, ___ebp), 37 offsetof(struct pt_regs, bp),
38 offsetof(struct info, ___esi), 38 offsetof(struct pt_regs, si),
39 offsetof(struct info, ___edi) 39 offsetof(struct pt_regs, di)
40}; 40};
41 41
42#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) 42#define REG_(x) (*(long *)(reg_offset[(x)] + (u_char *)FPU_info->regs))
43 43
44static int reg_offset_vm86[] = { 44static int reg_offset_vm86[] = {
45 offsetof(struct info, ___cs), 45 offsetof(struct pt_regs, cs),
46 offsetof(struct info, ___vm86_ds), 46 offsetof(struct kernel_vm86_regs, ds),
47 offsetof(struct info, ___vm86_es), 47 offsetof(struct kernel_vm86_regs, es),
48 offsetof(struct info, ___vm86_fs), 48 offsetof(struct kernel_vm86_regs, fs),
49 offsetof(struct info, ___vm86_gs), 49 offsetof(struct kernel_vm86_regs, gs),
50 offsetof(struct info, ___ss), 50 offsetof(struct pt_regs, ss),
51 offsetof(struct info, ___vm86_ds) 51 offsetof(struct kernel_vm86_regs, ds)
52}; 52};
53 53
54#define VM86_REG_(x) (*(unsigned short *) \ 54#define VM86_REG_(x) (*(unsigned short *) \
55 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) 55 (reg_offset_vm86[((unsigned)x)] + (u_char *)FPU_info->regs))
56
57/* This dummy, gs is not saved on the stack. */
58#define ___GS ___ds
59 56
60static int reg_offset_pm[] = { 57static int reg_offset_pm[] = {
61 offsetof(struct info, ___cs), 58 offsetof(struct pt_regs, cs),
62 offsetof(struct info, ___ds), 59 offsetof(struct pt_regs, ds),
63 offsetof(struct info, ___es), 60 offsetof(struct pt_regs, es),
64 offsetof(struct info, ___fs), 61 offsetof(struct pt_regs, fs),
65 offsetof(struct info, ___GS), 62 offsetof(struct pt_regs, ds), /* dummy, not saved on stack */
66 offsetof(struct info, ___ss), 63 offsetof(struct pt_regs, ss),
67 offsetof(struct info, ___ds) 64 offsetof(struct pt_regs, ds)
68}; 65};
69 66
70#define PM_REG_(x) (*(unsigned short *) \ 67#define PM_REG_(x) (*(unsigned short *) \
71 (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) 68 (reg_offset_pm[((unsigned)x)] + (u_char *)FPU_info->regs))
72 69
73/* Decode the SIB byte. This function assumes mod != 0 */ 70/* Decode the SIB byte. This function assumes mod != 0 */
74static int sib(int mod, unsigned long *fpu_eip) 71static int sib(int mod, unsigned long *fpu_eip)
@@ -349,34 +346,34 @@ void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
349 } 346 }
350 switch (rm) { 347 switch (rm) {
351 case 0: 348 case 0:
352 address += FPU_info->___ebx + FPU_info->___esi; 349 address += FPU_info->regs->bx + FPU_info->regs->si;
353 break; 350 break;
354 case 1: 351 case 1:
355 address += FPU_info->___ebx + FPU_info->___edi; 352 address += FPU_info->regs->bx + FPU_info->regs->di;
356 break; 353 break;
357 case 2: 354 case 2:
358 address += FPU_info->___ebp + FPU_info->___esi; 355 address += FPU_info->regs->bp + FPU_info->regs->si;
359 if (addr_modes.override.segment == PREFIX_DEFAULT) 356 if (addr_modes.override.segment == PREFIX_DEFAULT)
360 addr_modes.override.segment = PREFIX_SS_; 357 addr_modes.override.segment = PREFIX_SS_;
361 break; 358 break;
362 case 3: 359 case 3:
363 address += FPU_info->___ebp + FPU_info->___edi; 360 address += FPU_info->regs->bp + FPU_info->regs->di;
364 if (addr_modes.override.segment == PREFIX_DEFAULT) 361 if (addr_modes.override.segment == PREFIX_DEFAULT)
365 addr_modes.override.segment = PREFIX_SS_; 362 addr_modes.override.segment = PREFIX_SS_;
366 break; 363 break;
367 case 4: 364 case 4:
368 address += FPU_info->___esi; 365 address += FPU_info->regs->si;
369 break; 366 break;
370 case 5: 367 case 5:
371 address += FPU_info->___edi; 368 address += FPU_info->regs->di;
372 break; 369 break;
373 case 6: 370 case 6:
374 address += FPU_info->___ebp; 371 address += FPU_info->regs->bp;
375 if (addr_modes.override.segment == PREFIX_DEFAULT) 372 if (addr_modes.override.segment == PREFIX_DEFAULT)
376 addr_modes.override.segment = PREFIX_SS_; 373 addr_modes.override.segment = PREFIX_SS_;
377 break; 374 break;
378 case 7: 375 case 7:
379 address += FPU_info->___ebx; 376 address += FPU_info->regs->bx;
380 break; 377 break;
381 } 378 }
382 379
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a4..c76ef1d701c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
603 603
604 si_code = SEGV_MAPERR; 604 si_code = SEGV_MAPERR;
605 605
606 if (notify_page_fault(regs))
607 return;
608 if (unlikely(kmmio_fault(regs, address))) 606 if (unlikely(kmmio_fault(regs, address)))
609 return; 607 return;
610 608
@@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
634 if (spurious_fault(address, error_code)) 632 if (spurious_fault(address, error_code))
635 return; 633 return;
636 634
635 /* kprobes don't want to hook the spurious faults. */
636 if (notify_page_fault(regs))
637 return;
637 /* 638 /*
638 * Don't take the mm semaphore here. If we fixup a prefetch 639 * Don't take the mm semaphore here. If we fixup a prefetch
639 * fault we could otherwise deadlock. 640 * fault we could otherwise deadlock.
@@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
641 goto bad_area_nosemaphore; 642 goto bad_area_nosemaphore;
642 } 643 }
643 644
645 /* kprobes don't want to hook the spurious faults. */
646 if (notify_page_fault(regs))
647 return;
644 648
645 /* 649 /*
646 * It's safe to allow irq's after cr2 has been saved and the 650 * It's safe to allow irq's after cr2 has been saved and the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e6d36b49025..b1352250096 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
714 pos = start_pfn << PAGE_SHIFT; 714 pos = start_pfn << PAGE_SHIFT;
715 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) 715 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
716 << (PMD_SHIFT - PAGE_SHIFT); 716 << (PMD_SHIFT - PAGE_SHIFT);
717 if (end_pfn > (end >> PAGE_SHIFT))
718 end_pfn = end >> PAGE_SHIFT;
717 if (start_pfn < end_pfn) { 719 if (start_pfn < end_pfn) {
718 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 720 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
719 pos = end_pfn << PAGE_SHIFT; 721 pos = end_pfn << PAGE_SHIFT;
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ca53224fc56..04102d42ff4 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -20,6 +20,17 @@
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <linux/module.h> 21#include <linux/module.h>
22 22
23int is_io_mapping_possible(resource_size_t base, unsigned long size)
24{
25#ifndef CONFIG_X86_PAE
26 /* There is no way to map greater than 1 << 32 address without PAE */
27 if (base + size > 0x100000000ULL)
28 return 0;
29#endif
30 return 1;
31}
32EXPORT_SYMBOL_GPL(is_io_mapping_possible);
33
23/* Map 'pfn' using fixed map 'type' and protections 'prot' 34/* Map 'pfn' using fixed map 'type' and protections 'prot'
24 */ 35 */
25void * 36void *
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b..f45d5e29a72 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
134 return 0; 134 return 0;
135} 135}
136 136
137int pagerange_is_ram(unsigned long start, unsigned long end)
138{
139 int ram_page = 0, not_rampage = 0;
140 unsigned long page_nr;
141
142 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
143 ++page_nr) {
144 if (page_is_ram(page_nr))
145 ram_page = 1;
146 else
147 not_rampage = 1;
148
149 if (ram_page == not_rampage)
150 return -1;
151 }
152
153 return ram_page;
154}
155
156/* 137/*
157 * Fix up the linear direct mapping of the kernel to avoid cache attribute 138 * Fix up the linear direct mapping of the kernel to avoid cache attribute
158 * conflicts. 139 * conflicts.
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4..6a518dd08a3 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
32 struct list_head list; 32 struct list_head list;
33 struct kmmio_fault_page *release_next; 33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */ 34 unsigned long page; /* location of the fault page */
35 bool old_presence; /* page presence prior to arming */
36 bool armed;
35 37
36 /* 38 /*
37 * Number of times this page has been registered as a part 39 * Number of times this page has been registered as a part
38 * of a probe. If zero, page is disarmed and this may be freed. 40 * of a probe. If zero, page is disarmed and this may be freed.
39 * Used only by writers (RCU). 41 * Used only by writers (RCU) and post_kmmio_handler().
42 * Protected by kmmio_lock, when linked into kmmio_page_table.
40 */ 43 */
41 int count; 44 int count;
42}; 45};
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
105 return NULL; 108 return NULL;
106} 109}
107 110
108static void set_page_present(unsigned long addr, bool present, 111static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
109 unsigned int *pglevel) 112{
113 pmdval_t v = pmd_val(*pmd);
114 *old = !!(v & _PAGE_PRESENT);
115 v &= ~_PAGE_PRESENT;
116 if (present)
117 v |= _PAGE_PRESENT;
118 set_pmd(pmd, __pmd(v));
119}
120
121static void set_pte_presence(pte_t *pte, bool present, bool *old)
122{
123 pteval_t v = pte_val(*pte);
124 *old = !!(v & _PAGE_PRESENT);
125 v &= ~_PAGE_PRESENT;
126 if (present)
127 v |= _PAGE_PRESENT;
128 set_pte_atomic(pte, __pte(v));
129}
130
131static int set_page_presence(unsigned long addr, bool present, bool *old)
110{ 132{
111 pteval_t pteval;
112 pmdval_t pmdval;
113 unsigned int level; 133 unsigned int level;
114 pmd_t *pmd;
115 pte_t *pte = lookup_address(addr, &level); 134 pte_t *pte = lookup_address(addr, &level);
116 135
117 if (!pte) { 136 if (!pte) {
118 pr_err("kmmio: no pte for page 0x%08lx\n", addr); 137 pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119 return; 138 return -1;
120 } 139 }
121 140
122 if (pglevel)
123 *pglevel = level;
124
125 switch (level) { 141 switch (level) {
126 case PG_LEVEL_2M: 142 case PG_LEVEL_2M:
127 pmd = (pmd_t *)pte; 143 set_pmd_presence((pmd_t *)pte, present, old);
128 pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129 if (present)
130 pmdval |= _PAGE_PRESENT;
131 set_pmd(pmd, __pmd(pmdval));
132 break; 144 break;
133
134 case PG_LEVEL_4K: 145 case PG_LEVEL_4K:
135 pteval = pte_val(*pte) & ~_PAGE_PRESENT; 146 set_pte_presence(pte, present, old);
136 if (present)
137 pteval |= _PAGE_PRESENT;
138 set_pte_atomic(pte, __pte(pteval));
139 break; 147 break;
140
141 default: 148 default:
142 pr_err("kmmio: unexpected page level 0x%x.\n", level); 149 pr_err("kmmio: unexpected page level 0x%x.\n", level);
143 return; 150 return -1;
144 } 151 }
145 152
146 __flush_tlb_one(addr); 153 __flush_tlb_one(addr);
154 return 0;
147} 155}
148 156
149/** Mark the given page as not present. Access to it will trigger a fault. */ 157/*
150static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) 158 * Mark the given page as not present. Access to it will trigger a fault.
159 *
160 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
161 * protection is ignored here. RCU read lock is assumed held, so the struct
162 * will not disappear unexpectedly. Furthermore, the caller must guarantee,
163 * that double arming the same virtual address (page) cannot occur.
164 *
165 * Double disarming on the other hand is allowed, and may occur when a fault
166 * and mmiotrace shutdown happen simultaneously.
167 */
168static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
151{ 169{
152 set_page_present(page & PAGE_MASK, false, pglevel); 170 int ret;
171 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
172 if (f->armed) {
173 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
174 f->page, f->count, f->old_presence);
175 }
176 ret = set_page_presence(f->page, false, &f->old_presence);
177 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
178 f->armed = true;
179 return ret;
153} 180}
154 181
155/** Mark the given page as present. */ 182/** Restore the given page to saved presence state. */
156static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) 183static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
157{ 184{
158 set_page_present(page & PAGE_MASK, true, pglevel); 185 bool tmp;
186 int ret = set_page_presence(f->page, f->old_presence, &tmp);
187 WARN_ONCE(ret < 0,
188 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
189 f->armed = false;
159} 190}
160 191
161/* 192/*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
202 233
203 ctx = &get_cpu_var(kmmio_ctx); 234 ctx = &get_cpu_var(kmmio_ctx);
204 if (ctx->active) { 235 if (ctx->active) {
205 disarm_kmmio_fault_page(faultpage->page, NULL);
206 if (addr == ctx->addr) { 236 if (addr == ctx->addr) {
207 /* 237 /*
208 * On SMP we sometimes get recursive probe hits on the 238 * A second fault on the same page means some other
209 * same address. Context is already saved, fall out. 239 * condition needs handling by do_page_fault(), the
240 * page really not being present is the most common.
210 */ 241 */
211 pr_debug("kmmio: duplicate probe hit on CPU %d, for " 242 pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
212 "address 0x%08lx.\n", 243 addr, smp_processor_id());
213 smp_processor_id(), addr); 244
214 ret = 1; 245 if (!faultpage->old_presence)
215 goto no_kmmio_ctx; 246 pr_info("kmmio: unexpected secondary hit for "
216 } 247 "address 0x%08lx on CPU %d.\n", addr,
217 /* 248 smp_processor_id());
218 * Prevent overwriting already in-flight context. 249 } else {
219 * This should not happen, let's hope disarming at least 250 /*
220 * prevents a panic. 251 * Prevent overwriting already in-flight context.
221 */ 252 * This should not happen, let's hope disarming at
222 pr_emerg("kmmio: recursive probe hit on CPU %d, " 253 * least prevents a panic.
254 */
255 pr_emerg("kmmio: recursive probe hit on CPU %d, "
223 "for address 0x%08lx. Ignoring.\n", 256 "for address 0x%08lx. Ignoring.\n",
224 smp_processor_id(), addr); 257 smp_processor_id(), addr);
225 pr_emerg("kmmio: previous hit was at 0x%08lx.\n", 258 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226 ctx->addr); 259 ctx->addr);
260 disarm_kmmio_fault_page(faultpage);
261 }
227 goto no_kmmio_ctx; 262 goto no_kmmio_ctx;
228 } 263 }
229 ctx->active++; 264 ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
244 regs->flags &= ~X86_EFLAGS_IF; 279 regs->flags &= ~X86_EFLAGS_IF;
245 280
246 /* Now we set present bit in PTE and single step. */ 281 /* Now we set present bit in PTE and single step. */
247 disarm_kmmio_fault_page(ctx->fpage->page, NULL); 282 disarm_kmmio_fault_page(ctx->fpage);
248 283
249 /* 284 /*
250 * If another cpu accesses the same page while we are stepping, 285 * If another cpu accesses the same page while we are stepping,
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
275 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276 311
277 if (!ctx->active) { 312 if (!ctx->active) {
278 pr_debug("kmmio: spurious debug trap on CPU %d.\n", 313 pr_warning("kmmio: spurious debug trap on CPU %d.\n",
279 smp_processor_id()); 314 smp_processor_id());
280 goto out; 315 goto out;
281 } 316 }
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
283 if (ctx->probe && ctx->probe->post_handler) 318 if (ctx->probe && ctx->probe->post_handler)
284 ctx->probe->post_handler(ctx->probe, condition, regs); 319 ctx->probe->post_handler(ctx->probe, condition, regs);
285 320
286 arm_kmmio_fault_page(ctx->fpage->page, NULL); 321 /* Prevent racing against release_kmmio_fault_page(). */
322 spin_lock(&kmmio_lock);
323 if (ctx->fpage->count)
324 arm_kmmio_fault_page(ctx->fpage);
325 spin_unlock(&kmmio_lock);
287 326
288 regs->flags &= ~X86_EFLAGS_TF; 327 regs->flags &= ~X86_EFLAGS_TF;
289 regs->flags |= ctx->saved_flags; 328 regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
315 f = get_kmmio_fault_page(page); 354 f = get_kmmio_fault_page(page);
316 if (f) { 355 if (f) {
317 if (!f->count) 356 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL); 357 arm_kmmio_fault_page(f);
319 f->count++; 358 f->count++;
320 return 0; 359 return 0;
321 } 360 }
322 361
323 f = kmalloc(sizeof(*f), GFP_ATOMIC); 362 f = kzalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f) 363 if (!f)
325 return -1; 364 return -1;
326 365
327 f->count = 1; 366 f->count = 1;
328 f->page = page; 367 f->page = page;
329 list_add_rcu(&f->list, kmmio_page_list(f->page));
330 368
331 arm_kmmio_fault_page(f->page, NULL); 369 if (arm_kmmio_fault_page(f)) {
370 kfree(f);
371 return -1;
372 }
373
374 list_add_rcu(&f->list, kmmio_page_list(f->page));
332 375
333 return 0; 376 return 0;
334} 377}
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
347 f->count--; 390 f->count--;
348 BUG_ON(f->count < 0); 391 BUG_ON(f->count < 0);
349 if (!f->count) { 392 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL); 393 disarm_kmmio_fault_page(f);
351 f->release_next = *release_list; 394 f->release_next = *release_list;
352 *release_list = f; 395 *release_list = f;
353 } 396 }
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
408 451
409static void remove_kmmio_fault_pages(struct rcu_head *head) 452static void remove_kmmio_fault_pages(struct rcu_head *head)
410{ 453{
411 struct kmmio_delayed_release *dr = container_of( 454 struct kmmio_delayed_release *dr =
412 head, 455 container_of(head, struct kmmio_delayed_release, rcu);
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list; 456 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list; 457 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags; 458 unsigned long flags;
459
418 spin_lock_irqsave(&kmmio_lock, flags); 460 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) { 461 while (p) {
420 if (!p->count) 462 if (!p->count) {
421 list_del_rcu(&p->list); 463 list_del_rcu(&p->list);
422 else 464 prevp = &p->release_next;
465 } else {
423 *prevp = p->release_next; 466 *prevp = p->release_next;
424 prevp = &p->release_next; 467 }
425 p = p->release_next; 468 p = p->release_next;
426 } 469 }
427 spin_unlock_irqrestore(&kmmio_lock, flags); 470 spin_unlock_irqrestore(&kmmio_lock, flags);
471
428 /* This is the real RCU destroy call. */ 472 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); 473 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
430} 474}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89..f3516da035d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
145 return shift; 145 return shift;
146} 146}
147 147
148int early_pfn_to_nid(unsigned long pfn) 148int __meminit __early_pfn_to_nid(unsigned long pfn)
149{ 149{
150 return phys_to_nid(pfn << PAGE_SHIFT); 150 return phys_to_nid(pfn << PAGE_SHIFT);
151} 151}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad..7233bd7e357 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -508,18 +508,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
508#endif 508#endif
509 509
510 /* 510 /*
511 * Install the new, split up pagetable. Important details here: 511 * Install the new, split up pagetable.
512 * 512 *
513 * On Intel the NX bit of all levels must be cleared to make a 513 * We use the standard kernel pagetable protections for the new
514 * page executable. See section 4.13.2 of Intel 64 and IA-32 514 * pagetable protections, the actual ptes set above control the
515 * Architectures Software Developer's Manual). 515 * primary protection behavior:
516 */
517 __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
518
519 /*
520 * Intel Atom errata AAH41 workaround.
516 * 521 *
517 * Mark the entry present. The current mapping might be 522 * The real fix should be in hw or in a microcode update, but
518 * set to not present, which we preserved above. 523 * we also probabilistically try to reduce the window of having
524 * a large TLB mixed with 4K TLBs while instruction fetches are
525 * going on.
519 */ 526 */
520 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); 527 __flush_tlb_all();
521 pgprot_val(ref_prot) |= _PAGE_PRESENT; 528
522 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
523 base = NULL; 529 base = NULL;
524 530
525out_unlock: 531out_unlock:
@@ -575,7 +581,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
575 address = cpa->vaddr[cpa->curpage]; 581 address = cpa->vaddr[cpa->curpage];
576 else 582 else
577 address = *cpa->vaddr; 583 address = *cpa->vaddr;
578
579repeat: 584repeat:
580 kpte = lookup_address(address, &level); 585 kpte = lookup_address(address, &level);
581 if (!kpte) 586 if (!kpte)
@@ -812,6 +817,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
812 817
813 vm_unmap_aliases(); 818 vm_unmap_aliases();
814 819
820 /*
821 * If we're called with lazy mmu updates enabled, the
822 * in-memory pte state may be stale. Flush pending updates to
823 * bring them up to date.
824 */
825 arch_flush_lazy_mmu_mode();
826
815 cpa.vaddr = addr; 827 cpa.vaddr = addr;
816 cpa.numpages = numpages; 828 cpa.numpages = numpages;
817 cpa.mask_set = mask_set; 829 cpa.mask_set = mask_set;
@@ -854,6 +866,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
854 } else 866 } else
855 cpa_flush_all(cache); 867 cpa_flush_all(cache);
856 868
869 /*
870 * If we've been called with lazy mmu updates enabled, then
871 * make sure that everything gets flushed out before we
872 * return.
873 */
874 arch_flush_lazy_mmu_mode();
875
857out: 876out:
858 return ret; 877 return ret;
859} 878}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427d..e0ab173b697 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,6 +11,7 @@
11#include <linux/bootmem.h> 11#include <linux/bootmem.h>
12#include <linux/debugfs.h> 12#include <linux/debugfs.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/module.h>
14#include <linux/gfp.h> 15#include <linux/gfp.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/fs.h> 17#include <linux/fs.h>
@@ -211,6 +212,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
211static struct memtype *cached_entry; 212static struct memtype *cached_entry;
212static u64 cached_start; 213static u64 cached_start;
213 214
215static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
216{
217 int ram_page = 0, not_rampage = 0;
218 unsigned long page_nr;
219
220 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
221 ++page_nr) {
222 /*
223 * For legacy reasons, physical address range in the legacy ISA
224 * region is tracked as non-RAM. This will allow users of
225 * /dev/mem to map portions of legacy ISA region, even when
226 * some of those portions are listed(or not even listed) with
227 * different e820 types(RAM/reserved/..)
228 */
229 if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
230 page_is_ram(page_nr))
231 ram_page = 1;
232 else
233 not_rampage = 1;
234
235 if (ram_page == not_rampage)
236 return -1;
237 }
238
239 return ram_page;
240}
241
214/* 242/*
215 * For RAM pages, mark the pages as non WB memory type using 243 * For RAM pages, mark the pages as non WB memory type using
216 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 244 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +364,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
336 if (new_type) 364 if (new_type)
337 *new_type = actual_type; 365 *new_type = actual_type;
338 366
339 /* 367 is_range_ram = pat_pagerange_is_ram(start, end);
340 * For legacy reasons, some parts of the physical address range in the 368 if (is_range_ram == 1)
341 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 369 return reserve_ram_pages_type(start, end, req_type,
342 * the e820 tables). So we will track the memory attributes of this 370 new_type);
343 * legacy 1MB region using the linear memtype_list always. 371 else if (is_range_ram < 0)
344 */ 372 return -EINVAL;
345 if (end >= ISA_END_ADDRESS) {
346 is_range_ram = pagerange_is_ram(start, end);
347 if (is_range_ram == 1)
348 return reserve_ram_pages_type(start, end, req_type,
349 new_type);
350 else if (is_range_ram < 0)
351 return -EINVAL;
352 }
353 373
354 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 374 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
355 if (!new) 375 if (!new)
@@ -446,19 +466,11 @@ int free_memtype(u64 start, u64 end)
446 if (is_ISA_range(start, end - 1)) 466 if (is_ISA_range(start, end - 1))
447 return 0; 467 return 0;
448 468
449 /* 469 is_range_ram = pat_pagerange_is_ram(start, end);
450 * For legacy reasons, some parts of the physical address range in the 470 if (is_range_ram == 1)
451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 471 return free_ram_pages_type(start, end);
452 * the e820 tables). So we will track the memory attributes of this 472 else if (is_range_ram < 0)
453 * legacy 1MB region using the linear memtype_list always. 473 return -EINVAL;
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
462 474
463 spin_lock(&memtype_lock); 475 spin_lock(&memtype_lock);
464 list_for_each_entry(entry, &memtype_list, nd) { 476 list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +638,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
626 unsigned long flags; 638 unsigned long flags;
627 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); 639 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
628 640
629 is_ram = pagerange_is_ram(paddr, paddr + size); 641 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
630 642
631 if (is_ram != 0) { 643 /*
632 /* 644 * reserve_pfn_range() doesn't support RAM pages.
633 * For mapping RAM pages, drivers need to call 645 */
634 * set_memory_[uc|wc|wb] directly, for reserve and free, before 646 if (is_ram != 0)
635 * setting up the PTE. 647 return -EINVAL;
636 */
637 WARN_ON_ONCE(1);
638 return 0;
639 }
640 648
641 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 649 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
642 if (ret) 650 if (ret)
@@ -693,7 +701,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
693{ 701{
694 int is_ram; 702 int is_ram;
695 703
696 is_ram = pagerange_is_ram(paddr, paddr + size); 704 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
697 if (is_ram == 0) 705 if (is_ram == 0)
698 free_memtype(paddr, paddr + size); 706 free_memtype(paddr, paddr + size);
699} 707}
@@ -861,6 +869,7 @@ pgprot_t pgprot_writecombine(pgprot_t prot)
861 else 869 else
862 return pgprot_noncached(prot); 870 return pgprot_noncached(prot);
863} 871}
872EXPORT_SYMBOL_GPL(pgprot_writecombine);
864 873
865#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 874#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
866 875
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index ab50a8d7402..427fd1b56df 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi> 2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/io.h> 5#include <linux/io.h>
@@ -9,35 +9,74 @@
9 9
10static unsigned long mmio_address; 10static unsigned long mmio_address;
11module_param(mmio_address, ulong, 0); 11module_param(mmio_address, ulong, 0);
12MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); 12MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
13 "(or 8 MB if read_far is non-zero).");
14
15static unsigned long read_far = 0x400100;
16module_param(read_far, ulong, 0);
17MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
18 "(default: 0x400100).");
19
20static unsigned v16(unsigned i)
21{
22 return i * 12 + 7;
23}
24
25static unsigned v32(unsigned i)
26{
27 return i * 212371 + 13;
28}
13 29
14static void do_write_test(void __iomem *p) 30static void do_write_test(void __iomem *p)
15{ 31{
16 unsigned int i; 32 unsigned int i;
33 pr_info(MODULE_NAME ": write test.\n");
17 mmiotrace_printk("Write test.\n"); 34 mmiotrace_printk("Write test.\n");
35
18 for (i = 0; i < 256; i++) 36 for (i = 0; i < 256; i++)
19 iowrite8(i, p + i); 37 iowrite8(i, p + i);
38
20 for (i = 1024; i < (5 * 1024); i += 2) 39 for (i = 1024; i < (5 * 1024); i += 2)
21 iowrite16(i * 12 + 7, p + i); 40 iowrite16(v16(i), p + i);
41
22 for (i = (5 * 1024); i < (16 * 1024); i += 4) 42 for (i = (5 * 1024); i < (16 * 1024); i += 4)
23 iowrite32(i * 212371 + 13, p + i); 43 iowrite32(v32(i), p + i);
24} 44}
25 45
26static void do_read_test(void __iomem *p) 46static void do_read_test(void __iomem *p)
27{ 47{
28 unsigned int i; 48 unsigned int i;
49 unsigned errs[3] = { 0 };
50 pr_info(MODULE_NAME ": read test.\n");
29 mmiotrace_printk("Read test.\n"); 51 mmiotrace_printk("Read test.\n");
52
30 for (i = 0; i < 256; i++) 53 for (i = 0; i < 256; i++)
31 ioread8(p + i); 54 if (ioread8(p + i) != i)
55 ++errs[0];
56
32 for (i = 1024; i < (5 * 1024); i += 2) 57 for (i = 1024; i < (5 * 1024); i += 2)
33 ioread16(p + i); 58 if (ioread16(p + i) != v16(i))
59 ++errs[1];
60
34 for (i = (5 * 1024); i < (16 * 1024); i += 4) 61 for (i = (5 * 1024); i < (16 * 1024); i += 4)
35 ioread32(p + i); 62 if (ioread32(p + i) != v32(i))
63 ++errs[2];
64
65 mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
66 errs[0], errs[1], errs[2]);
36} 67}
37 68
38static void do_test(void) 69static void do_read_far_test(void __iomem *p)
39{ 70{
40 void __iomem *p = ioremap_nocache(mmio_address, 0x4000); 71 pr_info(MODULE_NAME ": read far test.\n");
72 mmiotrace_printk("Read far test.\n");
73
74 ioread32(p + read_far);
75}
76
77static void do_test(unsigned long size)
78{
79 void __iomem *p = ioremap_nocache(mmio_address, size);
41 if (!p) { 80 if (!p) {
42 pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); 81 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
43 return; 82 return;
@@ -45,11 +84,15 @@ static void do_test(void)
45 mmiotrace_printk("ioremap returned %p.\n", p); 84 mmiotrace_printk("ioremap returned %p.\n", p);
46 do_write_test(p); 85 do_write_test(p);
47 do_read_test(p); 86 do_read_test(p);
87 if (read_far && read_far < size - 4)
88 do_read_far_test(p);
48 iounmap(p); 89 iounmap(p);
49} 90}
50 91
51static int __init init(void) 92static int __init init(void)
52{ 93{
94 unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
95
53 if (mmio_address == 0) { 96 if (mmio_address == 0) {
54 pr_err(MODULE_NAME ": you have to use the module argument " 97 pr_err(MODULE_NAME ": you have to use the module argument "
55 "mmio_address.\n"); 98 "mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
58 return -ENXIO; 101 return -ENXIO;
59 } 102 }
60 103
61 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " 104 pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
62 "in PCI address space, and writing " 105 "address space, and writing 16 kB of rubbish in there.\n",
63 "rubbish in there.\n", mmio_address); 106 size >> 10, mmio_address);
64 do_test(); 107 do_test(size);
108 pr_info(MODULE_NAME ": All done.\n");
65 return 0; 109 return 0;
66} 110}
67 111
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index e9f80c744cf..10131fbdaad 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -78,8 +78,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
78 if (cpu_has_arch_perfmon) { 78 if (cpu_has_arch_perfmon) {
79 union cpuid10_eax eax; 79 union cpuid10_eax eax;
80 eax.full = cpuid_eax(0xa); 80 eax.full = cpuid_eax(0xa);
81 if (counter_width < eax.split.bit_width) 81
82 counter_width = eax.split.bit_width; 82 /*
83 * For Core2 (family 6, model 15), don't reset the
84 * counter width:
85 */
86 if (!(eax.split.version_id == 0 &&
87 current_cpu_data.x86 == 6 &&
88 current_cpu_data.x86_model == 15)) {
89
90 if (counter_width < eax.split.bit_width)
91 counter_width = eax.split.bit_width;
92 }
83 } 93 }
84 94
85 /* clear all counters */ 95 /* clear all counters */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b2..b58e9633814 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1672,6 +1672,9 @@ asmlinkage void __init xen_start_kernel(void)
1672 possible map and a non-dummy shared_info. */ 1672 possible map and a non-dummy shared_info. */
1673 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1673 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1674 1674
1675 local_irq_disable();
1676 early_boot_irqs_off();
1677
1675 xen_raw_console_write("mapping kernel into physical memory\n"); 1678 xen_raw_console_write("mapping kernel into physical memory\n");
1676 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1679 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1677 1680