author	Ingo Molnar <mingo@elte.hu>	2009-10-17 03:58:25 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-10-17 03:58:25 -0400
commit	bb3c3e807140816b5f5fd4840473ee52a916ad4f (patch)
tree	9e8a69d266a7df86ca16177eefffab4b4e910753 /arch/x86
parent	595c36490deb49381dc51231a3d5e6b66786ed27 (diff)
parent	012abeea669ea49636cf952d13298bb68654146a (diff)
Merge commit 'v2.6.32-rc5' into perf/probes
Conflicts:
	kernel/trace/trace_event_profile.c

Merge reason: update to -rc5 and resolve conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 26
-rw-r--r--  arch/x86/Kconfig.cpu | 3
-rw-r--r--  arch/x86/Makefile | 4
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 3
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 3
-rw-r--r--  arch/x86/boot/compressed/vmlinux.lds.S | 6
-rw-r--r--  arch/x86/boot/install.sh | 4
-rw-r--r--  arch/x86/ia32/ia32entry.S | 36
-rw-r--r--  arch/x86/include/asm/cache.h | 4
-rw-r--r--  arch/x86/include/asm/checksum_32.h | 3
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 30
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 1
-rw-r--r--  arch/x86/include/asm/mce.h | 2
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 6
-rw-r--r--  arch/x86/include/asm/nmi.h | 3
-rw-r--r--  arch/x86/include/asm/paravirt.h | 28
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 10
-rw-r--r--  arch/x86/include/asm/pci.h | 6
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 1
-rw-r--r--  arch/x86/include/asm/smp.h | 1
-rw-r--r--  arch/x86/include/asm/topology.h | 10
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 7
-rw-r--r--  arch/x86/kernel/apic/nmi.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 7
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 84
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 67
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 17
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 3
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 1
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r--  arch/x86/kernel/e820.c | 4
-rw-r--r--  arch/x86/kernel/early_printk.c | 10
-rw-r--r--  arch/x86/kernel/head_32.S | 6
-rw-r--r--  arch/x86/kernel/head_64.S | 4
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c | 10
-rw-r--r--  arch/x86/kernel/init_task.c | 5
-rw-r--r--  arch/x86/kernel/irq.c | 4
-rw-r--r--  arch/x86/kernel/ldt.c | 4
-rw-r--r--  arch/x86/kernel/pci-dma.c | 4
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 1
-rw-r--r--  arch/x86/kernel/process.c | 6
-rw-r--r--  arch/x86/kernel/reboot.c | 1
-rw-r--r--  arch/x86/kernel/smpboot.c | 9
-rw-r--r--  arch/x86/kernel/time.c | 4
-rw-r--r--  arch/x86/kernel/trampoline.c | 12
-rw-r--r--  arch/x86/kernel/trampoline_64.S | 4
-rw-r--r--  arch/x86/kernel/traps.c | 7
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 2
-rw-r--r--  arch/x86/kernel/vmi_32.c | 2
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 79
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 10
-rw-r--r--  arch/x86/kvm/lapic.c | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 84
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 18
-rw-r--r--  arch/x86/kvm/svm.c | 25
-rw-r--r--  arch/x86/kvm/vmx.c | 2
-rw-r--r--  arch/x86/kvm/x86.c | 2
-rw-r--r--  arch/x86/lib/Makefile | 4
-rw-r--r--  arch/x86/lib/cmpxchg8b_emu.S | 57
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/fault.c | 19
-rw-r--r--  arch/x86/mm/init.c | 63
-rw-r--r--  arch/x86/mm/pageattr.c | 1
-rw-r--r--  arch/x86/mm/pat.c | 7
-rw-r--r--  arch/x86/mm/setup_nx.c | 69
-rw-r--r--  arch/x86/mm/tlb.c | 15
-rw-r--r--  arch/x86/pci/common.c | 2
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/vdso/Makefile | 2
-rw-r--r--  arch/x86/xen/debugfs.c | 2
-rw-r--r--  arch/x86/xen/enlighten.c | 10
-rw-r--r--  arch/x86/xen/mmu.c | 4
74 files changed, 577 insertions(+), 388 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93698794aa3a..07e01149e3bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
 config HAVE_LATENCYTOP_SUPPORT
 	def_bool y
 
-config FAST_CMPXCHG_LOCAL
-	bool
-	default y
-
 config MMU
 	def_bool y
 
@@ -432,6 +428,17 @@ config X86_NUMAQ
 	  of Flat Logical.  You will need a new lynxer.elf file to flash your
 	  firmware with - send email to <Martin.Bligh@us.ibm.com>.
 
+config X86_SUPPORTS_MEMORY_FAILURE
+	bool
+	# MCE code calls memory_failure():
+	depends on X86_MCE
+	# On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+	depends on !X86_NUMAQ
+	# On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+	depends on X86_64 || !SPARSEMEM
+	select ARCH_SUPPORTS_MEMORY_FAILURE
+	default y
+
 config X86_VISWS
 	bool "SGI 320/540 (Visual Workstation)"
 	depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
@@ -484,7 +491,7 @@ if PARAVIRT_GUEST
 source "arch/x86/xen/Kconfig"
 
 config VMI
-	bool "VMI Guest support"
+	bool "VMI Guest support (DEPRECATED)"
 	select PARAVIRT
 	depends on X86_32
 	---help---
@@ -493,6 +500,15 @@ config VMI
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+	  As of September 2009, VMware has started a phased retirement
+	  of this feature from VMware's products. Please see
+	  feature-removal-schedule.txt for details.  If you are
+	  planning to enable this option, please note that you cannot
+	  live migrate a VMI enabled VM to a future VMware product,
+	  which doesn't support VMI. So if you expect your kernel to
+	  seamlessly migrate to newer VMware products, keep this
+	  disabled.
+
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..f2824fb8c79c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
 	def_bool y
-	depends on X86_PAE || X86_64
+	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
 	int
 	default "64" if X86_64
 	default "6" if X86_32 && X86_P6_NOP
+	default "5" if X86_32 && X86_CMPXCHG64
 	default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
 	default "3"
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4aefc034e9a0..ba7a6df4db92 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -182,8 +182,8 @@ archclean:
 define archhelp
   echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
   echo  '  install      - Install kernel using'
-  echo  '                  (your) ~/bin/installkernel or'
-  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  (your) ~/bin/$(INSTALLKERNEL) or'
+  echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
   echo  '  fdimage      - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
   echo  '  fdimage144   - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 75e4f001e706..f543b70ffae2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -23,13 +23,14 @@
  */
 	.text
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
 
-	.section ".text.head","ax",@progbits
+	__HEAD
 ENTRY(startup_32)
 	cld
 	/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62c284db9eb..077e1b69198e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -24,6 +24,7 @@
 	.code32
 	.text
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/pgtable_types.h>
@@ -33,7 +34,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
-	.section ".text.head"
+	__HEAD
 	.code32
 ENTRY(startup_32)
 	cld
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index cc353e1b3ffd..f4193bb48782 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,3 +1,5 @@
+#include <asm-generic/vmlinux.lds.h>
+
 OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
 
 #undef i386
@@ -18,9 +20,9 @@ SECTIONS
 	 * address 0.
 	 */
 	. = 0;
-	.text.head : {
+	.head.text : {
 		_head = . ;
-		*(.text.head)
+		HEAD_TEXT
 		_ehead = . ;
 	}
 	.rodata.compressed : {
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 8d60ee15dfd9..d13ec1c38640 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -33,8 +33,8 @@ verify "$3"
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..1733f9f65e82 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
 #define __AUDIT_ARCH_LE	   0x40000000
 
 #ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
+#define sysexit_audit ia32_ret_from_sys_call
+#define sysretl_audit ia32_ret_from_sys_call
 #endif
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
 	.endm
 
 	/* clobbers %eax */
-	.macro  CLEAR_RREGS _r9=rax
+	.macro  CLEAR_RREGS offset=0, _r9=rax
 	xorl	%eax,%eax
-	movq	%rax,R11(%rsp)
-	movq	%rax,R10(%rsp)
-	movq	%\_r9,R9(%rsp)
-	movq	%rax,R8(%rsp)
+	movq	%rax,\offset+R11(%rsp)
+	movq	%rax,\offset+R10(%rsp)
+	movq	%\_r9,\offset+R9(%rsp)
+	movq	%rax,\offset+R8(%rsp)
 	.endm
 
 	/*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
 	movl	RIP-R11(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 1,24,1,1,1,1
+	xorq	%r8,%r8
+	xorq	%r9,%r9
+	xorq	%r10,%r10
+	xorq	%r11,%r11
 	popfq
 	CFI_ADJUST_CFA_OFFSET -8
 	/*CFI_RESTORE rflags*/
@@ -202,7 +206,7 @@ sysexit_from_sys_call:
 
 	.macro auditsys_exit exit,ebpsave=RBP
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	sti
 	movl %eax,%esi		/* second arg, syscall return value */
@@ -218,8 +222,9 @@ sysexit_from_sys_call:
 	cli
 	TRACE_IRQS_OFF
 	testl %edi,TI_flags(%r10)
-	jnz int_with_check
-	jmp \exit
+	jz \exit
+	CLEAR_RREGS -ARGOFFSET
+	jmp int_with_check
 	.endm
 
 sysenter_auditsys:
@@ -329,6 +334,9 @@ sysretl_from_sys_call:
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
+	xorq	%r10,%r10
+	xorq	%r9,%r9
+	xorq	%r8,%r8
 	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
@@ -353,7 +361,7 @@ cstar_tracesys:
 #endif
 	xchgl %r9d,%ebp
 	SAVE_REST
-	CLEAR_RREGS r9
+	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
@@ -425,6 +433,8 @@ ia32_do_call:
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
+ia32_ret_from_sys_call:
+	CLEAR_RREGS -ARGOFFSET
 	jmp int_ret_from_sys_call
 
 ia32_tracesys:
@@ -442,8 +452,8 @@ END(ia32_syscall)
 
 ia32_badsys:
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
-	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-	jmp int_ret_from_sys_call
+	movq $-ENOSYS,%rax
+	jmp ia32_sysret
 
 quiet_ni_syscall:
 	movq $-ENOSYS,%rax
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 5d367caa0e36..549860d3be8f 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_CACHE_H
 #define _ASM_X86_CACHE_H
 
+#include <linux/linkage.h>
+
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES	(1 << L1_CACHE_SHIFT)
@@ -13,7 +15,7 @@
 #ifdef CONFIG_SMP
 #define __cacheline_aligned_in_smp					\
 	__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))	\
-	__attribute__((__section__(".data.page_aligned")))
+	__page_aligned_data
 #endif
 #endif
 
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 7c5ef8b14d92..46fc474fd819 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
161 "adcl $0, %0 ;\n" 161 "adcl $0, %0 ;\n"
162 : "=&r" (sum) 162 : "=&r" (sum)
163 : "r" (saddr), "r" (daddr), 163 : "r" (saddr), "r" (daddr),
164 "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); 164 "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)
165 : "memory");
165 166
166 return csum_fold(sum); 167 return csum_fold(sum);
167} 168}
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 82ceb788a981..ee1931be6593 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 
 extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
 
 #define cmpxchg64(ptr, o, n)					\
 ({								\
 	__typeof__(*(ptr)) __ret;				\
-	if (likely(boot_cpu_data.x86 > 4))			\
-		__ret = (__typeof__(*(ptr)))__cmpxchg64((ptr),	\
-				(unsigned long long)(o),	\
-				(unsigned long long)(n));	\
-	else							\
-		__ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \
-				(unsigned long long)(o),	\
-				(unsigned long long)(n));	\
-	__ret;							\
-})
+	__typeof__(*(ptr)) __old = (o);				\
+	__typeof__(*(ptr)) __new = (n);				\
+	alternative_io("call cmpxchg8b_emu",			\
+		       "lock; cmpxchg8b (%%esi)" ,		\
+		       X86_FEATURE_CX8,				\
+		       "=A" (__ret),				\
+		       "S" ((ptr)), "0" (__old),		\
+		       "b" ((unsigned int)__new),		\
+		       "c" ((unsigned int)(__new>>32))		\
+		       : "memory");				\
+	__ret; })
+
+
+
 #define cmpxchg64_local(ptr, o, n)				\
 ({								\
 	__typeof__(*(ptr)) __ret;				\
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c5814..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index f923203dc39a..4a2d4e0c18d9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
 	if (likely(prev != next)) {
 		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 #ifdef CONFIG_SMP
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
-		cpu_set(cpu, next->cpu_vm_mask);
+		cpumask_set_cpu(cpu, mm_cpumask(next));
 
 		/* Re-load page tables */
 		load_cr3(next->pgd);
@@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+		if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
 			/* We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index e63cf7d441e1..139d4c1a33a7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
 #define NMI_INVALID	3
 
 struct ctl_table;
-struct file;
-extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc41041d..efb38994859c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
 
 static inline unsigned long __raw_local_save_flags(void)
 {
-	unsigned long f;
-
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     : "=a"(f)
-		     : paravirt_type(pv_irq_ops.save_fl),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc");
-	return f;
+	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     : "=a"(f)
-		     : PV_FLAGS_ARG(f),
-		       paravirt_type(pv_irq_ops.restore_fl),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc");
+	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
 }
 
 static inline void raw_local_irq_disable(void)
 {
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     :
-		     : paravirt_type(pv_irq_ops.irq_disable),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc");
+	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
 }
 
 static inline void raw_local_irq_enable(void)
 {
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     :
-		     : paravirt_type(pv_irq_ops.irq_enable),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc");
+	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
 }
 
 static inline unsigned long __raw_local_irq_save(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b32489d..9357473c8da0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
 #define EXTRA_CLOBBERS
 #define VEXTRA_CLOBBERS
 #else  /* CONFIG_X86_64 */
+/* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS					\
 	unsigned long __edi = __edi, __esi = __esi,	\
-		__edx = __edx, __ecx = __ecx
-#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS, __eax
+		__edx = __edx, __ecx = __ecx, __eax = __eax
+#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)		"S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
509 "=c" (__ecx) 510 "=c" (__ecx)
510#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) 511#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
511 512
513/* void functions are still allowed [re]ax for scratch */
512#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) 514#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
513#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS 515#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
514 516
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
 		      VEXTRA_CLOBBERS,				\
 		      pre, post, ##__VA_ARGS__)
 
-#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...)		\
-	____PVOP_CALL(rettype, op.func, CLBR_RET_REG,		\
+#define __PVOP_VCALLEESAVE(op, pre, post, ...)			\
+	____PVOP_VCALL(op.func, CLBR_RET_REG,			\
 		      PVOP_VCALLEE_CLOBBERS, ,			\
 		      pre, post, ##__VA_ARGS__)
 
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f76a162c082c..ada8c201d513 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
 static inline const struct cpumask *
 cpumask_of_pcibus(const struct pci_bus *bus)
 {
-	return cpumask_of_node(__pcibus_to_node(bus));
+	int node;
+
+	node = __pcibus_to_node(bus);
+	return (node == -1) ? cpu_online_mask :
+			      cpumask_of_node(node);
 }
 #endif
 
149 153
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b467bf3c680..d1f4a760be23 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte)
 typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
+extern void set_nx(void);
 extern int nx_enabled;
 
 #define pgprot_writecombine	pgprot_writecombine
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 6a84ed166aec..1e796782cd7b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu)
 	smp_ops.send_call_func_single_ipi(cpu);
 }
 
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	smp_ops.send_call_func_ipi(mask);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6f0695d744bf..25a92842dd99 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -165,21 +165,11 @@ static inline int numa_node_id(void)
 	return 0;
 }
 
-static inline int cpu_to_node(int cpu)
-{
-	return 0;
-}
-
 static inline int early_cpu_to_node(int cpu)
 {
 	return 0;
 }
 
-static inline const struct cpumask *cpumask_of_node(int node)
-{
-	return cpu_online_mask;
-}
-
 static inline void setup_node_to_cpumask_map(void) { }
 
 #endif
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8c44c232efcb..59cdfa4686b2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
 	 * P4, Core and beyond CPUs
 	 */
 	if (c->x86_vendor == X86_VENDOR_INTEL &&
-	    (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
+	    (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
 		flags->bm_control = 0;
 }
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 64970b9885f2..dc69f28489f5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
-		if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
 			cfg = NULL;
-		} else if (!alloc_cpumask_var_node(&cfg->old_domain,
+		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
 							  GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
 			cfg = NULL;
-		} else {
-			cpumask_clear(cfg->domain);
-			cpumask_clear(cfg->old_domain);
 		}
 	}
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index cb66a22d98ad..7ff61d6a188a 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 /*
  * proc handler for /proc/sys/kernel/nmi
  */
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write,
 			void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int old_state;
 
 	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
 	old_state = nmi_watchdog_enabled;
-	proc_dointvec(table, write, file, buffer, length, ppos);
+	proc_dointvec(table, write, buffer, length, ppos);
 	if (!!old_state == !!nmi_watchdog_enabled)
 		return 0;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7029f0e2acad..472763d92098 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = {
 };
 
 /* Inject mce on current CPU */
-static int raise_local(struct mce *m)
+static int raise_local(void)
 {
+	struct mce *m = &__get_cpu_var(injectm);
 	int context = MCJ_CTX(m->inject_flags);
 	int ret = 0;
 	int cpu = m->extcpu;
@@ -167,12 +168,12 @@ static void raise_mce(struct mce *m)
 		}
 		cpu_relax();
 	}
-	raise_local(m);
+	raise_local();
 	put_cpu();
 	put_online_cpus();
 	} else
 #endif
-	raise_local(m);
+	raise_local();
 }
 
 /* Error injection interface */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f5aab26320e..b1598a9436d0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
+static void default_decode_mce(struct mce *m)
+{
+	pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,49 +177,46 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-void __weak decode_mce(struct mce *m)
-{
-	return;
-}
-
 static void print_mce(struct mce *m)
 {
-	printk(KERN_EMERG
-	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+	pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
+
 	if (m->ip) {
-		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
+		pr_emerg("RIP%s %02x:<%016Lx> ",
 		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
 		       m->cs, m->ip);
+
 		if (m->cs == __KERNEL_CS)
 			print_symbol("{%s}", m->ip);
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 	}
-	printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+	pr_emerg("TSC %llx ", m->tsc);
 	if (m->addr)
-		printk(KERN_CONT "ADDR %llx ", m->addr);
+		pr_cont("ADDR %llx ", m->addr);
 	if (m->misc)
-		printk(KERN_CONT "MISC %llx ", m->misc);
-	printk(KERN_CONT "\n");
-	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
-	       m->cpuvendor, m->cpuid, m->time, m->socketid,
-	       m->apicid);
+		pr_cont("MISC %llx ", m->misc);
+
+	pr_cont("\n");
+	pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
 
-	decode_mce(m);
+	/*
+	 * Print out human-readable details about the MCE error,
+	 * (if the CPU has an implementation for that):
+	 */
+	x86_mce_decode_callback(m);
 }
 
 static void print_mce_head(void)
 {
-	printk(KERN_EMERG "\nHARDWARE ERROR\n");
+	pr_emerg("\nHARDWARE ERROR\n");
 }
 
 static void print_mce_tail(void)
 {
-	printk(KERN_EMERG "This is not a software problem!\n"
-#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
-	       "Run through mcelog --ascii to decode and contact your hardware vendor\n"
-#endif
-		);
+	pr_emerg("This is not a software problem!\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced;
 static void wait_for_panic(void)
 {
 	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
 	preempt_disable();
 	local_irq_enable();
 	while (timeout-- > 0)
@@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 static int msr_to_offset(u32 msr)
 {
 	unsigned bank = __get_cpu_var(injectm.bank);
+
 	if (msr == rip_msr)
 		return offsetof(struct mce, ip);
 	if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr)
 static u64 mce_rdmsrl(u32 msr)
 {
 	u64 v;
+
 	if (__get_cpu_var(injectm).finished) {
 		int offset = msr_to_offset(msr);
+
 		if (offset < 0)
 			return 0;
 		return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
 	}
-	rdmsrl(msr, v);
+
+	if (rdmsrl_safe(msr, &v)) {
+		WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+		/*
+		 * Return zero in case the access faulted. This should
+		 * not happen normally but can happen if the CPU does
+		 * something weird, or if the code is buggy.
+		 */
+		v = 0;
+	}
+
 	return v;
 }
 
@@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
 {
 	if (__get_cpu_var(injectm).finished) {
 		int offset = msr_to_offset(msr);
+
 		if (offset >= 0)
 			*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
 		return;
@@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 	m->ip = mce_rdmsrl(rip_msr);
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
 /*
  * Called after interrupts have been reenabled again
  * when a MCE happened during an interrupts off region
@@ -1172,6 +1196,7 @@ static int mce_banks_init(void)
 		return -ENOMEM;
 	for (i = 0; i < banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
+
 		b->ctl = -1ULL;
 		b->init = 1;
 	}
@@ -1203,6 +1228,7 @@ static int __cpuinit mce_cap_init(void)
 	banks = b;
 	if (!mce_banks) {
 		int err = mce_banks_init();
+
 		if (err)
 			return err;
 	}
@@ -1237,6 +1263,7 @@ static void mce_init(void)
 
 	for (i = 0; i < banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
+
 		if (!b->init)
 			continue;
 		wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
@@ -1626,6 +1653,7 @@ static int mce_disable(void)
 
 	for (i = 0; i < banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
+
 		if (b->init)
 			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
@@ -1911,6 +1939,7 @@ static void mce_disable_cpu(void *h)
 		cmci_clear();
 	for (i = 0; i < banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
+
 		if (b->init)
 			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
@@ -1928,6 +1957,7 @@ static void mce_reenable_cpu(void *h)
 		cmci_reenable();
 	for (i = 0; i < banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
+
 		if (b->init)
 			wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665fe93d..7c785634af2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/sched.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 63a56d147e4a..b3a1dba75330 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,20 +34,31 @@
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
 
-static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
-static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-static DEFINE_PER_CPU(bool, thermal_throttle_active);
+/*
+ * Current thermal throttling state:
+ */
+struct thermal_state {
+	bool			is_throttled;
+
+	u64			next_check;
+	unsigned long		throttle_count;
+	unsigned long		last_throttle_count;
+};
+
+static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en	= ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)				\
 	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
 
 #define define_therm_throt_sysdev_show_func(name)			\
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
-					struct sysdev_attribute *attr,	\
-					char *buf)			\
+									\
+static ssize_t therm_throt_sysdev_show_##name(				\
+			struct sys_device *dev,				\
+			struct sysdev_attribute *attr,			\
+			char *buf)					\
 {									\
 	unsigned int cpu = dev->id;					\
 	ssize_t ret;							\
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
 	preempt_disable();	/* CPU hotplug */			\
 	if (cpu_online(cpu))						\
 		ret = sprintf(buf, "%lu\n",				\
-			      per_cpu(thermal_throttle_##name, cpu));	\
+			      per_cpu(thermal_state, cpu).name);	\
 	else								\
 		ret = 0;						\
 	preempt_enable();						\
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
 	return ret;							\
 }
 
-define_therm_throt_sysdev_show_func(count);
-define_therm_throt_sysdev_one_ro(count);
+define_therm_throt_sysdev_show_func(throttle_count);
+define_therm_throt_sysdev_one_ro(throttle_count);
 
 static struct attribute *thermal_throttle_attrs[] = {
-	&attr_count.attr,
+	&attr_throttle_count.attr,
 	NULL
 };
 
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = {
  * 1 : Event should be logged further, and a message has been
  *     printed to the syslog.
  */
-static int therm_throt_process(int curr)
+static int therm_throt_process(bool is_throttled)
 {
-	unsigned int cpu = smp_processor_id();
-	__u64 tmp_jiffs = get_jiffies_64();
-	bool was_throttled = __get_cpu_var(thermal_throttle_active);
-	bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
+	struct thermal_state *state;
+	unsigned int this_cpu;
+	bool was_throttled;
+	u64 now;
+
+	this_cpu = smp_processor_id();
+	now = get_jiffies_64();
+	state = &per_cpu(thermal_state, this_cpu);
+
+	was_throttled = state->is_throttled;
+	state->is_throttled = is_throttled;
 
 	if (is_throttled)
-		__get_cpu_var(thermal_throttle_count)++;
+		state->throttle_count++;
 
-	if (!(was_throttled ^ is_throttled) &&
-	    time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+	if (time_before64(now, state->next_check) &&
+	    state->throttle_count != state->last_throttle_count)
 		return 0;
 
-	__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
+	state->next_check = now + CHECK_INTERVAL;
+	state->last_throttle_count = state->throttle_count;
 
 	/* if we just entered the thermal event */
 	if (is_throttled) {
-		printk(KERN_CRIT "CPU%d: Temperature above threshold, "
-		       "cpu clock throttled (total events = %lu)\n",
-		       cpu, __get_cpu_var(thermal_throttle_count));
+		printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
 
 		add_taint(TAINT_MACHINE_CHECK);
 		return 1;
 	}
 	if (was_throttled) {
-		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
 		return 1;
 	}
 
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void)
 	__u64 msr_val;
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+	if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
 		mce_log_therm_throt_event(msr_val);
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index f04e72527604..3c1b12d461d1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 	unsigned long long base, size;
 	char *ptr;
 	char line[LINE_SIZE];
+	int length;
 	size_t linelen;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (!len)
-		return -EINVAL;
 
 	memset(line, 0, LINE_SIZE);
-	if (len > LINE_SIZE)
-		len = LINE_SIZE;
-	if (copy_from_user(line, buf, len - 1))
+
+	length = len;
+	length--;
+
+	if (length > LINE_SIZE - 1)
+		length = LINE_SIZE - 1;
+
+	if (length < 0)
+		return -EINVAL;
+
+	if (copy_from_user(line, buf, length))
 		return -EFAULT;
 
 	linelen = strlen(line);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a3c7adb06b78..b5801c311846 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1790,6 +1790,9 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
 void set_perf_event_pending(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
+	if (!x86_pmu.apic || !x86_pmu_initialized())
+		return;
+
 	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
 #endif
 }
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index bca5fba91c9e..f7dd2a7c3bf4 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -5,7 +5,6 @@
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/hardirq.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 54b0a3276766..a071e6be177e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -5,7 +5,6 @@
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/hardirq.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 85419bb7d4ab..d17d482a04f4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
 	if (mb < 16)
 		return 1024*1024;
 
-	/* To 32MB for anything above that */
-	return 32*1024*1024;
+	/* To 64MB for anything above that */
+	return 64*1024*1024;
 }
 
 #define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 2acfd3fdc0cc..b9c830c12b4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -178,6 +178,11 @@ asmlinkage void early_printk(const char *fmt, ...)
 
 static inline void early_console_register(struct console *con, int keep_early)
 {
+	if (early_console->index != -1) {
+		printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+		       con->name);
+		return;
+	}
 	early_console = con;
 	if (keep_early)
 		early_console->flags &= ~CON_BOOT;
@@ -201,8 +206,11 @@ static int __init setup_early_printk(char *buf)
 
 	while (*buf != '\0') {
 		if (!strncmp(buf, "serial", 6)) {
-			early_serial_init(buf + 6);
+			buf += 6;
+			early_serial_init(buf);
 			early_console_register(&early_serial_console, keep);
+			if (!strncmp(buf, ",ttyS", 5))
+				buf += 5;
 		}
 		if (!strncmp(buf, "ttyS", 4)) {
 			early_serial_init(buf + 4);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b766e8c7252d..050c278481b1 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
  * any particular GDT layout, because we load our own as soon as we
  * can.
  */
-.section .text.head,"ax",@progbits
+__HEAD
 ENTRY(startup_32)
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
@@ -608,7 +608,7 @@ ENTRY(initial_code)
 /*
  * BSS section
  */
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
 swapper_pg_pmd:
@@ -626,7 +626,7 @@ ENTRY(empty_zero_page)
  * This starts the data section.
  */
 #ifdef CONFIG_X86_PAE
-.section ".data.page_aligned","wa"
+__PAGE_ALIGNED_DATA
 	/* Page-aligned for the benefit of paravirt? */
 	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fa54f78e2a05..780cd928fcd5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
 	.text
-	.section .text.head
+	__HEAD
 	.code64
 	.globl startup_64
 startup_64:
@@ -418,7 +418,7 @@ ENTRY(phys_base)
 ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 
-	.section .bss.page_aligned, "aw", @nobits
+	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
 	.skip PAGE_SIZE
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 43cec6bdda63..9c3bd4a2050e 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -10,6 +10,16 @@
 EXPORT_SYMBOL(mcount);
 #endif
 
+/*
+ * Note, this is a prototype to get at the symbol for
+ * the export, but dont use it from C code, it is used
+ * by assembly code and is not using C calling convention!
+ */
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+EXPORT_SYMBOL(cmpxchg8b_emu);
+#endif
+
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy_generic);
 
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 270ff83efc11..3a54dcb9cd0e 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union
-	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+	{ INIT_THREAD_INFO(init_task) };
 
 /*
  * Initial task structure.
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 74656d1d4e30..04bbd5278568 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "%*s: ", prec, "CNT");
+	seq_printf(p, "%*s: ", prec, "PMI");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
-	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "  Performance monitoring interrupts\n");
 	seq_printf(p, "%*s: ", prec, "PND");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 71f1d99a635d..ec6ef60cbd17 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 #ifdef CONFIG_SMP
 	preempt_disable();
 	load_LDT(pc);
-	if (!cpus_equal(current->mm->cpu_vm_mask,
-			cpumask_of_cpu(smp_processor_id())))
+	if (!cpumask_equal(mm_cpumask(current->mm),
+			   cpumask_of(smp_processor_id())))
 		smp_call_function(flush_ldt, current->mm, 1);
 	preempt_enable();
 #else
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838eac18c..b2a71dca5642 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
 
 /*
  * This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA ranslation for
+ * If this variable is 1, IOMMU implementations do no DMA translation for
  * devices and allow every device to access to whole physical memory. This is
  * useful if a user want to use an IOMMU only for KVM device assignment to
  * guests and not for driver dma translation.
@@ -311,7 +311,7 @@ void pci_iommu_shutdown(void)
 	amd_iommu_shutdown();
 }
 /* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
 
 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827ee9ed7..a7f1b64f86e0 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
 #include <linux/agp_backend.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 847ab4160315..5284cd2b5776 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -555,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 void __init init_c1e_mask(void)
 {
 	/* If we're using c1e_idle, we need to allocate c1e_mask. */
-	if (pm_idle == c1e_idle) {
-		alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
-		cpumask_clear(c1e_mask);
-	}
+	if (pm_idle == c1e_idle)
+		zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
 }
 
 static int __init idle_setup(char *str)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f92a6d7..a1a3cdda06e1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
 #include <linux/pm.h>
 #include <linux/efi.h>
 #include <linux/dmi.h>
+#include <linux/sched.h>
 #include <linux/tboot.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 09c5e077dff7..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 #endif
 	current_thread_info()->cpu = 0;  /* needed? */
 	for_each_possible_cpu(i) {
-		alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-		alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-		alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
-		cpumask_clear(per_cpu(cpu_core_map, i));
-		cpumask_clear(per_cpu(cpu_sibling_map, i));
-		cpumask_clear(cpu_data(i).llc_shared_map);
+		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+		zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
 	}
 	set_cpu_sibling_map(0);
 
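
The smpboot.c and process.c hunks above replace the two-step allocate-then-clear idiom with zalloc_cpumask_var(), which hands back an already-zeroed mask. A minimal userspace C sketch of the equivalence (hypothetical helper names, not the kernel implementation):

#include <stdlib.h>
#include <string.h>

struct cpumask { unsigned long bits[4]; };

/* The old pattern: alloc_cpumask_var() followed by cpumask_clear(). */
static int alloc_then_clear(struct cpumask **mask)
{
	*mask = malloc(sizeof(**mask));
	if (!*mask)
		return 0;
	memset(*mask, 0, sizeof(**mask));	/* the separate clear step */
	return 1;
}

/* The new pattern: one zeroing allocation, the role zalloc_cpumask_var()
 * plays in the kernel (calloc() stands in for it here). */
static int zalloc_mask(struct cpumask **mask)
{
	*mask = calloc(1, sizeof(**mask));
	return *mask != NULL;
}

Besides being shorter, the combined call removes the window in which a mask exists but still holds garbage.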
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e293ac56c723..be2573448ed9 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
 #ifdef CONFIG_FRAME_POINTER
 	return *(unsigned long *)(regs->bp + sizeof(long));
 #else
-	unsigned long *sp = (unsigned long *)regs->sp;
+	unsigned long *sp =
+		(unsigned long *)kernel_stack_pointer(regs);
 	/*
 	 * Return address is either directly at stack pointer
 	 * or above a saved flags. Eflags has bits 22-31 zero,
@@ -93,7 +94,6 @@ static struct irqaction irq0 = {
 
 void __init setup_default_timer_irq(void)
 {
-	irq0.mask = cpumask_of_cpu(0);
 	setup_irq(0, &irq0);
 }
 
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 699f7eeb896a..cd022121cab6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
 #include <asm/trampoline.h>
 #include <asm/e820.h>
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
+#define __trampinit
+#define __trampinitdata
+#else
+#define __trampinit __cpuinit
+#define __trampinitdata __cpuinitdata
+#endif
+
 /* ready for x86_64 and x86 */
-unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
 
 void __init reserve_trampoline_memory(void)
 {
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
  * bootstrap into the page concerned. The caller
  * has made sure it's suitably aligned.
  */
-unsigned long __cpuinit setup_trampoline(void)
+unsigned long __trampinit setup_trampoline(void)
 {
 	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
 	return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 596d54c660a5..3af2dff58b21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -32,8 +32,12 @@
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
 
+#ifdef CONFIG_ACPI_SLEEP
+.section .rodata, "a", @progbits
+#else
 /* We can free up the trampoline after bootup if cpu hotplug is not supported. */
 __CPUINITRODATA
+#endif
 .code16
 
 ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9346e102338d..7e37dcee0cc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -14,7 +14,6 @@
 #include <linux/spinlock.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -73,11 +72,9 @@ char ignore_fpu_irq;
 
 /*
  * The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
  */
-gate_desc idt_table[NR_VECTORS]
-	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
 #endif
 
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 027b5b498993..f37930954d15 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
 		return;
 
 	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-		pr_info("Skipping synchronization checks as TSC is reliable.\n");
+		printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
 		return;
 	}
 
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6cfe53e..d430e4c30193 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
 
 	pv_info.paravirt_enabled = 1;
 	pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
-	pv_info.name = "vmi";
+	pv_info.name = "vmi [deprecated]";
 
 	pv_init_ops.patch = vmi_patch;
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a46acccec38a..92929fb3f9fa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,17 +65,11 @@ SECTIONS
 #endif
 
 	/* Text and read-only data */
-
-	/* bootstrapping code */
-	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
-		_text = .;
-		*(.text.head)
-	} :text = 0x9090
-
-	/* The rest of the text */
 	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
+		_text = .;
+		/* bootstrapping code */
+		HEAD_TEXT
 #ifdef CONFIG_X86_32
-		/* not really needed, already page aligned */
 		. = ALIGN(PAGE_SIZE);
 		*(.text.page_aligned)
 #endif
@@ -94,13 +88,7 @@ SECTIONS
 
 	NOTES :text :note
 
-	/* Exception table */
-	. = ALIGN(16);
-	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-		__start___ex_table = .;
-		*(__ex_table)
-		__stop___ex_table = .;
-	} :text = 0x9090
+	EXCEPTION_TABLE(16) :text = 0x9090
 
 	RO_DATA(PAGE_SIZE)
 
@@ -118,7 +106,6 @@ SECTIONS
 #endif
 
 	PAGE_ALIGNED_DATA(PAGE_SIZE)
-	*(.data.idt)
 
 	CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
 
@@ -135,24 +122,21 @@ SECTIONS
 #ifdef CONFIG_X86_64
 
 #define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
-                            PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
-                            PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
 
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
 #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
 
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
 #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
 
+	. = ALIGN(4096);
+	__vsyscall_0 = .;
+
 	. = VSYSCALL_ADDR;
-	.vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+	.vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
 		*(.vsyscall_0)
 	} :user
 
-	__vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
 	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
 	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
 		*(.vsyscall_fn)
@@ -192,11 +176,9 @@ SECTIONS
 		*(.vsyscall_3)
 	}
 
-	. = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+	. = __vsyscall_0 + PAGE_SIZE;
 
 #undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
 #undef VLOAD_OFFSET
 #undef VLOAD
 #undef VVIRT_OFFSET
@@ -219,36 +201,12 @@ SECTIONS
 	PERCPU_VADDR(0, :percpu)
 #endif
 
-	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-		_sinittext = .;
-		INIT_TEXT
-		_einittext = .;
-	}
+	INIT_TEXT_SECTION(PAGE_SIZE)
 #ifdef CONFIG_X86_64
 	:init
 #endif
 
-	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-		INIT_DATA
-	}
-
-	. = ALIGN(16);
-	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-		__setup_start = .;
-		*(.init.setup)
-		__setup_end = .;
-	}
-	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-		__initcall_start = .;
-		INITCALLS
-		__initcall_end = .;
-	}
-
-	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-		__con_initcall_start = .;
-		*(.con_initcall.init)
-		__con_initcall_end = .;
-	}
+	INIT_DATA_SECTION(16)
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
 		__x86_cpu_dev_start = .;
@@ -256,8 +214,6 @@ SECTIONS
 		__x86_cpu_dev_end = .;
 	}
 
-	SECURITY_INIT
-
 	. = ALIGN(8);
 	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
 		__parainstructions = .;
@@ -288,15 +244,6 @@ SECTIONS
 		EXIT_DATA
 	}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(PAGE_SIZE);
-	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		__initramfs_end = .;
-	}
-#endif
-
 #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
 	PERCPU(PAGE_SIZE)
 #endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index cf53a78e2dcf..8cb4974ff599 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
 }
 
 #ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
-		       void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
 static ctl_table kernel_table2[] = {
 	{ .procname = "vsyscall64",
 	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
 	  .mode = 0644,
-	  .proc_handler = vsyscall_sysctl_change },
+	  .proc_handler = proc_dointvec },
 	{}
 };
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb2..7024224f0fc8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now = apic->lapic_timer.timer.base->get_time();
 
-	apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+	apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->divide_count;
 	atomic_set(&apic->lapic_timer.pending, 0);
 
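
The (u64) cast in the lapic.c hunk matters because the register value is 32 bits wide: without it the multiplication by APIC_BUS_CYCLE_NS and divide_count is performed in 32-bit arithmetic and can wrap before being widened into the 64-bit period field. A small standalone demonstration of that overflow (illustrative constants, not the real APIC values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tmict = 0x10000000;	/* large guest-programmed initial count */
	uint32_t cycle_ns = 1000;	/* stand-in for APIC_BUS_CYCLE_NS * divide_count */

	uint64_t wrapped = tmict * cycle_ns;		/* 32-bit multiply, wraps */
	uint64_t correct = (uint64_t)tmict * cycle_ns;	/* widened first, as in the fix */

	printf("wrapped=%llu correct=%llu\n",
	       (unsigned long long)wrapped, (unsigned long long)correct);
	return 0;
}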
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f453..685a4ffac8e6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
 struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
-		kvm_release_pfn_dirty(pfn);
-	else
-		kvm_release_pfn_clean(pfn);
+		kvm_set_pfn_dirty(pfn);
 	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove:  %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 {
 	u64 *spte;
 	int need_tlb_flush = 0;
@@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	return need_tlb_flush;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+{
+	int need_flush = 0;
+	u64 *spte, new_spte;
+	pte_t *ptep = (pte_t *)data;
+	pfn_t new_pfn;
+
+	WARN_ON(pte_huge(*ptep));
+	new_pfn = pte_pfn(*ptep);
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!is_shadow_present_pte(*spte));
+		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+		need_flush = 1;
+		if (pte_write(*ptep)) {
+			rmap_remove(kvm, spte);
+			__set_spte(spte, shadow_trap_nonpresent_pte);
+			spte = rmap_next(kvm, rmapp, NULL);
+		} else {
+			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+			new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+			new_spte &= ~PT_WRITABLE_MASK;
+			new_spte &= ~SPTE_HOST_WRITEABLE;
+			if (is_writeble_pte(*spte))
+				kvm_set_pfn_dirty(spte_to_pfn(*spte));
+			__set_spte(spte, new_spte);
+			spte = rmap_next(kvm, rmapp, spte);
+		}
+	}
+	if (need_flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 u64 data))
 {
 	int i, j;
 	int retval = 0;
@@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+			retval |= handler(kvm, &memslot->rmap[gfn_offset],
+					  data);
 
 			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
 				int idx = gfn_offset;
 				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
 				retval |= handler(kvm,
 					&memslot->lpage_info[j][idx].rmap_pde,
+					data);
 			}
 		}
 	}
@@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 {
 	u64 *spte;
 	int young = 0;
@@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
 }
 
 #ifdef MMU_DEBUG
@@ -1756,7 +1800,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
-		    bool can_unsync)
+		    bool can_unsync, bool reset_host_protection)
 {
 	u64 spte;
 	int ret = 0;
@@ -1783,6 +1827,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
 			kvm_is_mmio_pfn(pfn));
 
+	if (reset_host_protection)
+		spte |= SPTE_HOST_WRITEABLE;
+
 	spte |= (u64)pfn << PAGE_SHIFT;
 
 	if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1875,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
 			 int *ptwrite, int level, gfn_t gfn,
-			 pfn_t pfn, bool speculative)
+			 pfn_t pfn, bool speculative,
+			 bool reset_host_protection)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1908,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	}
 
 	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
-		      dirty, level, gfn, pfn, speculative, true)) {
+		      dirty, level, gfn, pfn, speculative, true,
+		      reset_host_protection)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1926,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	page_header_update_slot(vcpu->kvm, sptep, gfn);
 	if (!was_rmapped) {
 		rmap_count = rmap_add(vcpu, sptep, gfn);
-		if (!is_rmap_spte(*sptep))
-			kvm_release_pfn_clean(pfn);
+		kvm_release_pfn_clean(pfn);
 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
 			rmap_recycle(vcpu, sptep, gfn);
 	} else {
@@ -1909,7 +1957,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
-				     level, gfn, pfn, false);
+				     level, gfn, pfn, false, true);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
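
Most of the mmu.c churn above is one mechanical change: kvm_handle_hva() now carries an opaque u64 that it forwards to each rmap callback, so the new kvm_set_pte_rmapp() can receive a pointer to the host pte while the existing handlers simply pass 0 and ignore it. A reduced sketch of that callback-with-opaque-data pattern (hypothetical names, not KVM code):

#include <stdint.h>
#include <stdio.h>

typedef int (*slot_handler)(unsigned long *slot, uint64_t data);

/* The walker forwards 'data' untouched, the way kvm_handle_hva() now
 * forwards its u64 to every rmapp handler. */
static int walk_slots(unsigned long *slots, int n, uint64_t data,
		      slot_handler handler)
{
	int ret = 0;
	int i;

	for (i = 0; i < n; i++)
		ret |= handler(&slots[i], data);
	return ret;
}

/* A handler that ignores the extra argument, like kvm_unmap_rmapp()... */
static int clear_slot(unsigned long *slot, uint64_t data)
{
	(void)data;
	*slot = 0;
	return 1;
}

/* ...and one that smuggles a pointer through it, like kvm_set_pte_rmapp(). */
static int set_slot(unsigned long *slot, uint64_t data)
{
	*slot = *(unsigned long *)(uintptr_t)data;
	return 1;
}

int main(void)
{
	unsigned long slots[4] = { 1, 2, 3, 4 }, v = 42;

	walk_slots(slots, 4, 0, clear_slot);
	walk_slots(slots, 4, (uint64_t)(uintptr_t)&v, set_slot);
	printf("%lu\n", slots[0]);	/* prints 42 */
	return 0;
}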
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c12d22..72558f8ff3f5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
 		return;
 	kvm_get_pfn(pfn);
+	/*
+	 * we call mmu_set_spte() with reset_host_protection = true because
+	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+	 */
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
-		     gpte_to_gfn(gpte), pfn, true);
+		     gpte_to_gfn(gpte), pfn, true, true);
 }
 
 /*
280 284
281/* 285/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
 				     ptwrite, level,
-				     gw->gfn, pfn, false);
+				     gw->gfn, pfn, false, true);
 			break;
 		}
 
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	int i, offset, nr_present;
+	bool reset_host_protection;
 
 	offset = nr_present = 0;
 
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
+			pte_access &= ~ACC_WRITE_MASK;
+			reset_host_protection = 0;
+		} else {
+			reset_host_protection = 1;
+		}
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
-			 spte_to_pfn(sp->spt[i]), true, false);
+			 spte_to_pfn(sp->spt[i]), true, false,
+			 reset_host_protection);
 	}
 
 	return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c04b3c..c17404add91f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		rdtscll(tsc_this);
 		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
+		if (is_nested(svm))
+			svm->nested.hsave->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
 		svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		u64 tsc;
+		u64 tsc_offset;
+
+		if (is_nested(svm))
+			tsc_offset = svm->nested.hsave->control.tsc_offset;
+		else
+			tsc_offset = svm->vmcb->control.tsc_offset;
 
-		rdtscll(tsc);
-		*data = svm->vmcb->control.tsc_offset + tsc;
+		*data = tsc_offset + native_read_tsc();
 		break;
 	}
 	case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		u64 tsc;
+		u64 tsc_offset = data - native_read_tsc();
+		u64 g_tsc_offset = 0;
+
+		if (is_nested(svm)) {
+			g_tsc_offset = svm->vmcb->control.tsc_offset -
+				       svm->nested.hsave->control.tsc_offset;
+			svm->nested.hsave->control.tsc_offset = tsc_offset;
+		}
+
+		svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
 
-		rdtscll(tsc);
-		svm->vmcb->control.tsc_offset = data - tsc;
 		break;
 	}
 	case MSR_K6_STAR:
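
The arithmetic behind the three svm.c hunks: a guest executing RDTSC observes host_tsc + tsc_offset, so writing MSR_IA32_TSC to a value 'data' means choosing tsc_offset = data - host_tsc, and with nested SVM the difference between the outer and nested offsets (g_tsc_offset) has to survive the write. A hedged C sketch of that bookkeeping, not the actual KVM code:

#include <stdint.h>

/* What a guest sees on RDTSC. */
static uint64_t guest_tsc(uint64_t host_tsc, uint64_t tsc_offset)
{
	return host_tsc + tsc_offset;
}

/* In the spirit of svm_set_msr(MSR_IA32_TSC): make the current guest
 * observe exactly 'data' while preserving the nested guest's relative
 * offset. */
static void set_guest_tsc(uint64_t data, uint64_t host_tsc,
			  uint64_t *vmcb_offset, uint64_t *hsave_offset,
			  int is_nested)
{
	uint64_t tsc_offset = data - host_tsc;
	uint64_t g_tsc_offset = 0;

	if (is_nested) {
		g_tsc_offset = *vmcb_offset - *hsave_offset;
		*hsave_offset = tsc_offset;
	}
	*vmcb_offset = tsc_offset + g_tsc_offset;
}

After the call, guest_tsc(host_tsc, *hsave_offset) equals data for the L1 view, and the L2 view stays g_tsc_offset ahead of it, which is the same invariant the vcpu_load hunk maintains by shifting both offsets by an identical delta.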
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3812014bd0b..ed53b42caba1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
 		kvm_migrate_timers(vcpu);
-		vpid_sync_vcpu_all(vmx);
+		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
 		local_irq_disable();
 		list_add(&vmx->local_vcpus_link,
 			 &per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee44249..9b9695322f56 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 
 	if (cpuid->nent < 1)
 		goto out;
+	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
 	r = -ENOMEM;
 	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
 	if (!cpuid_entries)
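
The added nent check is the standard defensive clamp on an ioctl input: the count comes straight from userspace and feeds a vmalloc() size, so an unchecked value could request an absurdly large allocation. A generic userspace sketch of the pattern (MAX_ENTRIES is a stand-in for KVM_MAX_CPUID_ENTRIES):

#include <stdlib.h>

#define MAX_ENTRIES 80	/* stand-in for KVM_MAX_CPUID_ENTRIES */

struct entry { int data[10]; };

/* Clamp a caller-controlled count before it drives the allocation. */
static struct entry *alloc_entries(unsigned int *nent)
{
	if (*nent > MAX_ENTRIES)
		*nent = MAX_ENTRIES;
	return malloc(sizeof(struct entry) * *nent);
}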
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 965026472c71..a2d6472895fb 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -29,7 +29,9 @@ ifeq ($(CONFIG_X86_32),y)
         lib-y += checksum_32.o
         lib-y += strstr_32.o
         lib-y += semaphore_32.o string_32.o
-
+ifneq ($(CONFIG_X86_CMPXCHG64),y)
+        lib-y += cmpxchg8b_emu.o
+endif
         lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
         obj-y += io_64.o iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
new file mode 100644
index 000000000000..828cb710dec2
--- /dev/null
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -0,0 +1,57 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/frame.h>
+#include <asm/dwarf2.h>
+
+
+.text
+
+/*
+ * Inputs:
+ * %esi : memory location to compare
+ * %eax : low 32 bits of old value
+ * %edx : high 32 bits of old value
+ * %ebx : low 32 bits of new value
+ * %ecx : high 32 bits of new value
+ */
+ENTRY(cmpxchg8b_emu)
+CFI_STARTPROC
+
+#
+# Emulate 'cmpxchg8b (%esi)' on UP except we don't
+# set the whole ZF thing (caller will just compare
+# eax:edx with the expected value)
+#
+cmpxchg8b_emu:
+	pushfl
+	cli
+
+	cmpl  (%esi), %eax
+	jne   not_same
+	cmpl 4(%esi), %edx
+	jne   half_same
+
+	movl %ebx,  (%esi)
+	movl %ecx, 4(%esi)
+
+	popfl
+	ret
+
+ not_same:
+	movl  (%esi), %eax
+ half_same:
+	movl 4(%esi), %edx
+
+	popfl
+	ret
+
+CFI_ENDPROC
+ENDPROC(cmpxchg8b_emu)
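
In C terms, the routine above emulates the 8-byte compare-and-exchange for 486-class CPUs that lack the cmpxchg8b instruction: compare the 64-bit value at the target against eax:edx, store ebx:ecx only on a match, and return the current memory contents in eax:edx either way; interrupts are disabled around the sequence, which suffices for atomicity on the uniprocessor systems this path serves. A rough sketch of the semantics, not the kernel code:

#include <stdint.h>

/* The pushfl/cli ... popfl pair in the assembly stands in for locking;
 * on UP, disabling interrupts makes this window atomic. */
static uint64_t cmpxchg8b_semantics(uint64_t *ptr, uint64_t old, uint64_t new)
{
	uint64_t cur = *ptr;	/* returned in eax:edx either way */

	if (cur == old)
		*ptr = new;
	return cur;
}

Note the deliberate shortcut called out in the assembly comment: the emulation does not reproduce ZF, so callers compare the returned eax:edx against the expected value instead of testing the flag.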
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9b5a9f59a478..06630d26e56d 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,9 +1,10 @@
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o gup.o
+	    pat.o pgtable.o physaddr.o gup.o setup_nx.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_physaddr.o := $(nostackp)
+CFLAGS_setup_nx.o := $(nostackp)
 
 obj-$(CONFIG_SMP) += tlb.o
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 923ea3fb7037..8f4e2ac93928 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -168,6 +168,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 	info.si_errno	= 0;
 	info.si_code	= si_code;
 	info.si_addr	= (void __user *)address;
+	info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -791,10 +792,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
 }
 
 static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+	  unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	int code = BUS_ADRERR;
 
 	up_read(&mm->mmap_sem);
 
@@ -810,7 +813,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 	tsk->thread.error_code	= error_code;
 	tsk->thread.trap_no	= 14;
 
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+	if (fault & VM_FAULT_HWPOISON) {
+		printk(KERN_ERR
+	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+			tsk->comm, tsk->pid, address);
+		code = BUS_MCEERR_AR;
+	}
+#endif
+	force_sig_info_fault(SIGBUS, code, address, tsk);
 }
 
 static noinline void
815 826
816static noinline void 827static noinline void
@@ -820,8 +831,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	if (fault & VM_FAULT_OOM) {
 		out_of_memory(regs, error_code, address);
 	} else {
-		if (fault & VM_FAULT_SIGBUS)
-			do_sigbus(regs, error_code, address);
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+			do_sigbus(regs, error_code, address, fault);
 		else
 			BUG();
 	}
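
With these fault.c changes, a process whose access hits hardware-poisoned memory now gets a SIGBUS whose si_code is BUS_MCEERR_AR and whose si_addr_lsb encodes the poison granularity (PAGE_SHIFT for a whole bad page), rather than a generic BUS_ADRERR. Userspace that wants to react can install an SA_SIGINFO handler; a sketch, assuming a libc that exposes BUS_MCEERR_AR and keeping in mind that a production handler would use async-signal-safe output:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void bus_handler(int sig, siginfo_t *si, void *ctx)
{
	/* BUS_MCEERR_AR: action required, the faulting access touched
	 * poisoned memory at si_addr. */
	if (si->si_code == BUS_MCEERR_AR)
		fprintf(stderr, "memory failure at %p, lsb=%d bits\n",
			si->si_addr, (int)si->si_addr_lsb);
	_exit(1);
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction	= bus_handler,
		.sa_flags	= SA_SIGINFO,
	};

	sigaction(SIGBUS, &sa, NULL);
	pause();	/* a real consumer would now touch its mappings */
	return 0;
}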
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 0607119cef94..73ffd5536f62 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,69 +28,6 @@ int direct_gbpages
 #endif
 ;
 
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 24952fdc7e40..dd38bfbefd1f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
 	mb();
 }
+EXPORT_SYMBOL_GPL(clflush_cache_range);
 
 static void __cpa_flush_all(void *arg)
 {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7257cf3decf9..e78cd0ec2bcf 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -81,6 +81,7 @@ enum {
 void pat_init(void)
 {
 	u64 pat;
+	bool boot_cpu = !boot_pat_state;
 
 	if (!pat_enabled)
 		return;
@@ -122,8 +123,10 @@ void pat_init(void)
 		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
-	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
-	       smp_processor_id(), boot_pat_state, pat);
+
+	if (boot_cpu)
+		printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+		       smp_processor_id(), boot_pat_state, pat);
 }
 
 #undef PAT
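
The pat.c change keys "am I the boot CPU?" off boot_pat_state itself: the variable is still zero before the boot processor's first rdmsrl() fills it (the PAT MSR's reset value is non-zero), so only the first caller prints and the secondary CPUs stay quiet. A tiny sketch of that print-once idiom, under the same assumption that the saved value is never legitimately zero:

#include <stdio.h>

static unsigned long long saved_state;	/* stand-in for boot_pat_state */

static void pat_init_like(unsigned long long msr_value)
{
	/* Zero only before the first call, so that call is the boot CPU. */
	int boot_cpu = (saved_state == 0);

	if (boot_cpu)
		saved_state = msr_value;	/* the rdmsrl() step */

	if (boot_cpu)
		printf("PAT enabled: old 0x%llx\n", saved_state);
}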
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
new file mode 100644
index 000000000000..513d8ed5d2ec
--- /dev/null
+++ b/arch/x86/mm/setup_nx.c
@@ -0,0 +1,69 @@
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c814e144a3f0..36fe08eeb5c3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -59,7 +59,8 @@ void leave_mm(int cpu)
 {
 	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		BUG();
-	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
+	cpumask_clear_cpu(cpu,
+			  mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void)
 	preempt_disable();
 
 	local_flush_tlb();
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 		else
 			leave_mm(smp_processor_id());
 	}
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 			leave_mm(smp_processor_id());
 	}
 
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
+	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(mm_cpumask(mm), mm, va);
 
 	preempt_enable();
 }
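
The tlb.c hunks, like the ldt.c and xen conversions elsewhere in this diff, stop dereferencing mm->cpu_vm_mask directly and go through the mm_cpumask() accessor instead, so the mask's representation can later change (for example to an off-stack, variably sized cpumask) without touching every caller. A reduced sketch of the accessor pattern:

#include <string.h>

struct cpumask { unsigned long bits[4]; };

struct mm_struct {
	/* ...other fields; keeping the mask behind an accessor means its
	 * layout can change in one place. */
	struct cpumask cpu_vm_mask;
};

/* In the spirit of the kernel's mm_cpumask(). */
static inline struct cpumask *mm_cpumask(struct mm_struct *mm)
{
	return &mm->cpu_vm_mask;
}

static void forget_all_cpus(struct mm_struct *mm)
{
	memset(mm_cpumask(mm), 0, sizeof(struct cpumask));
}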
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 5db96d4304de..1331fcf26143 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
 
 #else /* CONFIG_X86_32 */
 
-static unsigned char mp_bus_to_node[BUS_NR] = {
+static int mp_bus_to_node[BUS_NR] = {
 	[0 ... BUS_NR - 1] = -1
 };
 
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 52e62e57fedd..b22d13b0c71d 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev)
 	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
 }
 
-static struct vm_operations_struct pci_mmap_ops = {
+static const struct vm_operations_struct pci_mmap_ops = {
 	.access = generic_access_phys,
 };
 
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 88112b49f02c..6b4ffedb93c9 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@
 		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
 		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
 
-VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 GCOV_PROFILE := n
 
 #
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index b53225d2cac3..e133ce25e290 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations u32_array_fops = {
+static const struct file_operations u32_array_fops = {
 	.owner	= THIS_MODULE,
 	.open	= u32_array_open,
 	.release= xen_array_release,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 544eb7496531..3439616d69f1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1082,6 +1082,11 @@ asmlinkage void __init xen_start_kernel(void)
 
 	__supported_pte_mask |= _PAGE_IOMAP;
 
+#ifdef CONFIG_X86_64
+	/* Work out if we support NX */
+	check_efer();
+#endif
+
 	xen_setup_features();
 
 	/* Get mfn list */
@@ -1123,11 +1128,6 @@ asmlinkage void __init xen_start_kernel(void)
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-#ifdef CONFIG_X86_64
-	/* Work out if we support NX */
-	check_efer();
-#endif
-
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 093dd59b5385..3bf7b1d250ce 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	/* Get the "official" set of cpus referring to our pagetable. */
 	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
 		for_each_online_cpu(cpu) {
-			if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+			if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
 			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
 				continue;
 			smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
 		}
 		return;
 	}
-	cpumask_copy(mask, &mm->cpu_vm_mask);
+	cpumask_copy(mask, mm_cpumask(mm));
 
 	/* It's possible that a vcpu may have a stale reference to our
 	   cr3, because its in lazy mode, and it hasn't yet flushed