aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2009-12-01 02:16:22 -0500
committerHerbert Xu <herbert@gondor.apana.org.au>2009-12-01 02:16:22 -0500
commit838632438145ac6863377eb12d8b8eef9c55d288 (patch)
treefbb0757df837f3c75a99c518a3596c38daef162d /arch/x86
parent9996508b3353063f2d6c48c1a28a84543d72d70b (diff)
parent29e553631b2a0d4eebd23db630572e1027a9967a (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig42
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/compressed/head_32.S3
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S6
-rw-r--r--arch/x86/boot/install.sh4
-rw-r--r--arch/x86/boot/setup.ld3
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c10
-rw-r--r--arch/x86/ia32/ia32entry.S43
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/amd_iommu.h1
-rw-r--r--arch/x86/include/asm/apic.h13
-rw-r--r--arch/x86/include/asm/cache.h4
-rw-r--r--arch/x86/include/asm/checksum_32.h3
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h30
-rw-r--r--arch/x86/include/asm/desc.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h10
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h6
-rw-r--r--arch/x86/include/asm/nmi.h3
-rw-r--r--arch/x86/include/asm/paravirt.h28
-rw-r--r--arch/x86/include/asm/paravirt_types.h10
-rw-r--r--arch/x86/include/asm/pci.h6
-rw-r--r--arch/x86/include/asm/perf_event.h (renamed from arch/x86/include/asm/perf_counter.h)30
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/string_32.h1
-rw-r--r--arch/x86/include/asm/syscall.h14
-rw-r--r--arch/x86/include/asm/topology.h11
-rw-r--r--arch/x86/include/asm/uaccess_32.h2
-rw-r--r--arch/x86/include/asm/unistd_32.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h42
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/processor.c3
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S3
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c24
-rw-r--r--arch/x86/kernel/apic/apic.c37
-rw-r--r--arch/x86/kernel/apic/io_apic.c11
-rw-r--r--arch/x86/kernel/apic/nmi.c4
-rw-r--r--arch/x86/kernel/apic/probe_64.c15
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c19
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c30
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c87
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c67
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c29
-rw-r--r--arch/x86/kernel/cpu/perf_event.c (renamed from arch/x86/kernel/cpu/perf_counter.c)604
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c6
-rw-r--r--arch/x86/kernel/cpuid.c4
-rw-r--r--arch/x86/kernel/crash_dump_32.c19
-rw-r--r--arch/x86/kernel/dumpstack_32.c1
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/e820.c6
-rw-r--r--arch/x86/kernel/early_printk.c788
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/entry_64.S24
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c10
-rw-r--r--arch/x86/kernel/init_task.c5
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/microcode_amd.c6
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/msr.c4
-rw-r--r--arch/x86/kernel/pci-dma.c10
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/pci-swiotlb.c5
-rw-r--r--arch/x86/kernel/process.c31
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/ptrace.c21
-rw-r--r--arch/x86/kernel/reboot.c9
-rw-r--r--arch/x86/kernel/setup.c40
-rw-r--r--arch/x86/kernel/sfi.c122
-rw-r--r--arch/x86/kernel/smpboot.c9
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/time.c4
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/trampoline.c12
-rw-r--r--arch/x86/kernel/trampoline_32.S8
-rw-r--r--arch/x86/kernel/trampoline_64.S7
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/tsc_sync.c2
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S86
-rw-r--r--arch/x86/kernel/vsyscall_64.c10
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.c86
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--arch/x86/lguest/boot.c10
-rw-r--r--arch/x86/lib/Makefile4
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S57
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c27
-rw-r--r--arch/x86/mm/init.c63
-rw-r--r--arch/x86/mm/init_32.c12
-rw-r--r--arch/x86/mm/init_64.c12
-rw-r--r--arch/x86/mm/ioremap.c24
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c3
-rw-r--r--arch/x86/mm/kmemcheck/shadow.c1
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/pat.c7
-rw-r--r--arch/x86/mm/setup_nx.c69
-rw-r--r--arch/x86/mm/tlb.c15
-rw-r--r--arch/x86/oprofile/op_model_ppro.c4
-rw-r--r--arch/x86/oprofile/op_x86_model.h2
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c8
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/power/cpu.c4
-rw-r--r--arch/x86/vdso/Makefile2
-rw-r--r--arch/x86/xen/debugfs.c2
-rw-r--r--arch/x86/xen/enlighten.c24
-rw-r--r--arch/x86/xen/mmu.c4
138 files changed, 1434 insertions, 1719 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 51c59015b280..72ace9515a07 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486) 27 select HAVE_PERF_EVENTS if (!M386 && !M486)
28 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
29 select HAVE_KPROBES 29 select HAVE_KPROBES
30 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
86config HAVE_LATENCYTOP_SUPPORT 86config HAVE_LATENCYTOP_SUPPORT
87 def_bool y 87 def_bool y
88 88
89config FAST_CMPXCHG_LOCAL
90 bool
91 default y
92
93config MMU 89config MMU
94 def_bool y 90 def_bool y
95 91
@@ -432,6 +428,17 @@ config X86_NUMAQ
432 of Flat Logical. You will need a new lynxer.elf file to flash your 428 of Flat Logical. You will need a new lynxer.elf file to flash your
433 firmware with - send email to <Martin.Bligh@us.ibm.com>. 429 firmware with - send email to <Martin.Bligh@us.ibm.com>.
434 430
431config X86_SUPPORTS_MEMORY_FAILURE
432 bool
433 # MCE code calls memory_failure():
434 depends on X86_MCE
435 # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
436 depends on !X86_NUMAQ
437 # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
438 depends on X86_64 || !SPARSEMEM
439 select ARCH_SUPPORTS_MEMORY_FAILURE
440 default y
441
435config X86_VISWS 442config X86_VISWS
436 bool "SGI 320/540 (Visual Workstation)" 443 bool "SGI 320/540 (Visual Workstation)"
437 depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT 444 depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
@@ -484,7 +491,7 @@ if PARAVIRT_GUEST
484source "arch/x86/xen/Kconfig" 491source "arch/x86/xen/Kconfig"
485 492
486config VMI 493config VMI
487 bool "VMI Guest support" 494 bool "VMI Guest support (DEPRECATED)"
488 select PARAVIRT 495 select PARAVIRT
489 depends on X86_32 496 depends on X86_32
490 ---help--- 497 ---help---
@@ -493,6 +500,15 @@ config VMI
493 at the moment), by linking the kernel to a GPL-ed ROM module 500 at the moment), by linking the kernel to a GPL-ed ROM module
494 provided by the hypervisor. 501 provided by the hypervisor.
495 502
503 As of September 2009, VMware has started a phased retirement
504 of this feature from VMware's products. Please see
505 feature-removal-schedule.txt for details. If you are
506 planning to enable this option, please note that you cannot
507 live migrate a VMI enabled VM to a future VMware product,
508 which doesn't support VMI. So if you expect your kernel to
509 seamlessly migrate to newer VMware products, keep this
510 disabled.
511
496config KVM_CLOCK 512config KVM_CLOCK
497 bool "KVM paravirtualized clock" 513 bool "KVM paravirtualized clock"
498 select PARAVIRT 514 select PARAVIRT
@@ -1204,6 +1220,10 @@ config ARCH_DISCONTIGMEM_DEFAULT
1204 def_bool y 1220 def_bool y
1205 depends on NUMA && X86_32 1221 depends on NUMA && X86_32
1206 1222
1223config ARCH_PROC_KCORE_TEXT
1224 def_bool y
1225 depends on X86_64 && PROC_KCORE
1226
1207config ARCH_SPARSEMEM_DEFAULT 1227config ARCH_SPARSEMEM_DEFAULT
1208 def_bool y 1228 def_bool y
1209 depends on X86_64 1229 depends on X86_64
@@ -1423,12 +1443,8 @@ config SECCOMP
1423 1443
1424 If unsure, say Y. Only embedded should say N here. 1444 If unsure, say Y. Only embedded should say N here.
1425 1445
1426config CC_STACKPROTECTOR_ALL
1427 bool
1428
1429config CC_STACKPROTECTOR 1446config CC_STACKPROTECTOR
1430 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" 1447 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1431 select CC_STACKPROTECTOR_ALL
1432 ---help--- 1448 ---help---
1433 This option turns on the -fstack-protector GCC feature. This 1449 This option turns on the -fstack-protector GCC feature. This
1434 feature puts, at the beginning of functions, a canary value on 1450 feature puts, at the beginning of functions, a canary value on
@@ -1662,6 +1678,8 @@ source "kernel/power/Kconfig"
1662 1678
1663source "drivers/acpi/Kconfig" 1679source "drivers/acpi/Kconfig"
1664 1680
1681source "drivers/sfi/Kconfig"
1682
1665config X86_APM_BOOT 1683config X86_APM_BOOT
1666 bool 1684 bool
1667 default y 1685 default y
@@ -1857,7 +1875,7 @@ config PCI_DIRECT
1857 1875
1858config PCI_MMCONFIG 1876config PCI_MMCONFIG
1859 def_bool y 1877 def_bool y
1860 depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) 1878 depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
1861 1879
1862config PCI_OLPC 1880config PCI_OLPC
1863 def_bool y 1881 def_bool y
@@ -1895,7 +1913,7 @@ config DMAR_DEFAULT_ON
1895config DMAR_BROKEN_GFX_WA 1913config DMAR_BROKEN_GFX_WA
1896 def_bool n 1914 def_bool n
1897 prompt "Workaround broken graphics drivers (going away soon)" 1915 prompt "Workaround broken graphics drivers (going away soon)"
1898 depends on DMAR 1916 depends on DMAR && BROKEN
1899 ---help--- 1917 ---help---
1900 Current Graphics drivers tend to use physical address 1918 Current Graphics drivers tend to use physical address
1901 for DMA and avoid using DMA APIs. Setting this config 1919 for DMA and avoid using DMA APIs. Setting this config
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..2649840d888f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
400 400
401config X86_CMPXCHG64 401config X86_CMPXCHG64
402 def_bool y 402 def_bool y
403 depends on X86_PAE || X86_64 403 depends on !M386 && !M486
404 404
405# this should be set for all -march=.. options where the compiler 405# this should be set for all -march=.. options where the compiler
406# generates cmov. 406# generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
412 int 412 int
413 default "64" if X86_64 413 default "64" if X86_64
414 default "6" if X86_32 && X86_P6_NOP 414 default "6" if X86_32 && X86_P6_NOP
415 default "5" if X86_32 && X86_CMPXCHG64
415 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) 416 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
416 default "3" 417 default "3"
417 418
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7983c420eaf2..d2d24c9ee64d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR
76 cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh 76 cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
77 ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) 77 ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
78 stackp-y := -fstack-protector 78 stackp-y := -fstack-protector
79 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
80 KBUILD_CFLAGS += $(stackp-y) 79 KBUILD_CFLAGS += $(stackp-y)
81 else 80 else
82 $(warning stack protector enabled but no compiler support) 81 $(warning stack protector enabled but no compiler support)
@@ -179,8 +178,8 @@ archclean:
179define archhelp 178define archhelp
180 echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' 179 echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
181 echo ' install - Install kernel using' 180 echo ' install - Install kernel using'
182 echo ' (your) ~/bin/installkernel or' 181 echo ' (your) ~/bin/$(INSTALLKERNEL) or'
183 echo ' (distribution) /sbin/installkernel or' 182 echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
184 echo ' install to $$(INSTALL_PATH) and run lilo' 183 echo ' install to $$(INSTALL_PATH) and run lilo'
185 echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' 184 echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
186 echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' 185 echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 75e4f001e706..f543b70ffae2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -23,13 +23,14 @@
23 */ 23 */
24 .text 24 .text
25 25
26#include <linux/init.h>
26#include <linux/linkage.h> 27#include <linux/linkage.h>
27#include <asm/segment.h> 28#include <asm/segment.h>
28#include <asm/page_types.h> 29#include <asm/page_types.h>
29#include <asm/boot.h> 30#include <asm/boot.h>
30#include <asm/asm-offsets.h> 31#include <asm/asm-offsets.h>
31 32
32 .section ".text.head","ax",@progbits 33 __HEAD
33ENTRY(startup_32) 34ENTRY(startup_32)
34 cld 35 cld
35 /* 36 /*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62c284db9eb..077e1b69198e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -24,6 +24,7 @@
24 .code32 24 .code32
25 .text 25 .text
26 26
27#include <linux/init.h>
27#include <linux/linkage.h> 28#include <linux/linkage.h>
28#include <asm/segment.h> 29#include <asm/segment.h>
29#include <asm/pgtable_types.h> 30#include <asm/pgtable_types.h>
@@ -33,7 +34,7 @@
33#include <asm/processor-flags.h> 34#include <asm/processor-flags.h>
34#include <asm/asm-offsets.h> 35#include <asm/asm-offsets.h>
35 36
36 .section ".text.head" 37 __HEAD
37 .code32 38 .code32
38ENTRY(startup_32) 39ENTRY(startup_32)
39 cld 40 cld
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index cc353e1b3ffd..f4193bb48782 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,3 +1,5 @@
1#include <asm-generic/vmlinux.lds.h>
2
1OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) 3OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
2 4
3#undef i386 5#undef i386
@@ -18,9 +20,9 @@ SECTIONS
18 * address 0. 20 * address 0.
19 */ 21 */
20 . = 0; 22 . = 0;
21 .text.head : { 23 .head.text : {
22 _head = . ; 24 _head = . ;
23 *(.text.head) 25 HEAD_TEXT
24 _ehead = . ; 26 _ehead = . ;
25 } 27 }
26 .rodata.compressed : { 28 .rodata.compressed : {
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 8d60ee15dfd9..d13ec1c38640 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -33,8 +33,8 @@ verify "$3"
33 33
34# User may have a custom install script 34# User may have a custom install script
35 35
36if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi 36if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
37if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi 37if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
38 38
39# Default install - same as make zlilo 39# Default install - same as make zlilo
40 40
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 0f6ec455a2b1..03c0683636b6 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -53,6 +53,9 @@ SECTIONS
53 53
54 /DISCARD/ : { *(.note*) } 54 /DISCARD/ : { *(.note*) }
55 55
56 /*
57 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
58 */
56 . = ASSERT(_end <= 0x8000, "Setup too big!"); 59 . = ASSERT(_end <= 0x8000, "Setup too big!");
57 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); 60 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
58 /* Necessary for the very-old-loader check to work... */ 61 /* Necessary for the very-old-loader check to work... */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 585edebe12cf..49c552c060e9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
82 return -EINVAL; 82 return -EINVAL;
83 } 83 }
84 84
85 if (irq_fpu_usable()) 85 if (!irq_fpu_usable())
86 err = crypto_aes_expand_key(ctx, in_key, key_len); 86 err = crypto_aes_expand_key(ctx, in_key, key_len);
87 else { 87 else {
88 kernel_fpu_begin(); 88 kernel_fpu_begin();
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
103{ 103{
104 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 104 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
105 105
106 if (irq_fpu_usable()) 106 if (!irq_fpu_usable())
107 crypto_aes_encrypt_x86(ctx, dst, src); 107 crypto_aes_encrypt_x86(ctx, dst, src);
108 else { 108 else {
109 kernel_fpu_begin(); 109 kernel_fpu_begin();
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
116{ 116{
117 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 117 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
118 118
119 if (irq_fpu_usable()) 119 if (!irq_fpu_usable())
120 crypto_aes_decrypt_x86(ctx, dst, src); 120 crypto_aes_decrypt_x86(ctx, dst, src);
121 else { 121 else {
122 kernel_fpu_begin(); 122 kernel_fpu_begin();
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
342 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 342 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
343 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); 343 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
344 344
345 if (irq_fpu_usable()) { 345 if (!irq_fpu_usable()) {
346 struct ablkcipher_request *cryptd_req = 346 struct ablkcipher_request *cryptd_req =
347 ablkcipher_request_ctx(req); 347 ablkcipher_request_ctx(req);
348 memcpy(cryptd_req, req, sizeof(*req)); 348 memcpy(cryptd_req, req, sizeof(*req));
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
363 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 363 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
364 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); 364 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
365 365
366 if (irq_fpu_usable()) { 366 if (!irq_fpu_usable()) {
367 struct ablkcipher_request *cryptd_req = 367 struct ablkcipher_request *cryptd_req =
368 ablkcipher_request_ctx(req); 368 ablkcipher_request_ctx(req);
369 memcpy(cryptd_req, req, sizeof(*req)); 369 memcpy(cryptd_req, req, sizeof(*req));
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ba331bfd1112..581b0568fe19 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
21#define __AUDIT_ARCH_LE 0x40000000 21#define __AUDIT_ARCH_LE 0x40000000
22 22
23#ifndef CONFIG_AUDITSYSCALL 23#ifndef CONFIG_AUDITSYSCALL
24#define sysexit_audit int_ret_from_sys_call 24#define sysexit_audit ia32_ret_from_sys_call
25#define sysretl_audit int_ret_from_sys_call 25#define sysretl_audit ia32_ret_from_sys_call
26#endif 26#endif
27 27
28#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) 28#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
39 .endm 39 .endm
40 40
41 /* clobbers %eax */ 41 /* clobbers %eax */
42 .macro CLEAR_RREGS _r9=rax 42 .macro CLEAR_RREGS offset=0, _r9=rax
43 xorl %eax,%eax 43 xorl %eax,%eax
44 movq %rax,R11(%rsp) 44 movq %rax,\offset+R11(%rsp)
45 movq %rax,R10(%rsp) 45 movq %rax,\offset+R10(%rsp)
46 movq %\_r9,R9(%rsp) 46 movq %\_r9,\offset+R9(%rsp)
47 movq %rax,R8(%rsp) 47 movq %rax,\offset+R8(%rsp)
48 .endm 48 .endm
49 49
50 /* 50 /*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
172 movl RIP-R11(%rsp),%edx /* User %eip */ 172 movl RIP-R11(%rsp),%edx /* User %eip */
173 CFI_REGISTER rip,rdx 173 CFI_REGISTER rip,rdx
174 RESTORE_ARGS 1,24,1,1,1,1 174 RESTORE_ARGS 1,24,1,1,1,1
175 xorq %r8,%r8
176 xorq %r9,%r9
177 xorq %r10,%r10
178 xorq %r11,%r11
175 popfq 179 popfq
176 CFI_ADJUST_CFA_OFFSET -8 180 CFI_ADJUST_CFA_OFFSET -8
177 /*CFI_RESTORE rflags*/ 181 /*CFI_RESTORE rflags*/
@@ -200,9 +204,9 @@ sysexit_from_sys_call:
200 movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ 204 movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
201 .endm 205 .endm
202 206
203 .macro auditsys_exit exit,ebpsave=RBP 207 .macro auditsys_exit exit
204 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 208 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
205 jnz int_ret_from_sys_call 209 jnz ia32_ret_from_sys_call
206 TRACE_IRQS_ON 210 TRACE_IRQS_ON
207 sti 211 sti
208 movl %eax,%esi /* second arg, syscall return value */ 212 movl %eax,%esi /* second arg, syscall return value */
@@ -213,13 +217,13 @@ sysexit_from_sys_call:
213 call audit_syscall_exit 217 call audit_syscall_exit
214 GET_THREAD_INFO(%r10) 218 GET_THREAD_INFO(%r10)
215 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ 219 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
216 movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
217 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 220 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
218 cli 221 cli
219 TRACE_IRQS_OFF 222 TRACE_IRQS_OFF
220 testl %edi,TI_flags(%r10) 223 testl %edi,TI_flags(%r10)
221 jnz int_with_check 224 jz \exit
222 jmp \exit 225 CLEAR_RREGS -ARGOFFSET
226 jmp int_with_check
223 .endm 227 .endm
224 228
225sysenter_auditsys: 229sysenter_auditsys:
@@ -329,6 +333,9 @@ sysretl_from_sys_call:
329 CFI_REGISTER rip,rcx 333 CFI_REGISTER rip,rcx
330 movl EFLAGS-ARGOFFSET(%rsp),%r11d 334 movl EFLAGS-ARGOFFSET(%rsp),%r11d
331 /*CFI_REGISTER rflags,r11*/ 335 /*CFI_REGISTER rflags,r11*/
336 xorq %r10,%r10
337 xorq %r9,%r9
338 xorq %r8,%r8
332 TRACE_IRQS_ON 339 TRACE_IRQS_ON
333 movl RSP-ARGOFFSET(%rsp),%esp 340 movl RSP-ARGOFFSET(%rsp),%esp
334 CFI_RESTORE rsp 341 CFI_RESTORE rsp
@@ -343,7 +350,7 @@ cstar_auditsys:
343 jmp cstar_dispatch 350 jmp cstar_dispatch
344 351
345sysretl_audit: 352sysretl_audit:
346 auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ 353 auditsys_exit sysretl_from_sys_call
347#endif 354#endif
348 355
349cstar_tracesys: 356cstar_tracesys:
@@ -353,7 +360,7 @@ cstar_tracesys:
353#endif 360#endif
354 xchgl %r9d,%ebp 361 xchgl %r9d,%ebp
355 SAVE_REST 362 SAVE_REST
356 CLEAR_RREGS r9 363 CLEAR_RREGS 0, r9
357 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 364 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
358 movq %rsp,%rdi /* &pt_regs -> arg1 */ 365 movq %rsp,%rdi /* &pt_regs -> arg1 */
359 call syscall_trace_enter 366 call syscall_trace_enter
@@ -425,6 +432,8 @@ ia32_do_call:
425 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative 432 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
426ia32_sysret: 433ia32_sysret:
427 movq %rax,RAX-ARGOFFSET(%rsp) 434 movq %rax,RAX-ARGOFFSET(%rsp)
435ia32_ret_from_sys_call:
436 CLEAR_RREGS -ARGOFFSET
428 jmp int_ret_from_sys_call 437 jmp int_ret_from_sys_call
429 438
430ia32_tracesys: 439ia32_tracesys:
@@ -442,8 +451,8 @@ END(ia32_syscall)
442 451
443ia32_badsys: 452ia32_badsys:
444 movq $0,ORIG_RAX-ARGOFFSET(%rsp) 453 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
445 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 454 movq $-ENOSYS,%rax
446 jmp int_ret_from_sys_call 455 jmp ia32_sysret
447 456
448quiet_ni_syscall: 457quiet_ni_syscall:
449 movq $-ENOSYS,%rax 458 movq $-ENOSYS,%rax
@@ -831,5 +840,5 @@ ia32_sys_call_table:
831 .quad compat_sys_preadv 840 .quad compat_sys_preadv
832 .quad compat_sys_pwritev 841 .quad compat_sys_pwritev
833 .quad compat_sys_rt_tgsigqueueinfo /* 335 */ 842 .quad compat_sys_rt_tgsigqueueinfo /* 335 */
834 .quad sys_perf_counter_open 843 .quad sys_perf_event_open
835ia32_syscall_end: 844ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 20d1465a2ab0..4518dc500903 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
144 144
145#else /* !CONFIG_ACPI */ 145#else /* !CONFIG_ACPI */
146 146
147#define acpi_disabled 1
148#define acpi_lapic 0 147#define acpi_lapic 0
149#define acpi_ioapic 0 148#define acpi_ioapic 0
150static inline void acpi_noirq_set(void) { } 149static inline void acpi_noirq_set(void) { }
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index ac95995b7bad..4b180897e6b5 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -31,6 +31,7 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
31extern void amd_iommu_flush_all_domains(void); 31extern void amd_iommu_flush_all_domains(void);
32extern void amd_iommu_flush_all_devices(void); 32extern void amd_iommu_flush_all_devices(void);
33extern void amd_iommu_shutdown(void); 33extern void amd_iommu_shutdown(void);
34extern void amd_iommu_apply_erratum_63(u16 devid);
34#else 35#else
35static inline int amd_iommu_init(void) { return -ENODEV; } 36static inline int amd_iommu_init(void) { return -ENODEV; }
36static inline void amd_iommu_detect(void) { } 37static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c6d21b18806c..474d80d3e6cc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -66,6 +66,19 @@ static inline void default_inquire_remote_apic(int apicid)
66} 66}
67 67
68/* 68/*
69 * With 82489DX we can't rely on apic feature bit
70 * retrieved via cpuid but still have to deal with
71 * such an apic chip so we assume that SMP configuration
72 * is found from MP table (64bit case uses ACPI mostly
73 * which set smp presence flag as well so we are safe
74 * to use this helper too).
75 */
76static inline bool apic_from_smp_config(void)
77{
78 return smp_found_config && !disable_apic;
79}
80
81/*
69 * Basic functions accessing APICs. 82 * Basic functions accessing APICs.
70 */ 83 */
71#ifdef CONFIG_PARAVIRT 84#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 5d367caa0e36..549860d3be8f 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_CACHE_H 1#ifndef _ASM_X86_CACHE_H
2#define _ASM_X86_CACHE_H 2#define _ASM_X86_CACHE_H
3 3
4#include <linux/linkage.h>
5
4/* L1 cache line size */ 6/* L1 cache line size */
5#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) 7#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
6#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
@@ -13,7 +15,7 @@
13#ifdef CONFIG_SMP 15#ifdef CONFIG_SMP
14#define __cacheline_aligned_in_smp \ 16#define __cacheline_aligned_in_smp \
15 __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ 17 __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \
16 __attribute__((__section__(".data.page_aligned"))) 18 __page_aligned_data
17#endif 19#endif
18#endif 20#endif
19 21
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 7c5ef8b14d92..46fc474fd819 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
161 "adcl $0, %0 ;\n" 161 "adcl $0, %0 ;\n"
162 : "=&r" (sum) 162 : "=&r" (sum)
163 : "r" (saddr), "r" (daddr), 163 : "r" (saddr), "r" (daddr),
164 "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); 164 "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)
165 : "memory");
165 166
166 return csum_fold(sum); 167 return csum_fold(sum);
167} 168}
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 82ceb788a981..ee1931be6593 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
312 312
313extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); 313extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
314 314
315#define cmpxchg64(ptr, o, n) \ 315#define cmpxchg64(ptr, o, n) \
316({ \ 316({ \
317 __typeof__(*(ptr)) __ret; \ 317 __typeof__(*(ptr)) __ret; \
318 if (likely(boot_cpu_data.x86 > 4)) \ 318 __typeof__(*(ptr)) __old = (o); \
319 __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \ 319 __typeof__(*(ptr)) __new = (n); \
320 (unsigned long long)(o), \ 320 alternative_io("call cmpxchg8b_emu", \
321 (unsigned long long)(n)); \ 321 "lock; cmpxchg8b (%%esi)" , \
322 else \ 322 X86_FEATURE_CX8, \
323 __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ 323 "=A" (__ret), \
324 (unsigned long long)(o), \ 324 "S" ((ptr)), "0" (__old), \
325 (unsigned long long)(n)); \ 325 "b" ((unsigned int)__new), \
326 __ret; \ 326 "c" ((unsigned int)(__new>>32)) \
327}) 327 : "memory"); \
328 __ret; })
329
330
331
328#define cmpxchg64_local(ptr, o, n) \ 332#define cmpxchg64_local(ptr, o, n) \
329({ \ 333({ \
330 __typeof__(*(ptr)) __ret; \ 334 __typeof__(*(ptr)) __ret; \
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e8de2f6f5ca5..617bd56b3070 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc)
288 288
289static inline unsigned long get_desc_base(const struct desc_struct *desc) 289static inline unsigned long get_desc_base(const struct desc_struct *desc)
290{ 290{
291 return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); 291 return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
292} 292}
293 293
294static inline void set_desc_base(struct desc_struct *desc, unsigned long base) 294static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 0ee770d23d0e..6a25d5d42836 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -14,6 +14,12 @@
14#include <asm/swiotlb.h> 14#include <asm/swiotlb.h>
15#include <asm-generic/dma-coherent.h> 15#include <asm-generic/dma-coherent.h>
16 16
17#ifdef CONFIG_ISA
18# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
19#else
20# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
21#endif
22
17extern dma_addr_t bad_dma_address; 23extern dma_addr_t bad_dma_address;
18extern int iommu_merge; 24extern int iommu_merge;
19extern struct device x86_dma_fallback_dev; 25extern struct device x86_dma_fallback_dev;
@@ -124,10 +130,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
124 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) 130 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
125 return memory; 131 return memory;
126 132
127 if (!dev) { 133 if (!dev)
128 dev = &x86_dma_fallback_dev; 134 dev = &x86_dma_fallback_dev;
129 gfp |= GFP_DMA;
130 }
131 135
132 if (!is_device_dma_capable(dev)) 136 if (!is_device_dma_capable(dev))
133 return NULL; 137 return NULL;
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 5e3f2044f0d3..f5693c81a1db 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) 49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
51 51
52#ifdef CONFIG_PERF_COUNTERS 52#ifdef CONFIG_PERF_EVENTS
53BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) 53BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
54#endif 54#endif
55 55
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
796#define KVM_ARCH_WANT_MMU_NOTIFIER 796#define KVM_ARCH_WANT_MMU_NOTIFIER
797int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 797int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
798int kvm_age_hva(struct kvm *kvm, unsigned long hva); 798int kvm_age_hva(struct kvm *kvm, unsigned long hva);
799void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
799int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 800int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
800int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 801int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
801int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 802int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c5814..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
133static inline void enable_p5_mce(void) {} 133static inline void enable_p5_mce(void) {}
134#endif 134#endif
135 135
136extern void (*x86_mce_decode_callback)(struct mce *m);
137
136void mce_setup(struct mce *m); 138void mce_setup(struct mce *m);
137void mce_log(struct mce *m); 139void mce_log(struct mce *m);
138DECLARE_PER_CPU(struct sys_device, mce_dev); 140DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index f923203dc39a..4a2d4e0c18d9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
37 37
38 if (likely(prev != next)) { 38 if (likely(prev != next)) {
39 /* stop flush ipis for the previous mm */ 39 /* stop flush ipis for the previous mm */
40 cpu_clear(cpu, prev->cpu_vm_mask); 40 cpumask_clear_cpu(cpu, mm_cpumask(prev));
41#ifdef CONFIG_SMP 41#ifdef CONFIG_SMP
42 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 42 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
43 percpu_write(cpu_tlbstate.active_mm, next); 43 percpu_write(cpu_tlbstate.active_mm, next);
44#endif 44#endif
45 cpu_set(cpu, next->cpu_vm_mask); 45 cpumask_set_cpu(cpu, mm_cpumask(next));
46 46
47 /* Re-load page tables */ 47 /* Re-load page tables */
48 load_cr3(next->pgd); 48 load_cr3(next->pgd);
@@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
58 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 58 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
59 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); 59 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
60 60
61 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { 61 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
62 /* We were in lazy tlb mode and leave_mm disabled 62 /* We were in lazy tlb mode and leave_mm disabled
63 * tlb flush IPI delivery. We must reload CR3 63 * tlb flush IPI delivery. We must reload CR3
64 * to make sure to use no freed page tables. 64 * to make sure to use no freed page tables.
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index e63cf7d441e1..139d4c1a33a7 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
40#define NMI_INVALID 3 40#define NMI_INVALID 3
41 41
42struct ctl_table; 42struct ctl_table;
43struct file; 43extern int proc_nmi_enabled(struct ctl_table *, int ,
44extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
45 void __user *, size_t *, loff_t *); 44 void __user *, size_t *, loff_t *);
46extern int unknown_nmi_panic; 45extern int unknown_nmi_panic;
47 46
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc41041d..efb38994859c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
840 840
841static inline unsigned long __raw_local_save_flags(void) 841static inline unsigned long __raw_local_save_flags(void)
842{ 842{
843 unsigned long f; 843 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
844
845 asm volatile(paravirt_alt(PARAVIRT_CALL)
846 : "=a"(f)
847 : paravirt_type(pv_irq_ops.save_fl),
848 paravirt_clobber(CLBR_EAX)
849 : "memory", "cc");
850 return f;
851} 844}
852 845
853static inline void raw_local_irq_restore(unsigned long f) 846static inline void raw_local_irq_restore(unsigned long f)
854{ 847{
855 asm volatile(paravirt_alt(PARAVIRT_CALL) 848 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
856 : "=a"(f)
857 : PV_FLAGS_ARG(f),
858 paravirt_type(pv_irq_ops.restore_fl),
859 paravirt_clobber(CLBR_EAX)
860 : "memory", "cc");
861} 849}
862 850
863static inline void raw_local_irq_disable(void) 851static inline void raw_local_irq_disable(void)
864{ 852{
865 asm volatile(paravirt_alt(PARAVIRT_CALL) 853 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
866 :
867 : paravirt_type(pv_irq_ops.irq_disable),
868 paravirt_clobber(CLBR_EAX)
869 : "memory", "eax", "cc");
870} 854}
871 855
872static inline void raw_local_irq_enable(void) 856static inline void raw_local_irq_enable(void)
873{ 857{
874 asm volatile(paravirt_alt(PARAVIRT_CALL) 858 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
875 :
876 : paravirt_type(pv_irq_ops.irq_enable),
877 paravirt_clobber(CLBR_EAX)
878 : "memory", "eax", "cc");
879} 859}
880 860
881static inline unsigned long __raw_local_irq_save(void) 861static inline unsigned long __raw_local_irq_save(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b32489d..9357473c8da0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
494#define EXTRA_CLOBBERS 494#define EXTRA_CLOBBERS
495#define VEXTRA_CLOBBERS 495#define VEXTRA_CLOBBERS
496#else /* CONFIG_X86_64 */ 496#else /* CONFIG_X86_64 */
497/* [re]ax isn't an arg, but the return val */
497#define PVOP_VCALL_ARGS \ 498#define PVOP_VCALL_ARGS \
498 unsigned long __edi = __edi, __esi = __esi, \ 499 unsigned long __edi = __edi, __esi = __esi, \
499 __edx = __edx, __ecx = __ecx 500 __edx = __edx, __ecx = __ecx, __eax = __eax
500#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax 501#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
501 502
502#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) 503#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
503#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) 504#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
509 "=c" (__ecx) 510 "=c" (__ecx)
510#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) 511#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
511 512
513/* void functions are still allowed [re]ax for scratch */
512#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) 514#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
513#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS 515#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
514 516
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
583 VEXTRA_CLOBBERS, \ 585 VEXTRA_CLOBBERS, \
584 pre, post, ##__VA_ARGS__) 586 pre, post, ##__VA_ARGS__)
585 587
586#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ 588#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
587 ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ 589 ____PVOP_VCALL(op.func, CLBR_RET_REG, \
588 PVOP_VCALLEE_CLOBBERS, , \ 590 PVOP_VCALLEE_CLOBBERS, , \
589 pre, post, ##__VA_ARGS__) 591 pre, post, ##__VA_ARGS__)
590 592
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f76a162c082c..ada8c201d513 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
143static inline const struct cpumask * 143static inline const struct cpumask *
144cpumask_of_pcibus(const struct pci_bus *bus) 144cpumask_of_pcibus(const struct pci_bus *bus)
145{ 145{
146 return cpumask_of_node(__pcibus_to_node(bus)); 146 int node;
147
148 node = __pcibus_to_node(bus);
149 return (node == -1) ? cpu_online_mask :
150 cpumask_of_node(node);
147} 151}
148#endif 152#endif
149 153
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_event.h
index e7b7c938ae27..ad7ce3fd5065 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -1,8 +1,8 @@
1#ifndef _ASM_X86_PERF_COUNTER_H 1#ifndef _ASM_X86_PERF_EVENT_H
2#define _ASM_X86_PERF_COUNTER_H 2#define _ASM_X86_PERF_EVENT_H
3 3
4/* 4/*
5 * Performance counter hw details: 5 * Performance event hw details:
6 */ 6 */
7 7
8#define X86_PMC_MAX_GENERIC 8 8#define X86_PMC_MAX_GENERIC 8
@@ -43,7 +43,7 @@
43union cpuid10_eax { 43union cpuid10_eax {
44 struct { 44 struct {
45 unsigned int version_id:8; 45 unsigned int version_id:8;
46 unsigned int num_counters:8; 46 unsigned int num_events:8;
47 unsigned int bit_width:8; 47 unsigned int bit_width:8;
48 unsigned int mask_length:8; 48 unsigned int mask_length:8;
49 } split; 49 } split;
@@ -52,7 +52,7 @@ union cpuid10_eax {
52 52
53union cpuid10_edx { 53union cpuid10_edx {
54 struct { 54 struct {
55 unsigned int num_counters_fixed:4; 55 unsigned int num_events_fixed:4;
56 unsigned int reserved:28; 56 unsigned int reserved:28;
57 } split; 57 } split;
58 unsigned int full; 58 unsigned int full;
@@ -60,7 +60,7 @@ union cpuid10_edx {
60 60
61 61
62/* 62/*
63 * Fixed-purpose performance counters: 63 * Fixed-purpose performance events:
64 */ 64 */
65 65
66/* 66/*
@@ -87,22 +87,22 @@ union cpuid10_edx {
87/* 87/*
88 * We model BTS tracing as another fixed-mode PMC. 88 * We model BTS tracing as another fixed-mode PMC.
89 * 89 *
90 * We choose a value in the middle of the fixed counter range, since lower 90 * We choose a value in the middle of the fixed event range, since lower
91 * values are used by actual fixed counters and higher values are used 91 * values are used by actual fixed events and higher values are used
92 * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. 92 * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
93 */ 93 */
94#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) 94#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
95 95
96 96
97#ifdef CONFIG_PERF_COUNTERS 97#ifdef CONFIG_PERF_EVENTS
98extern void init_hw_perf_counters(void); 98extern void init_hw_perf_events(void);
99extern void perf_counters_lapic_init(void); 99extern void perf_events_lapic_init(void);
100 100
101#define PERF_COUNTER_INDEX_OFFSET 0 101#define PERF_EVENT_INDEX_OFFSET 0
102 102
103#else 103#else
104static inline void init_hw_perf_counters(void) { } 104static inline void init_hw_perf_events(void) { }
105static inline void perf_counters_lapic_init(void) { } 105static inline void perf_events_lapic_init(void) { }
106#endif 106#endif
107 107
108#endif /* _ASM_X86_PERF_COUNTER_H */ 108#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b467bf3c680..d1f4a760be23 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte)
277typedef struct page *pgtable_t; 277typedef struct page *pgtable_t;
278 278
279extern pteval_t __supported_pte_mask; 279extern pteval_t __supported_pte_mask;
280extern void set_nx(void);
280extern int nx_enabled; 281extern int nx_enabled;
281 282
282#define pgprot_writecombine pgprot_writecombine 283#define pgprot_writecombine pgprot_writecombine
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c3429e8b2424..c9786480f0fe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
1000#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) 1000#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
1001 1001
1002#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 1002#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
1003#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ 1003extern unsigned long KSTK_ESP(struct task_struct *task);
1004#endif /* CONFIG_X86_64 */ 1004#endif /* CONFIG_X86_64 */
1005 1005
1006extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 1006extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 6a84ed166aec..1e796782cd7b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu)
121 smp_ops.send_call_func_single_ipi(cpu); 121 smp_ops.send_call_func_single_ipi(cpu);
122} 122}
123 123
124#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
125static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) 124static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
126{ 125{
127 smp_ops.send_call_func_ipi(mask); 126 smp_ops.send_call_func_ipi(mask);
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index c86f452256de..ae907e617181 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -65,7 +65,6 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
65 case 4: 65 case 4:
66 *(int *)to = *(int *)from; 66 *(int *)to = *(int *)from;
67 return to; 67 return to;
68
69 case 3: 68 case 3:
70 *(short *)to = *(short *)from; 69 *(short *)to = *(short *)from;
71 *((char *)to + 2) = *((char *)from + 2); 70 *((char *)to + 2) = *((char *)from + 2);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d82f39bb7905..8d33bc5462d1 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Access to user system call parameters and results 2 * Access to user system call parameters and results
3 * 3 *
4 * Copyright (C) 2008 Red Hat, Inc. All rights reserved. 4 * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
5 * 5 *
6 * This copyrighted material is made available to anyone wishing to use, 6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions 7 * modify, copy, or redistribute it subject to the terms and conditions
@@ -16,13 +16,13 @@
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/err.h> 17#include <linux/err.h>
18 18
19static inline long syscall_get_nr(struct task_struct *task, 19/*
20 struct pt_regs *regs) 20 * Only the low 32 bits of orig_ax are meaningful, so we return int.
21 * This importantly ignores the high bits on 64-bit, so comparisons
22 * sign-extend the low 32 bits.
23 */
24static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
21{ 25{
22 /*
23 * We always sign-extend a -1 value being set here,
24 * so this is always either -1L or a syscall number.
25 */
26 return regs->orig_ax; 26 return regs->orig_ax;
27} 27}
28 28
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6f0695d744bf..40e37b10c6c0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern unsigned long node_remap_size[];
143 | 1*SD_BALANCE_FORK \ 143 | 1*SD_BALANCE_FORK \
144 | 0*SD_BALANCE_WAKE \ 144 | 0*SD_BALANCE_WAKE \
145 | 1*SD_WAKE_AFFINE \ 145 | 1*SD_WAKE_AFFINE \
146 | 0*SD_PREFER_LOCAL \
146 | 0*SD_SHARE_CPUPOWER \ 147 | 0*SD_SHARE_CPUPOWER \
147 | 0*SD_POWERSAVINGS_BALANCE \ 148 | 0*SD_POWERSAVINGS_BALANCE \
148 | 0*SD_SHARE_PKG_RESOURCES \ 149 | 0*SD_SHARE_PKG_RESOURCES \
@@ -165,21 +166,11 @@ static inline int numa_node_id(void)
165 return 0; 166 return 0;
166} 167}
167 168
168static inline int cpu_to_node(int cpu)
169{
170 return 0;
171}
172
173static inline int early_cpu_to_node(int cpu) 169static inline int early_cpu_to_node(int cpu)
174{ 170{
175 return 0; 171 return 0;
176} 172}
177 173
178static inline const struct cpumask *cpumask_of_node(int node)
179{
180 return cpu_online_mask;
181}
182
183static inline void setup_node_to_cpumask_map(void) { } 174static inline void setup_node_to_cpumask_map(void) { }
184 175
185#endif 176#endif
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 5e06259e90e5..632fb44b4cb5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -33,7 +33,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
33 * Copy data from kernel space to user space. Caller must check 33 * Copy data from kernel space to user space. Caller must check
34 * the specified block with access_ok() before calling this function. 34 * the specified block with access_ok() before calling this function.
35 * The caller should also make sure he pins the user space address 35 * The caller should also make sure he pins the user space address
36 * so that the we don't result in page fault and sleep. 36 * so that we don't result in page fault and sleep.
37 * 37 *
38 * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault 38 * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
39 * we return the initial request size (1, 2 or 4), as copy_*_user should do. 39 * we return the initial request size (1, 2 or 4), as copy_*_user should do.
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 8deaada61bc8..6fb3c209a7e3 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -341,7 +341,7 @@
341#define __NR_preadv 333 341#define __NR_preadv 333
342#define __NR_pwritev 334 342#define __NR_pwritev 334
343#define __NR_rt_tgsigqueueinfo 335 343#define __NR_rt_tgsigqueueinfo 335
344#define __NR_perf_counter_open 336 344#define __NR_perf_event_open 336
345 345
346#ifdef __KERNEL__ 346#ifdef __KERNEL__
347 347
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index b9f3c60de5f7..8d3ad0adbc68 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
659__SYSCALL(__NR_pwritev, sys_pwritev) 659__SYSCALL(__NR_pwritev, sys_pwritev)
660#define __NR_rt_tgsigqueueinfo 297 660#define __NR_rt_tgsigqueueinfo 297
661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) 661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
662#define __NR_perf_counter_open 298 662#define __NR_perf_event_open 298
663__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) 663__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
664 664
665#ifndef __NO_STUBS 665#ifndef __NO_STUBS
666#define __ARCH_WANT_OLD_READDIR 666#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 77a68505419a..d1414af98559 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -15,9 +15,12 @@
15#include <linux/numa.h> 15#include <linux/numa.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/timer.h> 17#include <linux/timer.h>
18#include <linux/io.h>
18#include <asm/types.h> 19#include <asm/types.h>
19#include <asm/percpu.h> 20#include <asm/percpu.h>
20#include <asm/uv/uv_mmrs.h> 21#include <asm/uv/uv_mmrs.h>
22#include <asm/irq_vectors.h>
23#include <asm/io_apic.h>
21 24
22 25
23/* 26/*
@@ -113,7 +116,7 @@
113/* 116/*
114 * The largest possible NASID of a C or M brick (+ 2) 117 * The largest possible NASID of a C or M brick (+ 2)
115 */ 118 */
116#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) 119#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
117 120
118struct uv_scir_s { 121struct uv_scir_s {
119 struct timer_list timer; 122 struct timer_list timer;
@@ -229,6 +232,20 @@ static inline unsigned long uv_gpa(void *v)
229 return uv_soc_phys_ram_to_gpa(__pa(v)); 232 return uv_soc_phys_ram_to_gpa(__pa(v));
230} 233}
231 234
235/* gnode -> pnode */
236static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
237{
238 return gpa >> uv_hub_info->m_val;
239}
240
241/* gpa -> pnode */
242static inline int uv_gpa_to_pnode(unsigned long gpa)
243{
244 unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
245
246 return uv_gpa_to_gnode(gpa) & n_mask;
247}
248
232/* pnode, offset --> socket virtual */ 249/* pnode, offset --> socket virtual */
233static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) 250static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
234{ 251{
@@ -258,13 +275,13 @@ static inline unsigned long *uv_global_mmr32_address(int pnode,
258static inline void uv_write_global_mmr32(int pnode, unsigned long offset, 275static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
259 unsigned long val) 276 unsigned long val)
260{ 277{
261 *uv_global_mmr32_address(pnode, offset) = val; 278 writeq(val, uv_global_mmr32_address(pnode, offset));
262} 279}
263 280
264static inline unsigned long uv_read_global_mmr32(int pnode, 281static inline unsigned long uv_read_global_mmr32(int pnode,
265 unsigned long offset) 282 unsigned long offset)
266{ 283{
267 return *uv_global_mmr32_address(pnode, offset); 284 return readq(uv_global_mmr32_address(pnode, offset));
268} 285}
269 286
270/* 287/*
@@ -281,13 +298,13 @@ static inline unsigned long *uv_global_mmr64_address(int pnode,
281static inline void uv_write_global_mmr64(int pnode, unsigned long offset, 298static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
282 unsigned long val) 299 unsigned long val)
283{ 300{
284 *uv_global_mmr64_address(pnode, offset) = val; 301 writeq(val, uv_global_mmr64_address(pnode, offset));
285} 302}
286 303
287static inline unsigned long uv_read_global_mmr64(int pnode, 304static inline unsigned long uv_read_global_mmr64(int pnode,
288 unsigned long offset) 305 unsigned long offset)
289{ 306{
290 return *uv_global_mmr64_address(pnode, offset); 307 return readq(uv_global_mmr64_address(pnode, offset));
291} 308}
292 309
293/* 310/*
@@ -301,22 +318,22 @@ static inline unsigned long *uv_local_mmr_address(unsigned long offset)
301 318
302static inline unsigned long uv_read_local_mmr(unsigned long offset) 319static inline unsigned long uv_read_local_mmr(unsigned long offset)
303{ 320{
304 return *uv_local_mmr_address(offset); 321 return readq(uv_local_mmr_address(offset));
305} 322}
306 323
307static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) 324static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
308{ 325{
309 *uv_local_mmr_address(offset) = val; 326 writeq(val, uv_local_mmr_address(offset));
310} 327}
311 328
312static inline unsigned char uv_read_local_mmr8(unsigned long offset) 329static inline unsigned char uv_read_local_mmr8(unsigned long offset)
313{ 330{
314 return *((unsigned char *)uv_local_mmr_address(offset)); 331 return readb(uv_local_mmr_address(offset));
315} 332}
316 333
317static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) 334static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
318{ 335{
319 *((unsigned char *)uv_local_mmr_address(offset)) = val; 336 writeb(val, uv_local_mmr_address(offset));
320} 337}
321 338
322/* 339/*
@@ -420,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
420static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) 437static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
421{ 438{
422 unsigned long val; 439 unsigned long val;
440 unsigned long dmode = dest_Fixed;
441
442 if (vector == NMI_VECTOR)
443 dmode = dest_NMI;
423 444
424 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 445 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
425 ((apicid & 0x3f) << UVH_IPI_INT_APIC_ID_SHFT) | 446 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
447 (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
426 (vector << UVH_IPI_INT_VECTOR_SHFT); 448 (vector << UVH_IPI_INT_VECTOR_SHFT);
427 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 449 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
428} 450}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4ba419b668a5..d8e5d0cdd678 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o
56obj-$(CONFIG_STACKTRACE) += stacktrace.o 56obj-$(CONFIG_STACKTRACE) += stacktrace.o
57obj-y += cpu/ 57obj-y += cpu/
58obj-y += acpi/ 58obj-y += acpi/
59obj-$(CONFIG_SFI) += sfi.o
59obj-y += reboot.o 60obj-y += reboot.o
60obj-$(CONFIG_MCA) += mca_32.o 61obj-$(CONFIG_MCA) += mca_32.o
61obj-$(CONFIG_X86_MSR) += msr.o 62obj-$(CONFIG_X86_MSR) += msr.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8c44c232efcb..59cdfa4686b2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
48 * P4, Core and beyond CPUs 48 * P4, Core and beyond CPUs
49 */ 49 */
50 if (c->x86_vendor == X86_VENDOR_INTEL && 50 if (c->x86_vendor == X86_VENDOR_INTEL &&
51 (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14))) 51 (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
52 flags->bm_control = 0; 52 flags->bm_control = 0;
53} 53}
54EXPORT_SYMBOL(acpi_processor_power_init_bm_check); 54EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index d296f4a195c9..d85d1b2432ba 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
79 struct cpuinfo_x86 *c = &cpu_data(pr->id); 79 struct cpuinfo_x86 *c = &cpu_data(pr->id);
80 80
81 pr->pdc = NULL; 81 pr->pdc = NULL;
82 if (c->x86_vendor == X86_VENDOR_INTEL) 82 if (c->x86_vendor == X86_VENDOR_INTEL ||
83 c->x86_vendor == X86_VENDOR_CENTAUR)
83 init_intel_pdc(pr, c); 84 init_intel_pdc(pr, c);
84 85
85 return; 86 return;
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 7da00b799cda..060fff8f5c5b 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -57,5 +57,8 @@ SECTIONS
57 *(.note*) 57 *(.note*)
58 } 58 }
59 59
60 /*
61 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
62 */
60 . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); 63 . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
61} 64}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 98f230f6a28d..0285521e0a99 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1220,6 +1220,8 @@ static void __detach_device(struct protection_domain *domain, u16 devid)
1220 amd_iommu_dev_table[devid].data[1] = 0; 1220 amd_iommu_dev_table[devid].data[1] = 0;
1221 amd_iommu_dev_table[devid].data[2] = 0; 1221 amd_iommu_dev_table[devid].data[2] = 0;
1222 1222
1223 amd_iommu_apply_erratum_63(devid);
1224
1223 /* decrease reference counter */ 1225 /* decrease reference counter */
1224 domain->dev_cnt -= 1; 1226 domain->dev_cnt -= 1;
1225 1227
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index b4b61d462dcc..c20001e4f556 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -240,7 +240,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
240 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 240 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
241} 241}
242 242
243static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 243static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
244{ 244{
245 u32 ctrl; 245 u32 ctrl;
246 246
@@ -519,6 +519,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
519 amd_iommu_dev_table[devid].data[i] |= (1 << _bit); 519 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
520} 520}
521 521
522static int get_dev_entry_bit(u16 devid, u8 bit)
523{
524 int i = (bit >> 5) & 0x07;
525 int _bit = bit & 0x1f;
526
527 return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
528}
529
530
531void amd_iommu_apply_erratum_63(u16 devid)
532{
533 int sysmgt;
534
535 sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
536 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
537
538 if (sysmgt == 0x01)
539 set_dev_entry_bit(devid, DEV_ENTRY_IW);
540}
541
522/* Writes the specific IOMMU for a device into the rlookup table */ 542/* Writes the specific IOMMU for a device into the rlookup table */
523static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) 543static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
524{ 544{
@@ -547,6 +567,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
547 if (flags & ACPI_DEVFLAG_LINT1) 567 if (flags & ACPI_DEVFLAG_LINT1)
548 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); 568 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
549 569
570 amd_iommu_apply_erratum_63(devid);
571
550 set_iommu_for_device(iommu, devid); 572 set_iommu_for_device(iommu, devid);
551} 573}
552 574
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a34601f52987..894aa97f0717 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,7 +14,7 @@
14 * Mikael Pettersson : PM converted to driver model. 14 * Mikael Pettersson : PM converted to driver model.
15 */ 15 */
16 16
17#include <linux/perf_counter.h> 17#include <linux/perf_event.h>
18#include <linux/kernel_stat.h> 18#include <linux/kernel_stat.h>
19#include <linux/mc146818rtc.h> 19#include <linux/mc146818rtc.h>
20#include <linux/acpi_pmtmr.h> 20#include <linux/acpi_pmtmr.h>
@@ -35,7 +35,7 @@
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37 37
38#include <asm/perf_counter.h> 38#include <asm/perf_event.h>
39#include <asm/x86_init.h> 39#include <asm/x86_init.h>
40#include <asm/pgalloc.h> 40#include <asm/pgalloc.h>
41#include <asm/atomic.h> 41#include <asm/atomic.h>
@@ -62,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U;
62/* 62/*
63 * The highest APIC ID seen during enumeration. 63 * The highest APIC ID seen during enumeration.
64 * 64 *
65 * This determines the messaging protocol we can use: if all APIC IDs 65 * On AMD, this determines the messaging protocol we can use: if all APIC IDs
66 * are in the 0 ... 7 range, then we can use logical addressing which 66 * are in the 0 ... 7 range, then we can use logical addressing which
67 * has some performance advantages (better broadcasting). 67 * has some performance advantages (better broadcasting).
68 * 68 *
@@ -979,7 +979,7 @@ void lapic_shutdown(void)
979{ 979{
980 unsigned long flags; 980 unsigned long flags;
981 981
982 if (!cpu_has_apic) 982 if (!cpu_has_apic && !apic_from_smp_config())
983 return; 983 return;
984 984
985 local_irq_save(flags); 985 local_irq_save(flags);
@@ -1189,7 +1189,7 @@ void __cpuinit setup_local_APIC(void)
1189 apic_write(APIC_ESR, 0); 1189 apic_write(APIC_ESR, 0);
1190 } 1190 }
1191#endif 1191#endif
1192 perf_counters_lapic_init(); 1192 perf_events_lapic_init();
1193 1193
1194 preempt_disable(); 1194 preempt_disable();
1195 1195
@@ -1197,8 +1197,7 @@ void __cpuinit setup_local_APIC(void)
1197 * Double-check whether this APIC is really registered. 1197 * Double-check whether this APIC is really registered.
1198 * This is meaningless in clustered apic mode, so we skip it. 1198 * This is meaningless in clustered apic mode, so we skip it.
1199 */ 1199 */
1200 if (!apic->apic_id_registered()) 1200 BUG_ON(!apic->apic_id_registered());
1201 BUG();
1202 1201
1203 /* 1202 /*
1204 * Intel recommends to set DFR, LDR and TPR before enabling 1203 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1917,24 +1916,14 @@ void __cpuinit generic_processor_info(int apicid, int version)
1917 max_physical_apicid = apicid; 1916 max_physical_apicid = apicid;
1918 1917
1919#ifdef CONFIG_X86_32 1918#ifdef CONFIG_X86_32
1920 /* 1919 switch (boot_cpu_data.x86_vendor) {
1921 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1920 case X86_VENDOR_INTEL:
1922 * but we need to work other dependencies like SMP_SUSPEND etc 1921 if (num_processors > 8)
1923 * before this can be done without some confusion. 1922 def_to_bigsmp = 1;
1924 * if (CPU_HOTPLUG_ENABLED || num_processors > 8) 1923 break;
1925 * - Ashok Raj <ashok.raj@intel.com> 1924 case X86_VENDOR_AMD:
1926 */ 1925 if (max_physical_apicid >= 8)
1927 if (max_physical_apicid >= 8) {
1928 switch (boot_cpu_data.x86_vendor) {
1929 case X86_VENDOR_INTEL:
1930 if (!APIC_XAPIC(version)) {
1931 def_to_bigsmp = 0;
1932 break;
1933 }
1934 /* If P4 and above fall through */
1935 case X86_VENDOR_AMD:
1936 def_to_bigsmp = 1; 1926 def_to_bigsmp = 1;
1937 }
1938 } 1927 }
1939#endif 1928#endif
1940 1929
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 809e1cf86d6b..dc69f28489f5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
227 227
228 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 228 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
229 if (cfg) { 229 if (cfg) {
230 if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { 230 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
231 kfree(cfg); 231 kfree(cfg);
232 cfg = NULL; 232 cfg = NULL;
233 } else if (!alloc_cpumask_var_node(&cfg->old_domain, 233 } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
234 GFP_ATOMIC, node)) { 234 GFP_ATOMIC, node)) {
235 free_cpumask_var(cfg->domain); 235 free_cpumask_var(cfg->domain);
236 kfree(cfg); 236 kfree(cfg);
237 cfg = NULL; 237 cfg = NULL;
238 } else {
239 cpumask_clear(cfg->domain);
240 cpumask_clear(cfg->old_domain);
241 } 238 }
242 } 239 }
243 240
@@ -1874,7 +1871,7 @@ __apicdebuginit(int) print_all_ICs(void)
1874 print_PIC(); 1871 print_PIC();
1875 1872
1876 /* don't print out if apic is not there */ 1873 /* don't print out if apic is not there */
1877 if (!cpu_has_apic || disable_apic) 1874 if (!cpu_has_apic && !apic_from_smp_config())
1878 return 0; 1875 return 0;
1879 1876
1880 print_all_local_APICs(); 1877 print_all_local_APICs();
@@ -1999,7 +1996,7 @@ void disable_IO_APIC(void)
1999 /* 1996 /*
2000 * Use virtual wire A mode when interrupt remapping is enabled. 1997 * Use virtual wire A mode when interrupt remapping is enabled.
2001 */ 1998 */
2002 if (cpu_has_apic) 1999 if (cpu_has_apic || apic_from_smp_config())
2003 disconnect_bsp_APIC(!intr_remapping_enabled && 2000 disconnect_bsp_APIC(!intr_remapping_enabled &&
2004 ioapic_i8259.pin != -1); 2001 ioapic_i8259.pin != -1);
2005} 2002}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index cb66a22d98ad..7ff61d6a188a 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
508/* 508/*
509 * proc handler for /proc/sys/kernel/nmi 509 * proc handler for /proc/sys/kernel/nmi
510 */ 510 */
511int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, 511int proc_nmi_enabled(struct ctl_table *table, int write,
512 void __user *buffer, size_t *length, loff_t *ppos) 512 void __user *buffer, size_t *length, loff_t *ppos)
513{ 513{
514 int old_state; 514 int old_state;
515 515
516 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; 516 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
517 old_state = nmi_watchdog_enabled; 517 old_state = nmi_watchdog_enabled;
518 proc_dointvec(table, write, file, buffer, length, ppos); 518 proc_dointvec(table, write, buffer, length, ppos);
519 if (!!old_state == !!nmi_watchdog_enabled) 519 if (!!old_state == !!nmi_watchdog_enabled)
520 return 0; 520 return 0;
521 521
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 65edc180fc82..c4cbd3080c1c 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -64,16 +64,23 @@ void __init default_setup_apic_routing(void)
64 apic = &apic_x2apic_phys; 64 apic = &apic_x2apic_phys;
65 else 65 else
66 apic = &apic_x2apic_cluster; 66 apic = &apic_x2apic_cluster;
67 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
68 } 67 }
69#endif 68#endif
70 69
71 if (apic == &apic_flat) { 70 if (apic == &apic_flat) {
72 if (max_physical_apicid >= 8) 71 switch (boot_cpu_data.x86_vendor) {
73 apic = &apic_physflat; 72 case X86_VENDOR_INTEL:
74 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 73 if (num_processors > 8)
74 apic = &apic_physflat;
75 break;
76 case X86_VENDOR_AMD:
77 if (max_physical_apicid >= 8)
78 apic = &apic_physflat;
79 }
75 } 80 }
76 81
82 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
83
77 if (is_vsmp_box()) { 84 if (is_vsmp_box()) {
78 /* need to update phys_pkg_id */ 85 /* need to update phys_pkg_id */
79 apic->phys_pkg_id = apicid_phys_pkg_id; 86 apic->phys_pkg_id = apicid_phys_pkg_id;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 601159374e87..326c25477d3d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
352 352
353 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { 353 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
354 alias.v = uv_read_local_mmr(redir_addrs[i].alias); 354 alias.v = uv_read_local_mmr(redir_addrs[i].alias);
355 if (alias.s.base == 0) { 355 if (alias.s.enable && alias.s.base == 0) {
356 *size = (1UL << alias.s.m_alias); 356 *size = (1UL << alias.s.m_alias);
357 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); 357 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
358 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; 358 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
359 return; 359 return;
360 } 360 }
361 } 361 }
362 BUG(); 362 *base = *size = 0;
363} 363}
364 364
365enum map_type {map_wb, map_uc}; 365enum map_type {map_wb, map_uc};
@@ -389,6 +389,16 @@ static __init void map_gru_high(int max_pnode)
389 map_high("GRU", gru.s.base, shift, max_pnode, map_wb); 389 map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
390} 390}
391 391
392static __init void map_mmr_high(int max_pnode)
393{
394 union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
395 int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
396
397 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
398 if (mmr.s.enable)
399 map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
400}
401
392static __init void map_mmioh_high(int max_pnode) 402static __init void map_mmioh_high(int max_pnode)
393{ 403{
394 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; 404 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -609,12 +619,12 @@ void __init uv_system_init(void)
609 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 619 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
610 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; 620 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
611 uv_cpu_hub_info(cpu)->m_val = m_val; 621 uv_cpu_hub_info(cpu)->m_val = m_val;
612 uv_cpu_hub_info(cpu)->n_val = m_val; 622 uv_cpu_hub_info(cpu)->n_val = n_val;
613 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 623 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
614 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 624 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
615 uv_cpu_hub_info(cpu)->pnode = pnode; 625 uv_cpu_hub_info(cpu)->pnode = pnode;
616 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; 626 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
617 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 627 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
618 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 628 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
619 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 629 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
620 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 630 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
@@ -643,6 +653,7 @@ void __init uv_system_init(void)
643 } 653 }
644 654
645 map_gru_high(max_pnode); 655 map_gru_high(max_pnode);
656 map_mmr_high(max_pnode);
646 map_mmioh_high(max_pnode); 657 map_mmioh_high(max_pnode);
647 658
648 uv_cpu_init(); 659 uv_cpu_init();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 8dd30638fe44..68537e957a9b 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
27obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 27obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
28obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 28obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
29 29
30obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o 30obj-$(CONFIG_PERF_EVENTS) += perf_event.o
31 31
32obj-$(CONFIG_X86_MCE) += mcheck/ 32obj-$(CONFIG_X86_MCE) += mcheck/
33obj-$(CONFIG_MTRR) += mtrr/ 33obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f32fa71ccf97..c910a716a71c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
184 * approved Athlon 184 * approved Athlon
185 */ 185 */
186 WARN_ONCE(1, "WARNING: This combination of AMD" 186 WARN_ONCE(1, "WARNING: This combination of AMD"
187 "processors is not suitable for SMP.\n"); 187 " processors is not suitable for SMP.\n");
188 if (!test_taint(TAINT_UNSAFE_SMP)) 188 if (!test_taint(TAINT_UNSAFE_SMP))
189 add_taint(TAINT_UNSAFE_SMP); 189 add_taint(TAINT_UNSAFE_SMP);
190 190
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2055fc2b2e6b..cc25c2b4a567 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,7 +13,7 @@
13#include <linux/io.h> 13#include <linux/io.h>
14 14
15#include <asm/stackprotector.h> 15#include <asm/stackprotector.h>
16#include <asm/perf_counter.h> 16#include <asm/perf_event.h>
17#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
18#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
19#include <asm/processor.h> 19#include <asm/processor.h>
@@ -34,7 +34,6 @@
34#include <asm/mce.h> 34#include <asm/mce.h>
35#include <asm/msr.h> 35#include <asm/msr.h>
36#include <asm/pat.h> 36#include <asm/pat.h>
37#include <linux/smp.h>
38 37
39#ifdef CONFIG_X86_LOCAL_APIC 38#ifdef CONFIG_X86_LOCAL_APIC
40#include <asm/uv/uv.h> 39#include <asm/uv/uv.h>
@@ -870,7 +869,7 @@ void __init identify_boot_cpu(void)
870#else 869#else
871 vgetcpu_set_mode(); 870 vgetcpu_set_mode();
872#endif 871#endif
873 init_hw_perf_counters(); 872 init_hw_perf_events();
874} 873}
875 874
876void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 875void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 7bb676c533aa..8b581d3905cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <trace/power.h> 36#include <trace/events/power.h>
37 37
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <linux/io.h> 39#include <linux/io.h>
@@ -72,8 +72,6 @@ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
72 72
73static DEFINE_PER_CPU(struct aperfmperf, old_perf); 73static DEFINE_PER_CPU(struct aperfmperf, old_perf);
74 74
75DEFINE_TRACE(power_mark);
76
77/* acpi_perf_data is a pointer to percpu data. */ 75/* acpi_perf_data is a pointer to percpu data. */
78static struct acpi_processor_performance *acpi_perf_data; 76static struct acpi_processor_performance *acpi_perf_data;
79 77
@@ -332,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
332 unsigned int next_perf_state = 0; /* Index into perf table */ 330 unsigned int next_perf_state = 0; /* Index into perf table */
333 unsigned int i; 331 unsigned int i;
334 int result = 0; 332 int result = 0;
335 struct power_trace it;
336 333
337 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 334 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
338 335
@@ -364,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
364 } 361 }
365 } 362 }
366 363
367 trace_power_mark(&it, POWER_PSTATE, next_perf_state); 364 trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
368 365
369 switch (data->cpu_feature) { 366 switch (data->cpu_feature) {
370 case SYSTEM_INTEL_MSR_CAPABLE: 367 case SYSTEM_INTEL_MSR_CAPABLE:
@@ -529,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {
529 526
530static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) 527static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
531{ 528{
532 /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf 529 /* Intel Xeon Processor 7100 Series Specification Update
530 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
533 * AL30: A Machine Check Exception (MCE) Occurring during an 531 * AL30: A Machine Check Exception (MCE) Occurring during an
534 * Enhanced Intel SpeedStep Technology Ratio Change May Cause 532 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
535 * Both Processor Cores to Lock Up when HT is enabled*/ 533 * Both Processor Cores to Lock Up. */
536 if (c->x86_vendor == X86_VENDOR_INTEL) { 534 if (c->x86_vendor == X86_VENDOR_INTEL) {
537 if ((c->x86 == 15) && 535 if ((c->x86 == 15) &&
538 (c->x86_model == 6) && 536 (c->x86_model == 6) &&
539 (c->x86_mask == 8) && smt_capable()) 537 (c->x86_mask == 8)) {
538 printk(KERN_INFO "acpi-cpufreq: Intel(R) "
539 "Xeon(R) 7100 Errata AL30, processors may "
540 "lock up on frequency changes: disabling "
541 "acpi-cpufreq.\n");
540 return -ENODEV; 542 return -ENODEV;
543 }
541 } 544 }
542 return 0; 545 return 0;
543} 546}
@@ -552,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
552 unsigned int result = 0; 555 unsigned int result = 0;
553 struct cpuinfo_x86 *c = &cpu_data(policy->cpu); 556 struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
554 struct acpi_processor_performance *perf; 557 struct acpi_processor_performance *perf;
558#ifdef CONFIG_SMP
559 static int blacklisted;
560#endif
555 561
556 dprintk("acpi_cpufreq_cpu_init\n"); 562 dprintk("acpi_cpufreq_cpu_init\n");
557 563
558#ifdef CONFIG_SMP 564#ifdef CONFIG_SMP
559 result = acpi_cpufreq_blacklist(c); 565 if (blacklisted)
560 if (result) 566 return blacklisted;
561 return result; 567 blacklisted = acpi_cpufreq_blacklist(c);
568 if (blacklisted)
569 return blacklisted;
562#endif 570#endif
563 571
564 data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); 572 data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index ce2ed3e4aad9..cabd2fa3fc93 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
813 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); 813 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
814 break; 814 break;
815 case 1 ... 15: 815 case 1 ... 15:
816 longhaul_version = TYPE_LONGHAUL_V1; 816 longhaul_version = TYPE_LONGHAUL_V2;
817 if (c->x86_mask < 8) { 817 if (c->x86_mask < 8) {
818 cpu_model = CPU_SAMUEL2; 818 cpu_model = CPU_SAMUEL2;
819 cpuname = "C3 'Samuel 2' [C5B]"; 819 cpuname = "C3 'Samuel 2' [C5B]";
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6394aa5c7985..3f12dabeab52 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data)
1022 * set it to 1 to avoid problems in the future. 1022 * set it to 1 to avoid problems in the future.
1023 * For all others it's a BIOS bug. 1023 * For all others it's a BIOS bug.
1024 */ 1024 */
1025 if (!boot_cpu_data.x86 == 0x11) 1025 if (boot_cpu_data.x86 != 0x11)
1026 printk(KERN_ERR FW_WARN PFX "Invalid zero transition " 1026 printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1027 "latency\n"); 1027 "latency\n");
1028 max_latency = 1; 1028 max_latency = 1;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 6911e91fb4f6..3ae5a7a3a500 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void)
232 return 0; 232 return 0;
233} 233}
234 234
235struct get_freq_data { 235static void get_freq_data(void *_speed)
236 unsigned int speed;
237 unsigned int processor;
238};
239
240static void get_freq_data(void *_data)
241{ 236{
242 struct get_freq_data *data = _data; 237 unsigned int *speed = _speed;
243 238
244 data->speed = speedstep_get_frequency(data->processor); 239 *speed = speedstep_get_frequency(speedstep_processor);
245} 240}
246 241
247static unsigned int speedstep_get(unsigned int cpu) 242static unsigned int speedstep_get(unsigned int cpu)
248{ 243{
249 struct get_freq_data data = { .processor = cpu }; 244 unsigned int speed;
250 245
251 /* You're supposed to ensure CPU is online. */ 246 /* You're supposed to ensure CPU is online. */
252 if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) 247 if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
253 BUG(); 248 BUG();
254 249
255 dprintk("detected %u kHz as current frequency\n", data.speed); 250 dprintk("detected %u kHz as current frequency\n", speed);
256 return data.speed; 251 return speed;
257} 252}
258 253
259/** 254/**
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7029f0e2acad..472763d92098 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = {
98}; 98};
99 99
100/* Inject mce on current CPU */ 100/* Inject mce on current CPU */
101static int raise_local(struct mce *m) 101static int raise_local(void)
102{ 102{
103 struct mce *m = &__get_cpu_var(injectm);
103 int context = MCJ_CTX(m->inject_flags); 104 int context = MCJ_CTX(m->inject_flags);
104 int ret = 0; 105 int ret = 0;
105 int cpu = m->extcpu; 106 int cpu = m->extcpu;
@@ -167,12 +168,12 @@ static void raise_mce(struct mce *m)
167 } 168 }
168 cpu_relax(); 169 cpu_relax();
169 } 170 }
170 raise_local(m); 171 raise_local();
171 put_cpu(); 172 put_cpu();
172 put_online_cpus(); 173 put_online_cpus();
173 } else 174 } else
174#endif 175#endif
175 raise_local(m); 176 raise_local();
176} 177}
177 178
178/* Error injection interface */ 179/* Error injection interface */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f5aab26320e..721a77ca8115 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
85static DEFINE_PER_CPU(struct mce, mces_seen); 85static DEFINE_PER_CPU(struct mce, mces_seen);
86static int cpu_missing; 86static int cpu_missing;
87 87
88static void default_decode_mce(struct mce *m)
89{
90 pr_emerg("No human readable MCE decoding support on this CPU type.\n");
91 pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
92}
93
94/*
95 * CPU/chipset specific EDAC code can register a callback here to print
96 * MCE errors in a human-readable form:
97 */
98void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
99EXPORT_SYMBOL(x86_mce_decode_callback);
88 100
89/* MCA banks polled by the period polling timer for corrected events */ 101/* MCA banks polled by the period polling timer for corrected events */
90DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 102DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,49 +177,46 @@ void mce_log(struct mce *mce)
165 set_bit(0, &mce_need_notify); 177 set_bit(0, &mce_need_notify);
166} 178}
167 179
168void __weak decode_mce(struct mce *m)
169{
170 return;
171}
172
173static void print_mce(struct mce *m) 180static void print_mce(struct mce *m)
174{ 181{
175 printk(KERN_EMERG 182 pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
176 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
177 m->extcpu, m->mcgstatus, m->bank, m->status); 183 m->extcpu, m->mcgstatus, m->bank, m->status);
184
178 if (m->ip) { 185 if (m->ip) {
179 printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", 186 pr_emerg("RIP%s %02x:<%016Lx> ",
180 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 187 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
181 m->cs, m->ip); 188 m->cs, m->ip);
189
182 if (m->cs == __KERNEL_CS) 190 if (m->cs == __KERNEL_CS)
183 print_symbol("{%s}", m->ip); 191 print_symbol("{%s}", m->ip);
184 printk(KERN_CONT "\n"); 192 pr_cont("\n");
185 } 193 }
186 printk(KERN_EMERG "TSC %llx ", m->tsc); 194
195 pr_emerg("TSC %llx ", m->tsc);
187 if (m->addr) 196 if (m->addr)
188 printk(KERN_CONT "ADDR %llx ", m->addr); 197 pr_cont("ADDR %llx ", m->addr);
189 if (m->misc) 198 if (m->misc)
190 printk(KERN_CONT "MISC %llx ", m->misc); 199 pr_cont("MISC %llx ", m->misc);
191 printk(KERN_CONT "\n");
192 printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
193 m->cpuvendor, m->cpuid, m->time, m->socketid,
194 m->apicid);
195 200
196 decode_mce(m); 201 pr_cont("\n");
202 pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
203 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
204
205 /*
206 * Print out human-readable details about the MCE error,
207 * (if the CPU has an implementation for that):
208 */
209 x86_mce_decode_callback(m);
197} 210}
198 211
199static void print_mce_head(void) 212static void print_mce_head(void)
200{ 213{
201 printk(KERN_EMERG "\nHARDWARE ERROR\n"); 214 pr_emerg("\nHARDWARE ERROR\n");
202} 215}
203 216
204static void print_mce_tail(void) 217static void print_mce_tail(void)
205{ 218{
206 printk(KERN_EMERG "This is not a software problem!\n" 219 pr_emerg("This is not a software problem!\n");
207#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
208 "Run through mcelog --ascii to decode and contact your hardware vendor\n"
209#endif
210 );
211} 220}
212 221
213#define PANIC_TIMEOUT 5 /* 5 seconds */ 222#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced;
221static void wait_for_panic(void) 230static void wait_for_panic(void)
222{ 231{
223 long timeout = PANIC_TIMEOUT*USEC_PER_SEC; 232 long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
233
224 preempt_disable(); 234 preempt_disable();
225 local_irq_enable(); 235 local_irq_enable();
226 while (timeout-- > 0) 236 while (timeout-- > 0)
@@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
288static int msr_to_offset(u32 msr) 298static int msr_to_offset(u32 msr)
289{ 299{
290 unsigned bank = __get_cpu_var(injectm.bank); 300 unsigned bank = __get_cpu_var(injectm.bank);
301
291 if (msr == rip_msr) 302 if (msr == rip_msr)
292 return offsetof(struct mce, ip); 303 return offsetof(struct mce, ip);
293 if (msr == MSR_IA32_MCx_STATUS(bank)) 304 if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr)
305static u64 mce_rdmsrl(u32 msr) 316static u64 mce_rdmsrl(u32 msr)
306{ 317{
307 u64 v; 318 u64 v;
319
308 if (__get_cpu_var(injectm).finished) { 320 if (__get_cpu_var(injectm).finished) {
309 int offset = msr_to_offset(msr); 321 int offset = msr_to_offset(msr);
322
310 if (offset < 0) 323 if (offset < 0)
311 return 0; 324 return 0;
312 return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); 325 return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
313 } 326 }
314 rdmsrl(msr, v); 327
328 if (rdmsrl_safe(msr, &v)) {
329 WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
330 /*
331 * Return zero in case the access faulted. This should
332 * not happen normally but can happen if the CPU does
333 * something weird, or if the code is buggy.
334 */
335 v = 0;
336 }
337
315 return v; 338 return v;
316} 339}
317 340
@@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
319{ 342{
320 if (__get_cpu_var(injectm).finished) { 343 if (__get_cpu_var(injectm).finished) {
321 int offset = msr_to_offset(msr); 344 int offset = msr_to_offset(msr);
345
322 if (offset >= 0) 346 if (offset >= 0)
323 *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; 347 *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
324 return; 348 return;
@@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
415 m->ip = mce_rdmsrl(rip_msr); 439 m->ip = mce_rdmsrl(rip_msr);
416} 440}
417 441
418#ifdef CONFIG_X86_LOCAL_APIC 442#ifdef CONFIG_X86_LOCAL_APIC
419/* 443/*
420 * Called after interrupts have been reenabled again 444 * Called after interrupts have been reenabled again
421 * when a MCE happened during an interrupts off region 445 * when a MCE happened during an interrupts off region
@@ -1172,6 +1196,7 @@ static int mce_banks_init(void)
1172 return -ENOMEM; 1196 return -ENOMEM;
1173 for (i = 0; i < banks; i++) { 1197 for (i = 0; i < banks; i++) {
1174 struct mce_bank *b = &mce_banks[i]; 1198 struct mce_bank *b = &mce_banks[i];
1199
1175 b->ctl = -1ULL; 1200 b->ctl = -1ULL;
1176 b->init = 1; 1201 b->init = 1;
1177 } 1202 }
@@ -1189,7 +1214,8 @@ static int __cpuinit mce_cap_init(void)
1189 rdmsrl(MSR_IA32_MCG_CAP, cap); 1214 rdmsrl(MSR_IA32_MCG_CAP, cap);
1190 1215
1191 b = cap & MCG_BANKCNT_MASK; 1216 b = cap & MCG_BANKCNT_MASK;
1192 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); 1217 if (!banks)
1218 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
1193 1219
1194 if (b > MAX_NR_BANKS) { 1220 if (b > MAX_NR_BANKS) {
1195 printk(KERN_WARNING 1221 printk(KERN_WARNING
@@ -1203,6 +1229,7 @@ static int __cpuinit mce_cap_init(void)
1203 banks = b; 1229 banks = b;
1204 if (!mce_banks) { 1230 if (!mce_banks) {
1205 int err = mce_banks_init(); 1231 int err = mce_banks_init();
1232
1206 if (err) 1233 if (err)
1207 return err; 1234 return err;
1208 } 1235 }
@@ -1237,6 +1264,7 @@ static void mce_init(void)
1237 1264
1238 for (i = 0; i < banks; i++) { 1265 for (i = 0; i < banks; i++) {
1239 struct mce_bank *b = &mce_banks[i]; 1266 struct mce_bank *b = &mce_banks[i];
1267
1240 if (!b->init) 1268 if (!b->init)
1241 continue; 1269 continue;
1242 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 1270 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
@@ -1626,6 +1654,7 @@ static int mce_disable(void)
1626 1654
1627 for (i = 0; i < banks; i++) { 1655 for (i = 0; i < banks; i++) {
1628 struct mce_bank *b = &mce_banks[i]; 1656 struct mce_bank *b = &mce_banks[i];
1657
1629 if (b->init) 1658 if (b->init)
1630 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 1659 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1631 } 1660 }
@@ -1911,6 +1940,7 @@ static void mce_disable_cpu(void *h)
1911 cmci_clear(); 1940 cmci_clear();
1912 for (i = 0; i < banks; i++) { 1941 for (i = 0; i < banks; i++) {
1913 struct mce_bank *b = &mce_banks[i]; 1942 struct mce_bank *b = &mce_banks[i];
1943
1914 if (b->init) 1944 if (b->init)
1915 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 1945 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1916 } 1946 }
@@ -1928,6 +1958,7 @@ static void mce_reenable_cpu(void *h)
1928 cmci_reenable(); 1958 cmci_reenable();
1929 for (i = 0; i < banks; i++) { 1959 for (i = 0; i < banks; i++) {
1930 struct mce_bank *b = &mce_banks[i]; 1960 struct mce_bank *b = &mce_banks[i];
1961
1931 if (b->init) 1962 if (b->init)
1932 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 1963 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1933 } 1964 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 8cd5224943b5..83a3d1f4efca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -489,8 +489,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
489 int i, err = 0; 489 int i, err = 0;
490 struct threshold_bank *b = NULL; 490 struct threshold_bank *b = NULL;
491 char name[32]; 491 char name[32];
492#ifdef CONFIG_SMP
492 struct cpuinfo_x86 *c = &cpu_data(cpu); 493 struct cpuinfo_x86 *c = &cpu_data(cpu);
493 494#endif
494 495
495 sprintf(name, "threshold_bank%i", bank); 496 sprintf(name, "threshold_bank%i", bank);
496 497
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665fe93d..7c785634af2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/percpu.h> 10#include <linux/percpu.h>
11#include <linux/sched.h>
11#include <asm/apic.h> 12#include <asm/apic.h>
12#include <asm/processor.h> 13#include <asm/processor.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 63a56d147e4a..b3a1dba75330 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,20 +34,31 @@
34/* How long to wait between reporting thermal events */ 34/* How long to wait between reporting thermal events */
35#define CHECK_INTERVAL (300 * HZ) 35#define CHECK_INTERVAL (300 * HZ)
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37/*
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38 * Current thermal throttling state:
39static DEFINE_PER_CPU(bool, thermal_throttle_active); 39 */
40struct thermal_state {
41 bool is_throttled;
42
43 u64 next_check;
44 unsigned long throttle_count;
45 unsigned long last_throttle_count;
46};
47
48static DEFINE_PER_CPU(struct thermal_state, thermal_state);
40 49
41static atomic_t therm_throt_en = ATOMIC_INIT(0); 50static atomic_t therm_throt_en = ATOMIC_INIT(0);
42 51
43#ifdef CONFIG_SYSFS 52#ifdef CONFIG_SYSFS
44#define define_therm_throt_sysdev_one_ro(_name) \ 53#define define_therm_throt_sysdev_one_ro(_name) \
45 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) 54 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
46 55
47#define define_therm_throt_sysdev_show_func(name) \ 56#define define_therm_throt_sysdev_show_func(name) \
48static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ 57 \
49 struct sysdev_attribute *attr, \ 58static ssize_t therm_throt_sysdev_show_##name( \
50 char *buf) \ 59 struct sys_device *dev, \
60 struct sysdev_attribute *attr, \
61 char *buf) \
51{ \ 62{ \
52 unsigned int cpu = dev->id; \ 63 unsigned int cpu = dev->id; \
53 ssize_t ret; \ 64 ssize_t ret; \
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
55 preempt_disable(); /* CPU hotplug */ \ 66 preempt_disable(); /* CPU hotplug */ \
56 if (cpu_online(cpu)) \ 67 if (cpu_online(cpu)) \
57 ret = sprintf(buf, "%lu\n", \ 68 ret = sprintf(buf, "%lu\n", \
58 per_cpu(thermal_throttle_##name, cpu)); \ 69 per_cpu(thermal_state, cpu).name); \
59 else \ 70 else \
60 ret = 0; \ 71 ret = 0; \
61 preempt_enable(); \ 72 preempt_enable(); \
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
63 return ret; \ 74 return ret; \
64} 75}
65 76
66define_therm_throt_sysdev_show_func(count); 77define_therm_throt_sysdev_show_func(throttle_count);
67define_therm_throt_sysdev_one_ro(count); 78define_therm_throt_sysdev_one_ro(throttle_count);
68 79
69static struct attribute *thermal_throttle_attrs[] = { 80static struct attribute *thermal_throttle_attrs[] = {
70 &attr_count.attr, 81 &attr_throttle_count.attr,
71 NULL 82 NULL
72}; 83};
73 84
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = {
93 * 1 : Event should be logged further, and a message has been 104 * 1 : Event should be logged further, and a message has been
94 * printed to the syslog. 105 * printed to the syslog.
95 */ 106 */
96static int therm_throt_process(int curr) 107static int therm_throt_process(bool is_throttled)
97{ 108{
98 unsigned int cpu = smp_processor_id(); 109 struct thermal_state *state;
99 __u64 tmp_jiffs = get_jiffies_64(); 110 unsigned int this_cpu;
100 bool was_throttled = __get_cpu_var(thermal_throttle_active); 111 bool was_throttled;
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; 112 u64 now;
113
114 this_cpu = smp_processor_id();
115 now = get_jiffies_64();
116 state = &per_cpu(thermal_state, this_cpu);
117
118 was_throttled = state->is_throttled;
119 state->is_throttled = is_throttled;
102 120
103 if (is_throttled) 121 if (is_throttled)
104 __get_cpu_var(thermal_throttle_count)++; 122 state->throttle_count++;
105 123
106 if (!(was_throttled ^ is_throttled) && 124 if (time_before64(now, state->next_check) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check))) 125 state->throttle_count != state->last_throttle_count)
108 return 0; 126 return 0;
109 127
110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 128 state->next_check = now + CHECK_INTERVAL;
129 state->last_throttle_count = state->throttle_count;
111 130
112 /* if we just entered the thermal event */ 131 /* if we just entered the thermal event */
113 if (is_throttled) { 132 if (is_throttled) {
114 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 133 printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
115 "cpu clock throttled (total events = %lu)\n",
116 cpu, __get_cpu_var(thermal_throttle_count));
117 134
118 add_taint(TAINT_MACHINE_CHECK); 135 add_taint(TAINT_MACHINE_CHECK);
119 return 1; 136 return 1;
120 } 137 }
121 if (was_throttled) { 138 if (was_throttled) {
122 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); 139 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
123 return 1; 140 return 1;
124 } 141 }
125 142
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void)
213 __u64 msr_val; 230 __u64 msr_val;
214 231
215 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 232 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
216 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) 233 if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
217 mce_log_therm_throt_event(msr_val); 234 mce_log_therm_throt_event(msr_val);
218} 235}
219 236
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 315738c74aad..73c86db5acbe 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -846,7 +846,7 @@ int __init mtrr_cleanup(unsigned address_bits)
846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
847 847
848 range_sums = sum_ranges(range, nr_range); 848 range_sums = sum_ranges(range, nr_range);
849 printk(KERN_INFO "total RAM coverred: %ldM\n", 849 printk(KERN_INFO "total RAM covered: %ldM\n",
850 range_sums >> (20 - PAGE_SHIFT)); 850 range_sums >> (20 - PAGE_SHIFT));
851 851
852 if (mtrr_chunk_size && mtrr_gran_size) { 852 if (mtrr_chunk_size && mtrr_gran_size) {
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 08b6ea4c62b4..3c1b12d461d1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
96 unsigned long long base, size; 96 unsigned long long base, size;
97 char *ptr; 97 char *ptr;
98 char line[LINE_SIZE]; 98 char line[LINE_SIZE];
99 int length;
99 size_t linelen; 100 size_t linelen;
100 101
101 if (!capable(CAP_SYS_ADMIN)) 102 if (!capable(CAP_SYS_ADMIN))
102 return -EPERM; 103 return -EPERM;
103 if (!len)
104 return -EINVAL;
105 104
106 memset(line, 0, LINE_SIZE); 105 memset(line, 0, LINE_SIZE);
107 if (len > LINE_SIZE) 106
108 len = LINE_SIZE; 107 length = len;
109 if (copy_from_user(line, buf, len - 1)) 108 length--;
109
110 if (length > LINE_SIZE - 1)
111 length = LINE_SIZE - 1;
112
113 if (length < 0)
114 return -EINVAL;
115
116 if (copy_from_user(line, buf, length))
110 return -EFAULT; 117 return -EFAULT;
111 118
112 linelen = strlen(line); 119 linelen = strlen(line);
@@ -126,8 +133,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
126 return -EINVAL; 133 return -EINVAL;
127 134
128 base = simple_strtoull(line + 5, &ptr, 0); 135 base = simple_strtoull(line + 5, &ptr, 0);
129 for (; isspace(*ptr); ++ptr) 136 while (isspace(*ptr))
130 ; 137 ptr++;
131 138
132 if (strncmp(ptr, "size=", 5)) 139 if (strncmp(ptr, "size=", 5))
133 return -EINVAL; 140 return -EINVAL;
@@ -135,14 +142,14 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
135 size = simple_strtoull(ptr + 5, &ptr, 0); 142 size = simple_strtoull(ptr + 5, &ptr, 0);
136 if ((base & 0xfff) || (size & 0xfff)) 143 if ((base & 0xfff) || (size & 0xfff))
137 return -EINVAL; 144 return -EINVAL;
138 for (; isspace(*ptr); ++ptr) 145 while (isspace(*ptr))
139 ; 146 ptr++;
140 147
141 if (strncmp(ptr, "type=", 5)) 148 if (strncmp(ptr, "type=", 5))
142 return -EINVAL; 149 return -EINVAL;
143 ptr += 5; 150 ptr += 5;
144 for (; isspace(*ptr); ++ptr) 151 while (isspace(*ptr))
145 ; 152 ptr++;
146 153
147 for (i = 0; i < MTRR_NUM_TYPES; ++i) { 154 for (i = 0; i < MTRR_NUM_TYPES; ++i) {
148 if (strcmp(ptr, mtrr_strings[i])) 155 if (strcmp(ptr, mtrr_strings[i]))
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_event.c
index 2732e2c1e4d3..b5801c311846 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Performance counter x86 architecture code 2 * Performance events x86 architecture code
3 * 3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> 4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar 5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
@@ -11,7 +11,7 @@
11 * For licencing details see kernel-base/COPYING 11 * For licencing details see kernel-base/COPYING
12 */ 12 */
13 13
14#include <linux/perf_counter.h> 14#include <linux/perf_event.h>
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/notifier.h> 16#include <linux/notifier.h>
17#include <linux/hardirq.h> 17#include <linux/hardirq.h>
@@ -27,19 +27,19 @@
27#include <asm/stacktrace.h> 27#include <asm/stacktrace.h>
28#include <asm/nmi.h> 28#include <asm/nmi.h>
29 29
30static u64 perf_counter_mask __read_mostly; 30static u64 perf_event_mask __read_mostly;
31 31
32/* The maximal number of PEBS counters: */ 32/* The maximal number of PEBS events: */
33#define MAX_PEBS_COUNTERS 4 33#define MAX_PEBS_EVENTS 4
34 34
35/* The size of a BTS record in bytes: */ 35/* The size of a BTS record in bytes: */
36#define BTS_RECORD_SIZE 24 36#define BTS_RECORD_SIZE 24
37 37
38/* The size of a per-cpu BTS buffer in bytes: */ 38/* The size of a per-cpu BTS buffer in bytes: */
39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) 39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048)
40 40
41/* The BTS overflow threshold in bytes from the end of the buffer: */ 41/* The BTS overflow threshold in bytes from the end of the buffer: */
42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) 42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128)
43 43
44 44
45/* 45/*
@@ -65,11 +65,11 @@ struct debug_store {
65 u64 pebs_index; 65 u64 pebs_index;
66 u64 pebs_absolute_maximum; 66 u64 pebs_absolute_maximum;
67 u64 pebs_interrupt_threshold; 67 u64 pebs_interrupt_threshold;
68 u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; 68 u64 pebs_event_reset[MAX_PEBS_EVENTS];
69}; 69};
70 70
71struct cpu_hw_counters { 71struct cpu_hw_events {
72 struct perf_counter *counters[X86_PMC_IDX_MAX]; 72 struct perf_event *events[X86_PMC_IDX_MAX];
73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75 unsigned long interrupts; 75 unsigned long interrupts;
@@ -86,17 +86,17 @@ struct x86_pmu {
86 int (*handle_irq)(struct pt_regs *); 86 int (*handle_irq)(struct pt_regs *);
87 void (*disable_all)(void); 87 void (*disable_all)(void);
88 void (*enable_all)(void); 88 void (*enable_all)(void);
89 void (*enable)(struct hw_perf_counter *, int); 89 void (*enable)(struct hw_perf_event *, int);
90 void (*disable)(struct hw_perf_counter *, int); 90 void (*disable)(struct hw_perf_event *, int);
91 unsigned eventsel; 91 unsigned eventsel;
92 unsigned perfctr; 92 unsigned perfctr;
93 u64 (*event_map)(int); 93 u64 (*event_map)(int);
94 u64 (*raw_event)(u64); 94 u64 (*raw_event)(u64);
95 int max_events; 95 int max_events;
96 int num_counters; 96 int num_events;
97 int num_counters_fixed; 97 int num_events_fixed;
98 int counter_bits; 98 int event_bits;
99 u64 counter_mask; 99 u64 event_mask;
100 int apic; 100 int apic;
101 u64 max_period; 101 u64 max_period;
102 u64 intel_ctrl; 102 u64 intel_ctrl;
@@ -106,7 +106,7 @@ struct x86_pmu {
106 106
107static struct x86_pmu x86_pmu __read_mostly; 107static struct x86_pmu x86_pmu __read_mostly;
108 108
109static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { 109static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
110 .enabled = 1, 110 .enabled = 1,
111}; 111};
112 112
@@ -124,35 +124,35 @@ static const u64 p6_perfmon_event_map[] =
124 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 124 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
125}; 125};
126 126
127static u64 p6_pmu_event_map(int event) 127static u64 p6_pmu_event_map(int hw_event)
128{ 128{
129 return p6_perfmon_event_map[event]; 129 return p6_perfmon_event_map[hw_event];
130} 130}
131 131
132/* 132/*
133 * Counter setting that is specified not to count anything. 133 * Event setting that is specified not to count anything.
134 * We use this to effectively disable a counter. 134 * We use this to effectively disable a counter.
135 * 135 *
136 * L2_RQSTS with 0 MESI unit mask. 136 * L2_RQSTS with 0 MESI unit mask.
137 */ 137 */
138#define P6_NOP_COUNTER 0x0000002EULL 138#define P6_NOP_EVENT 0x0000002EULL
139 139
140static u64 p6_pmu_raw_event(u64 event) 140static u64 p6_pmu_raw_event(u64 hw_event)
141{ 141{
142#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL 142#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
143#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL 143#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
144#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL 144#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
145#define P6_EVNTSEL_INV_MASK 0x00800000ULL 145#define P6_EVNTSEL_INV_MASK 0x00800000ULL
146#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL 146#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
147 147
148#define P6_EVNTSEL_MASK \ 148#define P6_EVNTSEL_MASK \
149 (P6_EVNTSEL_EVENT_MASK | \ 149 (P6_EVNTSEL_EVENT_MASK | \
150 P6_EVNTSEL_UNIT_MASK | \ 150 P6_EVNTSEL_UNIT_MASK | \
151 P6_EVNTSEL_EDGE_MASK | \ 151 P6_EVNTSEL_EDGE_MASK | \
152 P6_EVNTSEL_INV_MASK | \ 152 P6_EVNTSEL_INV_MASK | \
153 P6_EVNTSEL_COUNTER_MASK) 153 P6_EVNTSEL_REG_MASK)
154 154
155 return event & P6_EVNTSEL_MASK; 155 return hw_event & P6_EVNTSEL_MASK;
156} 156}
157 157
158 158
@@ -170,16 +170,16 @@ static const u64 intel_perfmon_event_map[] =
170 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 170 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
171}; 171};
172 172
173static u64 intel_pmu_event_map(int event) 173static u64 intel_pmu_event_map(int hw_event)
174{ 174{
175 return intel_perfmon_event_map[event]; 175 return intel_perfmon_event_map[hw_event];
176} 176}
177 177
178/* 178/*
179 * Generalized hw caching related event table, filled 179 * Generalized hw caching related hw_event table, filled
180 * in on a per model basis. A value of 0 means 180 * in on a per model basis. A value of 0 means
181 * 'not supported', -1 means 'event makes no sense on 181 * 'not supported', -1 means 'hw_event makes no sense on
182 * this CPU', any other value means the raw event 182 * this CPU', any other value means the raw hw_event
183 * ID. 183 * ID.
184 */ 184 */
185 185
@@ -463,22 +463,22 @@ static const u64 atom_hw_cache_event_ids
463 }, 463 },
464}; 464};
465 465
466static u64 intel_pmu_raw_event(u64 event) 466static u64 intel_pmu_raw_event(u64 hw_event)
467{ 467{
468#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL 468#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
469#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL 469#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
470#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL 470#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
471#define CORE_EVNTSEL_INV_MASK 0x00800000ULL 471#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
472#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL 472#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
473 473
474#define CORE_EVNTSEL_MASK \ 474#define CORE_EVNTSEL_MASK \
475 (CORE_EVNTSEL_EVENT_MASK | \ 475 (CORE_EVNTSEL_EVENT_MASK | \
476 CORE_EVNTSEL_UNIT_MASK | \ 476 CORE_EVNTSEL_UNIT_MASK | \
477 CORE_EVNTSEL_EDGE_MASK | \ 477 CORE_EVNTSEL_EDGE_MASK | \
478 CORE_EVNTSEL_INV_MASK | \ 478 CORE_EVNTSEL_INV_MASK | \
479 CORE_EVNTSEL_COUNTER_MASK) 479 CORE_EVNTSEL_REG_MASK)
480 480
481 return event & CORE_EVNTSEL_MASK; 481 return hw_event & CORE_EVNTSEL_MASK;
482} 482}
483 483
484static const u64 amd_hw_cache_event_ids 484static const u64 amd_hw_cache_event_ids
@@ -585,39 +585,39 @@ static const u64 amd_perfmon_event_map[] =
585 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 585 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
586}; 586};
587 587
588static u64 amd_pmu_event_map(int event) 588static u64 amd_pmu_event_map(int hw_event)
589{ 589{
590 return amd_perfmon_event_map[event]; 590 return amd_perfmon_event_map[hw_event];
591} 591}
592 592
593static u64 amd_pmu_raw_event(u64 event) 593static u64 amd_pmu_raw_event(u64 hw_event)
594{ 594{
595#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL 595#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
596#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL 596#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
597#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL 597#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
598#define K7_EVNTSEL_INV_MASK 0x000800000ULL 598#define K7_EVNTSEL_INV_MASK 0x000800000ULL
599#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL 599#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
600 600
601#define K7_EVNTSEL_MASK \ 601#define K7_EVNTSEL_MASK \
602 (K7_EVNTSEL_EVENT_MASK | \ 602 (K7_EVNTSEL_EVENT_MASK | \
603 K7_EVNTSEL_UNIT_MASK | \ 603 K7_EVNTSEL_UNIT_MASK | \
604 K7_EVNTSEL_EDGE_MASK | \ 604 K7_EVNTSEL_EDGE_MASK | \
605 K7_EVNTSEL_INV_MASK | \ 605 K7_EVNTSEL_INV_MASK | \
606 K7_EVNTSEL_COUNTER_MASK) 606 K7_EVNTSEL_REG_MASK)
607 607
608 return event & K7_EVNTSEL_MASK; 608 return hw_event & K7_EVNTSEL_MASK;
609} 609}
610 610
611/* 611/*
612 * Propagate counter elapsed time into the generic counter. 612 * Propagate event elapsed time into the generic event.
613 * Can only be executed on the CPU where the counter is active. 613 * Can only be executed on the CPU where the event is active.
614 * Returns the delta events processed. 614 * Returns the delta events processed.
615 */ 615 */
616static u64 616static u64
617x86_perf_counter_update(struct perf_counter *counter, 617x86_perf_event_update(struct perf_event *event,
618 struct hw_perf_counter *hwc, int idx) 618 struct hw_perf_event *hwc, int idx)
619{ 619{
620 int shift = 64 - x86_pmu.counter_bits; 620 int shift = 64 - x86_pmu.event_bits;
621 u64 prev_raw_count, new_raw_count; 621 u64 prev_raw_count, new_raw_count;
622 s64 delta; 622 s64 delta;
623 623
@@ -625,15 +625,15 @@ x86_perf_counter_update(struct perf_counter *counter,
625 return 0; 625 return 0;
626 626
627 /* 627 /*
628 * Careful: an NMI might modify the previous counter value. 628 * Careful: an NMI might modify the previous event value.
629 * 629 *
630 * Our tactic to handle this is to first atomically read and 630 * Our tactic to handle this is to first atomically read and
631 * exchange a new raw count - then add that new-prev delta 631 * exchange a new raw count - then add that new-prev delta
632 * count to the generic counter atomically: 632 * count to the generic event atomically:
633 */ 633 */
634again: 634again:
635 prev_raw_count = atomic64_read(&hwc->prev_count); 635 prev_raw_count = atomic64_read(&hwc->prev_count);
636 rdmsrl(hwc->counter_base + idx, new_raw_count); 636 rdmsrl(hwc->event_base + idx, new_raw_count);
637 637
638 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 638 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
639 new_raw_count) != prev_raw_count) 639 new_raw_count) != prev_raw_count)
@@ -642,7 +642,7 @@ again:
642 /* 642 /*
643 * Now we have the new raw value and have updated the prev 643 * Now we have the new raw value and have updated the prev
644 * timestamp already. We can now calculate the elapsed delta 644 * timestamp already. We can now calculate the elapsed delta
645 * (counter-)time and add that to the generic counter. 645 * (event-)time and add that to the generic event.
646 * 646 *
647 * Careful, not all hw sign-extends above the physical width 647 * Careful, not all hw sign-extends above the physical width
648 * of the count. 648 * of the count.
@@ -650,13 +650,13 @@ again:
650 delta = (new_raw_count << shift) - (prev_raw_count << shift); 650 delta = (new_raw_count << shift) - (prev_raw_count << shift);
651 delta >>= shift; 651 delta >>= shift;
652 652
653 atomic64_add(delta, &counter->count); 653 atomic64_add(delta, &event->count);
654 atomic64_sub(delta, &hwc->period_left); 654 atomic64_sub(delta, &hwc->period_left);
655 655
656 return new_raw_count; 656 return new_raw_count;
657} 657}
658 658
659static atomic_t active_counters; 659static atomic_t active_events;
660static DEFINE_MUTEX(pmc_reserve_mutex); 660static DEFINE_MUTEX(pmc_reserve_mutex);
661 661
662static bool reserve_pmc_hardware(void) 662static bool reserve_pmc_hardware(void)
@@ -667,12 +667,12 @@ static bool reserve_pmc_hardware(void)
667 if (nmi_watchdog == NMI_LOCAL_APIC) 667 if (nmi_watchdog == NMI_LOCAL_APIC)
668 disable_lapic_nmi_watchdog(); 668 disable_lapic_nmi_watchdog();
669 669
670 for (i = 0; i < x86_pmu.num_counters; i++) { 670 for (i = 0; i < x86_pmu.num_events; i++) {
671 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 671 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
672 goto perfctr_fail; 672 goto perfctr_fail;
673 } 673 }
674 674
675 for (i = 0; i < x86_pmu.num_counters; i++) { 675 for (i = 0; i < x86_pmu.num_events; i++) {
676 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 676 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
677 goto eventsel_fail; 677 goto eventsel_fail;
678 } 678 }
@@ -685,7 +685,7 @@ eventsel_fail:
685 for (i--; i >= 0; i--) 685 for (i--; i >= 0; i--)
686 release_evntsel_nmi(x86_pmu.eventsel + i); 686 release_evntsel_nmi(x86_pmu.eventsel + i);
687 687
688 i = x86_pmu.num_counters; 688 i = x86_pmu.num_events;
689 689
690perfctr_fail: 690perfctr_fail:
691 for (i--; i >= 0; i--) 691 for (i--; i >= 0; i--)
@@ -703,7 +703,7 @@ static void release_pmc_hardware(void)
703#ifdef CONFIG_X86_LOCAL_APIC 703#ifdef CONFIG_X86_LOCAL_APIC
704 int i; 704 int i;
705 705
706 for (i = 0; i < x86_pmu.num_counters; i++) { 706 for (i = 0; i < x86_pmu.num_events; i++) {
707 release_perfctr_nmi(x86_pmu.perfctr + i); 707 release_perfctr_nmi(x86_pmu.perfctr + i);
708 release_evntsel_nmi(x86_pmu.eventsel + i); 708 release_evntsel_nmi(x86_pmu.eventsel + i);
709 } 709 }
@@ -720,7 +720,7 @@ static inline bool bts_available(void)
720 720
721static inline void init_debug_store_on_cpu(int cpu) 721static inline void init_debug_store_on_cpu(int cpu)
722{ 722{
723 struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; 723 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
724 724
725 if (!ds) 725 if (!ds)
726 return; 726 return;
@@ -732,7 +732,7 @@ static inline void init_debug_store_on_cpu(int cpu)
732 732
733static inline void fini_debug_store_on_cpu(int cpu) 733static inline void fini_debug_store_on_cpu(int cpu)
734{ 734{
735 if (!per_cpu(cpu_hw_counters, cpu).ds) 735 if (!per_cpu(cpu_hw_events, cpu).ds)
736 return; 736 return;
737 737
738 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 738 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
@@ -751,12 +751,12 @@ static void release_bts_hardware(void)
751 fini_debug_store_on_cpu(cpu); 751 fini_debug_store_on_cpu(cpu);
752 752
753 for_each_possible_cpu(cpu) { 753 for_each_possible_cpu(cpu) {
754 struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; 754 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
755 755
756 if (!ds) 756 if (!ds)
757 continue; 757 continue;
758 758
759 per_cpu(cpu_hw_counters, cpu).ds = NULL; 759 per_cpu(cpu_hw_events, cpu).ds = NULL;
760 760
761 kfree((void *)(unsigned long)ds->bts_buffer_base); 761 kfree((void *)(unsigned long)ds->bts_buffer_base);
762 kfree(ds); 762 kfree(ds);
@@ -796,7 +796,7 @@ static int reserve_bts_hardware(void)
796 ds->bts_interrupt_threshold = 796 ds->bts_interrupt_threshold =
797 ds->bts_absolute_maximum - BTS_OVFL_TH; 797 ds->bts_absolute_maximum - BTS_OVFL_TH;
798 798
799 per_cpu(cpu_hw_counters, cpu).ds = ds; 799 per_cpu(cpu_hw_events, cpu).ds = ds;
800 err = 0; 800 err = 0;
801 } 801 }
802 802
@@ -812,9 +812,9 @@ static int reserve_bts_hardware(void)
812 return err; 812 return err;
813} 813}
814 814
815static void hw_perf_counter_destroy(struct perf_counter *counter) 815static void hw_perf_event_destroy(struct perf_event *event)
816{ 816{
817 if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { 817 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
818 release_pmc_hardware(); 818 release_pmc_hardware();
819 release_bts_hardware(); 819 release_bts_hardware();
820 mutex_unlock(&pmc_reserve_mutex); 820 mutex_unlock(&pmc_reserve_mutex);
@@ -827,7 +827,7 @@ static inline int x86_pmu_initialized(void)
827} 827}
828 828
829static inline int 829static inline int
830set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) 830set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
831{ 831{
832 unsigned int cache_type, cache_op, cache_result; 832 unsigned int cache_type, cache_op, cache_result;
833 u64 config, val; 833 u64 config, val;
@@ -880,7 +880,7 @@ static void intel_pmu_enable_bts(u64 config)
880 880
881static void intel_pmu_disable_bts(void) 881static void intel_pmu_disable_bts(void)
882{ 882{
883 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 883 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
884 unsigned long debugctlmsr; 884 unsigned long debugctlmsr;
885 885
886 if (!cpuc->ds) 886 if (!cpuc->ds)
@@ -898,10 +898,10 @@ static void intel_pmu_disable_bts(void)
898/* 898/*
899 * Setup the hardware configuration for a given attr_type 899 * Setup the hardware configuration for a given attr_type
900 */ 900 */
901static int __hw_perf_counter_init(struct perf_counter *counter) 901static int __hw_perf_event_init(struct perf_event *event)
902{ 902{
903 struct perf_counter_attr *attr = &counter->attr; 903 struct perf_event_attr *attr = &event->attr;
904 struct hw_perf_counter *hwc = &counter->hw; 904 struct hw_perf_event *hwc = &event->hw;
905 u64 config; 905 u64 config;
906 int err; 906 int err;
907 907
@@ -909,21 +909,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
909 return -ENODEV; 909 return -ENODEV;
910 910
911 err = 0; 911 err = 0;
912 if (!atomic_inc_not_zero(&active_counters)) { 912 if (!atomic_inc_not_zero(&active_events)) {
913 mutex_lock(&pmc_reserve_mutex); 913 mutex_lock(&pmc_reserve_mutex);
914 if (atomic_read(&active_counters) == 0) { 914 if (atomic_read(&active_events) == 0) {
915 if (!reserve_pmc_hardware()) 915 if (!reserve_pmc_hardware())
916 err = -EBUSY; 916 err = -EBUSY;
917 else 917 else
918 err = reserve_bts_hardware(); 918 err = reserve_bts_hardware();
919 } 919 }
920 if (!err) 920 if (!err)
921 atomic_inc(&active_counters); 921 atomic_inc(&active_events);
922 mutex_unlock(&pmc_reserve_mutex); 922 mutex_unlock(&pmc_reserve_mutex);
923 } 923 }
924 if (err) 924 if (err)
925 return err; 925 return err;
926 926
927 event->destroy = hw_perf_event_destroy;
928
927 /* 929 /*
928 * Generate PMC IRQs: 930 * Generate PMC IRQs:
929 * (keep 'enabled' bit clear for now) 931 * (keep 'enabled' bit clear for now)
@@ -946,17 +948,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
946 /* 948 /*
947 * If we have a PMU initialized but no APIC 949 * If we have a PMU initialized but no APIC
948 * interrupts, we cannot sample hardware 950 * interrupts, we cannot sample hardware
949 * counters (user-space has to fall back and 951 * events (user-space has to fall back and
950 * sample via a hrtimer based software counter): 952 * sample via a hrtimer based software event):
951 */ 953 */
952 if (!x86_pmu.apic) 954 if (!x86_pmu.apic)
953 return -EOPNOTSUPP; 955 return -EOPNOTSUPP;
954 } 956 }
955 957
956 counter->destroy = hw_perf_counter_destroy;
957
958 /* 958 /*
959 * Raw event type provide the config in the event structure 959 * Raw hw_event type provide the config in the hw_event structure
960 */ 960 */
961 if (attr->type == PERF_TYPE_RAW) { 961 if (attr->type == PERF_TYPE_RAW) {
962 hwc->config |= x86_pmu.raw_event(attr->config); 962 hwc->config |= x86_pmu.raw_event(attr->config);
@@ -1001,7 +1001,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
1001 1001
1002static void p6_pmu_disable_all(void) 1002static void p6_pmu_disable_all(void)
1003{ 1003{
1004 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1004 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1005 u64 val; 1005 u64 val;
1006 1006
1007 if (!cpuc->enabled) 1007 if (!cpuc->enabled)
@@ -1018,7 +1018,7 @@ static void p6_pmu_disable_all(void)
1018 1018
1019static void intel_pmu_disable_all(void) 1019static void intel_pmu_disable_all(void)
1020{ 1020{
1021 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1021 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1022 1022
1023 if (!cpuc->enabled) 1023 if (!cpuc->enabled)
1024 return; 1024 return;
@@ -1034,7 +1034,7 @@ static void intel_pmu_disable_all(void)
1034 1034
1035static void amd_pmu_disable_all(void) 1035static void amd_pmu_disable_all(void)
1036{ 1036{
1037 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1037 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1038 int idx; 1038 int idx;
1039 1039
1040 if (!cpuc->enabled) 1040 if (!cpuc->enabled)
@@ -1043,12 +1043,12 @@ static void amd_pmu_disable_all(void)
1043 cpuc->enabled = 0; 1043 cpuc->enabled = 0;
1044 /* 1044 /*
1045 * ensure we write the disable before we start disabling the 1045 * ensure we write the disable before we start disabling the
1046 * counters proper, so that amd_pmu_enable_counter() does the 1046 * events proper, so that amd_pmu_enable_event() does the
1047 * right thing. 1047 * right thing.
1048 */ 1048 */
1049 barrier(); 1049 barrier();
1050 1050
1051 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1051 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1052 u64 val; 1052 u64 val;
1053 1053
1054 if (!test_bit(idx, cpuc->active_mask)) 1054 if (!test_bit(idx, cpuc->active_mask))
@@ -1070,7 +1070,7 @@ void hw_perf_disable(void)
1070 1070
1071static void p6_pmu_enable_all(void) 1071static void p6_pmu_enable_all(void)
1072{ 1072{
1073 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1073 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1074 unsigned long val; 1074 unsigned long val;
1075 1075
1076 if (cpuc->enabled) 1076 if (cpuc->enabled)
@@ -1087,7 +1087,7 @@ static void p6_pmu_enable_all(void)
1087 1087
1088static void intel_pmu_enable_all(void) 1088static void intel_pmu_enable_all(void)
1089{ 1089{
1090 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1090 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1091 1091
1092 if (cpuc->enabled) 1092 if (cpuc->enabled)
1093 return; 1093 return;
@@ -1098,19 +1098,19 @@ static void intel_pmu_enable_all(void)
1098 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 1098 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1099 1099
1100 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 1100 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1101 struct perf_counter *counter = 1101 struct perf_event *event =
1102 cpuc->counters[X86_PMC_IDX_FIXED_BTS]; 1102 cpuc->events[X86_PMC_IDX_FIXED_BTS];
1103 1103
1104 if (WARN_ON_ONCE(!counter)) 1104 if (WARN_ON_ONCE(!event))
1105 return; 1105 return;
1106 1106
1107 intel_pmu_enable_bts(counter->hw.config); 1107 intel_pmu_enable_bts(event->hw.config);
1108 } 1108 }
1109} 1109}
1110 1110
1111static void amd_pmu_enable_all(void) 1111static void amd_pmu_enable_all(void)
1112{ 1112{
1113 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1113 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1114 int idx; 1114 int idx;
1115 1115
1116 if (cpuc->enabled) 1116 if (cpuc->enabled)
@@ -1119,14 +1119,14 @@ static void amd_pmu_enable_all(void)
1119 cpuc->enabled = 1; 1119 cpuc->enabled = 1;
1120 barrier(); 1120 barrier();
1121 1121
1122 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1122 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1123 struct perf_counter *counter = cpuc->counters[idx]; 1123 struct perf_event *event = cpuc->events[idx];
1124 u64 val; 1124 u64 val;
1125 1125
1126 if (!test_bit(idx, cpuc->active_mask)) 1126 if (!test_bit(idx, cpuc->active_mask))
1127 continue; 1127 continue;
1128 1128
1129 val = counter->hw.config; 1129 val = event->hw.config;
1130 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 1130 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1131 wrmsrl(MSR_K7_EVNTSEL0 + idx, val); 1131 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1132 } 1132 }
@@ -1153,19 +1153,19 @@ static inline void intel_pmu_ack_status(u64 ack)
1153 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 1153 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1154} 1154}
1155 1155
1156static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1156static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1157{ 1157{
1158 (void)checking_wrmsrl(hwc->config_base + idx, 1158 (void)checking_wrmsrl(hwc->config_base + idx,
1159 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); 1159 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1160} 1160}
1161 1161
1162static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 1162static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1163{ 1163{
1164 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 1164 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1165} 1165}
1166 1166
1167static inline void 1167static inline void
1168intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) 1168intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1169{ 1169{
1170 int idx = __idx - X86_PMC_IDX_FIXED; 1170 int idx = __idx - X86_PMC_IDX_FIXED;
1171 u64 ctrl_val, mask; 1171 u64 ctrl_val, mask;
@@ -1178,10 +1178,10 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
1178} 1178}
1179 1179
1180static inline void 1180static inline void
1181p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 1181p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1182{ 1182{
1183 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1184 u64 val = P6_NOP_COUNTER; 1184 u64 val = P6_NOP_EVENT;
1185 1185
1186 if (cpuc->enabled) 1186 if (cpuc->enabled)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1190,7 +1190,7 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
1190} 1190}
1191 1191
1192static inline void 1192static inline void
1193intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 1193intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1194{ 1194{
1195 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 1195 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1196 intel_pmu_disable_bts(); 1196 intel_pmu_disable_bts();
@@ -1202,24 +1202,24 @@ intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
1202 return; 1202 return;
1203 } 1203 }
1204 1204
1205 x86_pmu_disable_counter(hwc, idx); 1205 x86_pmu_disable_event(hwc, idx);
1206} 1206}
1207 1207
1208static inline void 1208static inline void
1209amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 1209amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1210{ 1210{
1211 x86_pmu_disable_counter(hwc, idx); 1211 x86_pmu_disable_event(hwc, idx);
1212} 1212}
1213 1213
1214static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 1214static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1215 1215
1216/* 1216/*
1217 * Set the next IRQ period, based on the hwc->period_left value. 1217 * Set the next IRQ period, based on the hwc->period_left value.
1218 * To be called with the counter disabled in hw: 1218 * To be called with the event disabled in hw:
1219 */ 1219 */
1220static int 1220static int
1221x86_perf_counter_set_period(struct perf_counter *counter, 1221x86_perf_event_set_period(struct perf_event *event,
1222 struct hw_perf_counter *hwc, int idx) 1222 struct hw_perf_event *hwc, int idx)
1223{ 1223{
1224 s64 left = atomic64_read(&hwc->period_left); 1224 s64 left = atomic64_read(&hwc->period_left);
1225 s64 period = hwc->sample_period; 1225 s64 period = hwc->sample_period;
@@ -1245,7 +1245,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
1245 ret = 1; 1245 ret = 1;
1246 } 1246 }
1247 /* 1247 /*
1248 * Quirk: certain CPUs dont like it if just 1 event is left: 1248 * Quirk: certain CPUs dont like it if just 1 hw_event is left:
1249 */ 1249 */
1250 if (unlikely(left < 2)) 1250 if (unlikely(left < 2))
1251 left = 2; 1251 left = 2;
@@ -1256,21 +1256,21 @@ x86_perf_counter_set_period(struct perf_counter *counter,
1256 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; 1256 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1257 1257
1258 /* 1258 /*
1259 * The hw counter starts counting from this counter offset, 1259 * The hw event starts counting from this event offset,
1260 * mark it to be able to extra future deltas: 1260 * mark it to be able to extra future deltas:
1261 */ 1261 */
1262 atomic64_set(&hwc->prev_count, (u64)-left); 1262 atomic64_set(&hwc->prev_count, (u64)-left);
1263 1263
1264 err = checking_wrmsrl(hwc->counter_base + idx, 1264 err = checking_wrmsrl(hwc->event_base + idx,
1265 (u64)(-left) & x86_pmu.counter_mask); 1265 (u64)(-left) & x86_pmu.event_mask);
1266 1266
1267 perf_counter_update_userpage(counter); 1267 perf_event_update_userpage(event);
1268 1268
1269 return ret; 1269 return ret;
1270} 1270}
1271 1271
1272static inline void 1272static inline void
1273intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) 1273intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1274{ 1274{
1275 int idx = __idx - X86_PMC_IDX_FIXED; 1275 int idx = __idx - X86_PMC_IDX_FIXED;
1276 u64 ctrl_val, bits, mask; 1276 u64 ctrl_val, bits, mask;
@@ -1295,9 +1295,9 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
1295 err = checking_wrmsrl(hwc->config_base, ctrl_val); 1295 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1296} 1296}
1297 1297
1298static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1298static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1299{ 1299{
1300 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1300 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1301 u64 val; 1301 u64 val;
1302 1302
1303 val = hwc->config; 1303 val = hwc->config;
@@ -1308,10 +1308,10 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1308} 1308}
1309 1309
1310 1310
1311static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1311static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1312{ 1312{
1313 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 1313 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1314 if (!__get_cpu_var(cpu_hw_counters).enabled) 1314 if (!__get_cpu_var(cpu_hw_events).enabled)
1315 return; 1315 return;
1316 1316
1317 intel_pmu_enable_bts(hwc->config); 1317 intel_pmu_enable_bts(hwc->config);
@@ -1323,134 +1323,134 @@ static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1323 return; 1323 return;
1324 } 1324 }
1325 1325
1326 x86_pmu_enable_counter(hwc, idx); 1326 x86_pmu_enable_event(hwc, idx);
1327} 1327}
1328 1328
1329static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1329static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330{ 1330{
1331 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1331 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1332 1332
1333 if (cpuc->enabled) 1333 if (cpuc->enabled)
1334 x86_pmu_enable_counter(hwc, idx); 1334 x86_pmu_enable_event(hwc, idx);
1335} 1335}
1336 1336
1337static int 1337static int
1338fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) 1338fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
1339{ 1339{
1340 unsigned int event; 1340 unsigned int hw_event;
1341 1341
1342 event = hwc->config & ARCH_PERFMON_EVENT_MASK; 1342 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1343 1343
1344 if (unlikely((event == 1344 if (unlikely((hw_event ==
1345 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && 1345 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1346 (hwc->sample_period == 1))) 1346 (hwc->sample_period == 1)))
1347 return X86_PMC_IDX_FIXED_BTS; 1347 return X86_PMC_IDX_FIXED_BTS;
1348 1348
1349 if (!x86_pmu.num_counters_fixed) 1349 if (!x86_pmu.num_events_fixed)
1350 return -1; 1350 return -1;
1351 1351
1352 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 1352 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1353 return X86_PMC_IDX_FIXED_INSTRUCTIONS; 1353 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1354 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) 1354 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1355 return X86_PMC_IDX_FIXED_CPU_CYCLES; 1355 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1356 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) 1356 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1357 return X86_PMC_IDX_FIXED_BUS_CYCLES; 1357 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1358 1358
1359 return -1; 1359 return -1;
1360} 1360}
1361 1361
1362/* 1362/*
1363 * Find a PMC slot for the freshly enabled / scheduled in counter: 1363 * Find a PMC slot for the freshly enabled / scheduled in event:
1364 */ 1364 */
1365static int x86_pmu_enable(struct perf_counter *counter) 1365static int x86_pmu_enable(struct perf_event *event)
1366{ 1366{
1367 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1367 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1368 struct hw_perf_counter *hwc = &counter->hw; 1368 struct hw_perf_event *hwc = &event->hw;
1369 int idx; 1369 int idx;
1370 1370
1371 idx = fixed_mode_idx(counter, hwc); 1371 idx = fixed_mode_idx(event, hwc);
1372 if (idx == X86_PMC_IDX_FIXED_BTS) { 1372 if (idx == X86_PMC_IDX_FIXED_BTS) {
1373 /* BTS is already occupied. */ 1373 /* BTS is already occupied. */
1374 if (test_and_set_bit(idx, cpuc->used_mask)) 1374 if (test_and_set_bit(idx, cpuc->used_mask))
1375 return -EAGAIN; 1375 return -EAGAIN;
1376 1376
1377 hwc->config_base = 0; 1377 hwc->config_base = 0;
1378 hwc->counter_base = 0; 1378 hwc->event_base = 0;
1379 hwc->idx = idx; 1379 hwc->idx = idx;
1380 } else if (idx >= 0) { 1380 } else if (idx >= 0) {
1381 /* 1381 /*
1382 * Try to get the fixed counter, if that is already taken 1382 * Try to get the fixed event, if that is already taken
1383 * then try to get a generic counter: 1383 * then try to get a generic event:
1384 */ 1384 */
1385 if (test_and_set_bit(idx, cpuc->used_mask)) 1385 if (test_and_set_bit(idx, cpuc->used_mask))
1386 goto try_generic; 1386 goto try_generic;
1387 1387
1388 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; 1388 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1389 /* 1389 /*
1390 * We set it so that counter_base + idx in wrmsr/rdmsr maps to 1390 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1391 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: 1391 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1392 */ 1392 */
1393 hwc->counter_base = 1393 hwc->event_base =
1394 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; 1394 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1395 hwc->idx = idx; 1395 hwc->idx = idx;
1396 } else { 1396 } else {
1397 idx = hwc->idx; 1397 idx = hwc->idx;
1398 /* Try to get the previous generic counter again */ 1398 /* Try to get the previous generic event again */
1399 if (test_and_set_bit(idx, cpuc->used_mask)) { 1399 if (test_and_set_bit(idx, cpuc->used_mask)) {
1400try_generic: 1400try_generic:
1401 idx = find_first_zero_bit(cpuc->used_mask, 1401 idx = find_first_zero_bit(cpuc->used_mask,
1402 x86_pmu.num_counters); 1402 x86_pmu.num_events);
1403 if (idx == x86_pmu.num_counters) 1403 if (idx == x86_pmu.num_events)
1404 return -EAGAIN; 1404 return -EAGAIN;
1405 1405
1406 set_bit(idx, cpuc->used_mask); 1406 set_bit(idx, cpuc->used_mask);
1407 hwc->idx = idx; 1407 hwc->idx = idx;
1408 } 1408 }
1409 hwc->config_base = x86_pmu.eventsel; 1409 hwc->config_base = x86_pmu.eventsel;
1410 hwc->counter_base = x86_pmu.perfctr; 1410 hwc->event_base = x86_pmu.perfctr;
1411 } 1411 }
1412 1412
1413 perf_counters_lapic_init(); 1413 perf_events_lapic_init();
1414 1414
1415 x86_pmu.disable(hwc, idx); 1415 x86_pmu.disable(hwc, idx);
1416 1416
1417 cpuc->counters[idx] = counter; 1417 cpuc->events[idx] = event;
1418 set_bit(idx, cpuc->active_mask); 1418 set_bit(idx, cpuc->active_mask);
1419 1419
1420 x86_perf_counter_set_period(counter, hwc, idx); 1420 x86_perf_event_set_period(event, hwc, idx);
1421 x86_pmu.enable(hwc, idx); 1421 x86_pmu.enable(hwc, idx);
1422 1422
1423 perf_counter_update_userpage(counter); 1423 perf_event_update_userpage(event);
1424 1424
1425 return 0; 1425 return 0;
1426} 1426}
1427 1427
1428static void x86_pmu_unthrottle(struct perf_counter *counter) 1428static void x86_pmu_unthrottle(struct perf_event *event)
1429{ 1429{
1430 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1430 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1431 struct hw_perf_counter *hwc = &counter->hw; 1431 struct hw_perf_event *hwc = &event->hw;
1432 1432
1433 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || 1433 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1434 cpuc->counters[hwc->idx] != counter)) 1434 cpuc->events[hwc->idx] != event))
1435 return; 1435 return;
1436 1436
1437 x86_pmu.enable(hwc, hwc->idx); 1437 x86_pmu.enable(hwc, hwc->idx);
1438} 1438}
1439 1439
1440void perf_counter_print_debug(void) 1440void perf_event_print_debug(void)
1441{ 1441{
1442 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; 1442 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1443 struct cpu_hw_counters *cpuc; 1443 struct cpu_hw_events *cpuc;
1444 unsigned long flags; 1444 unsigned long flags;
1445 int cpu, idx; 1445 int cpu, idx;
1446 1446
1447 if (!x86_pmu.num_counters) 1447 if (!x86_pmu.num_events)
1448 return; 1448 return;
1449 1449
1450 local_irq_save(flags); 1450 local_irq_save(flags);
1451 1451
1452 cpu = smp_processor_id(); 1452 cpu = smp_processor_id();
1453 cpuc = &per_cpu(cpu_hw_counters, cpu); 1453 cpuc = &per_cpu(cpu_hw_events, cpu);
1454 1454
1455 if (x86_pmu.version >= 2) { 1455 if (x86_pmu.version >= 2) {
1456 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); 1456 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
@@ -1466,7 +1466,7 @@ void perf_counter_print_debug(void)
1466 } 1466 }
1467 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); 1467 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1468 1468
1469 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1469 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1472 1472
@@ -1479,7 +1479,7 @@ void perf_counter_print_debug(void)
1479 pr_info("CPU#%d: gen-PMC%d left: %016llx\n", 1479 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1480 cpu, idx, prev_left); 1480 cpu, idx, prev_left);
1481 } 1481 }
1482 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { 1482 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1483 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); 1483 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1484 1484
1485 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", 1485 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
1488 local_irq_restore(flags); 1488 local_irq_restore(flags);
1489} 1489}
1490 1490
1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, 1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1492 struct perf_sample_data *data)
1493{ 1492{
1494 struct debug_store *ds = cpuc->ds; 1493 struct debug_store *ds = cpuc->ds;
1495 struct bts_record { 1494 struct bts_record {
@@ -1497,11 +1496,14 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
1497 u64 to; 1496 u64 to;
1498 u64 flags; 1497 u64 flags;
1499 }; 1498 };
1500 struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; 1499 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1501 unsigned long orig_ip = data->regs->ip;
1502 struct bts_record *at, *top; 1500 struct bts_record *at, *top;
1501 struct perf_output_handle handle;
1502 struct perf_event_header header;
1503 struct perf_sample_data data;
1504 struct pt_regs regs;
1503 1505
1504 if (!counter) 1506 if (!event)
1505 return; 1507 return;
1506 1508
1507 if (!ds) 1509 if (!ds)
@@ -1510,26 +1512,45 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
1510 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 1512 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1511 top = (struct bts_record *)(unsigned long)ds->bts_index; 1513 top = (struct bts_record *)(unsigned long)ds->bts_index;
1512 1514
1515 if (top <= at)
1516 return;
1517
1513 ds->bts_index = ds->bts_buffer_base; 1518 ds->bts_index = ds->bts_buffer_base;
1514 1519
1520
1521 data.period = event->hw.last_period;
1522 data.addr = 0;
1523 regs.ip = 0;
1524
1525 /*
1526 * Prepare a generic sample, i.e. fill in the invariant fields.
1527 * We will overwrite the from and to address before we output
1528 * the sample.
1529 */
1530 perf_prepare_sample(&header, &data, event, &regs);
1531
1532 if (perf_output_begin(&handle, event,
1533 header.size * (top - at), 1, 1))
1534 return;
1535
1515 for (; at < top; at++) { 1536 for (; at < top; at++) {
1516 data->regs->ip = at->from; 1537 data.ip = at->from;
1517 data->addr = at->to; 1538 data.addr = at->to;
1518 1539
1519 perf_counter_output(counter, 1, data); 1540 perf_output_sample(&handle, &header, &data, event);
1520 } 1541 }
1521 1542
1522 data->regs->ip = orig_ip; 1543 perf_output_end(&handle);
1523 data->addr = 0;
1524 1544
1525 /* There's new data available. */ 1545 /* There's new data available. */
1526 counter->pending_kill = POLL_IN; 1546 event->hw.interrupts++;
1547 event->pending_kill = POLL_IN;
1527} 1548}
1528 1549
1529static void x86_pmu_disable(struct perf_counter *counter) 1550static void x86_pmu_disable(struct perf_event *event)
1530{ 1551{
1531 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); 1552 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1532 struct hw_perf_counter *hwc = &counter->hw; 1553 struct hw_perf_event *hwc = &event->hw;
1533 int idx = hwc->idx; 1554 int idx = hwc->idx;
1534 1555
1535 /* 1556 /*
@@ -1541,67 +1562,63 @@ static void x86_pmu_disable(struct perf_counter *counter)
1541 1562
1542 /* 1563 /*
1543 * Make sure the cleared pointer becomes visible before we 1564 * Make sure the cleared pointer becomes visible before we
1544 * (potentially) free the counter: 1565 * (potentially) free the event:
1545 */ 1566 */
1546 barrier(); 1567 barrier();
1547 1568
1548 /* 1569 /*
1549 * Drain the remaining delta count out of a counter 1570 * Drain the remaining delta count out of a event
1550 * that we are disabling: 1571 * that we are disabling:
1551 */ 1572 */
1552 x86_perf_counter_update(counter, hwc, idx); 1573 x86_perf_event_update(event, hwc, idx);
1553 1574
1554 /* Drain the remaining BTS records. */ 1575 /* Drain the remaining BTS records. */
1555 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 1576 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1556 struct perf_sample_data data; 1577 intel_pmu_drain_bts_buffer(cpuc);
1557 struct pt_regs regs;
1558 1578
1559 data.regs = &regs; 1579 cpuc->events[idx] = NULL;
1560 intel_pmu_drain_bts_buffer(cpuc, &data);
1561 }
1562 cpuc->counters[idx] = NULL;
1563 clear_bit(idx, cpuc->used_mask); 1580 clear_bit(idx, cpuc->used_mask);
1564 1581
1565 perf_counter_update_userpage(counter); 1582 perf_event_update_userpage(event);
1566} 1583}
1567 1584
1568/* 1585/*
1569 * Save and restart an expired counter. Called by NMI contexts, 1586 * Save and restart an expired event. Called by NMI contexts,
1570 * so it has to be careful about preempting normal counter ops: 1587 * so it has to be careful about preempting normal event ops:
1571 */ 1588 */
1572static int intel_pmu_save_and_restart(struct perf_counter *counter) 1589static int intel_pmu_save_and_restart(struct perf_event *event)
1573{ 1590{
1574 struct hw_perf_counter *hwc = &counter->hw; 1591 struct hw_perf_event *hwc = &event->hw;
1575 int idx = hwc->idx; 1592 int idx = hwc->idx;
1576 int ret; 1593 int ret;
1577 1594
1578 x86_perf_counter_update(counter, hwc, idx); 1595 x86_perf_event_update(event, hwc, idx);
1579 ret = x86_perf_counter_set_period(counter, hwc, idx); 1596 ret = x86_perf_event_set_period(event, hwc, idx);
1580 1597
1581 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 1598 if (event->state == PERF_EVENT_STATE_ACTIVE)
1582 intel_pmu_enable_counter(hwc, idx); 1599 intel_pmu_enable_event(hwc, idx);
1583 1600
1584 return ret; 1601 return ret;
1585} 1602}
1586 1603
1587static void intel_pmu_reset(void) 1604static void intel_pmu_reset(void)
1588{ 1605{
1589 struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; 1606 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1590 unsigned long flags; 1607 unsigned long flags;
1591 int idx; 1608 int idx;
1592 1609
1593 if (!x86_pmu.num_counters) 1610 if (!x86_pmu.num_events)
1594 return; 1611 return;
1595 1612
1596 local_irq_save(flags); 1613 local_irq_save(flags);
1597 1614
1598 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 1615 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1599 1616
1600 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1617 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1601 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 1618 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1602 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 1619 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1603 } 1620 }
1604 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { 1621 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1605 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 1622 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1606 } 1623 }
1607 if (ds) 1624 if (ds)
@@ -1613,39 +1630,38 @@ static void intel_pmu_reset(void)
1613static int p6_pmu_handle_irq(struct pt_regs *regs) 1630static int p6_pmu_handle_irq(struct pt_regs *regs)
1614{ 1631{
1615 struct perf_sample_data data; 1632 struct perf_sample_data data;
1616 struct cpu_hw_counters *cpuc; 1633 struct cpu_hw_events *cpuc;
1617 struct perf_counter *counter; 1634 struct perf_event *event;
1618 struct hw_perf_counter *hwc; 1635 struct hw_perf_event *hwc;
1619 int idx, handled = 0; 1636 int idx, handled = 0;
1620 u64 val; 1637 u64 val;
1621 1638
1622 data.regs = regs;
1623 data.addr = 0; 1639 data.addr = 0;
1624 1640
1625 cpuc = &__get_cpu_var(cpu_hw_counters); 1641 cpuc = &__get_cpu_var(cpu_hw_events);
1626 1642
1627 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1643 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1628 if (!test_bit(idx, cpuc->active_mask)) 1644 if (!test_bit(idx, cpuc->active_mask))
1629 continue; 1645 continue;
1630 1646
1631 counter = cpuc->counters[idx]; 1647 event = cpuc->events[idx];
1632 hwc = &counter->hw; 1648 hwc = &event->hw;
1633 1649
1634 val = x86_perf_counter_update(counter, hwc, idx); 1650 val = x86_perf_event_update(event, hwc, idx);
1635 if (val & (1ULL << (x86_pmu.counter_bits - 1))) 1651 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1636 continue; 1652 continue;
1637 1653
1638 /* 1654 /*
1639 * counter overflow 1655 * event overflow
1640 */ 1656 */
1641 handled = 1; 1657 handled = 1;
1642 data.period = counter->hw.last_period; 1658 data.period = event->hw.last_period;
1643 1659
1644 if (!x86_perf_counter_set_period(counter, hwc, idx)) 1660 if (!x86_perf_event_set_period(event, hwc, idx))
1645 continue; 1661 continue;
1646 1662
1647 if (perf_counter_overflow(counter, 1, &data)) 1663 if (perf_event_overflow(event, 1, &data, regs))
1648 p6_pmu_disable_counter(hwc, idx); 1664 p6_pmu_disable_event(hwc, idx);
1649 } 1665 }
1650 1666
1651 if (handled) 1667 if (handled)
@@ -1661,17 +1677,16 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
1661static int intel_pmu_handle_irq(struct pt_regs *regs) 1677static int intel_pmu_handle_irq(struct pt_regs *regs)
1662{ 1678{
1663 struct perf_sample_data data; 1679 struct perf_sample_data data;
1664 struct cpu_hw_counters *cpuc; 1680 struct cpu_hw_events *cpuc;
1665 int bit, loops; 1681 int bit, loops;
1666 u64 ack, status; 1682 u64 ack, status;
1667 1683
1668 data.regs = regs;
1669 data.addr = 0; 1684 data.addr = 0;
1670 1685
1671 cpuc = &__get_cpu_var(cpu_hw_counters); 1686 cpuc = &__get_cpu_var(cpu_hw_events);
1672 1687
1673 perf_disable(); 1688 perf_disable();
1674 intel_pmu_drain_bts_buffer(cpuc, &data); 1689 intel_pmu_drain_bts_buffer(cpuc);
1675 status = intel_pmu_get_status(); 1690 status = intel_pmu_get_status();
1676 if (!status) { 1691 if (!status) {
1677 perf_enable(); 1692 perf_enable();
@@ -1681,8 +1696,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1681 loops = 0; 1696 loops = 0;
1682again: 1697again:
1683 if (++loops > 100) { 1698 if (++loops > 100) {
1684 WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); 1699 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1685 perf_counter_print_debug(); 1700 perf_event_print_debug();
1686 intel_pmu_reset(); 1701 intel_pmu_reset();
1687 perf_enable(); 1702 perf_enable();
1688 return 1; 1703 return 1;
@@ -1691,19 +1706,19 @@ again:
1691 inc_irq_stat(apic_perf_irqs); 1706 inc_irq_stat(apic_perf_irqs);
1692 ack = status; 1707 ack = status;
1693 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 1708 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1694 struct perf_counter *counter = cpuc->counters[bit]; 1709 struct perf_event *event = cpuc->events[bit];
1695 1710
1696 clear_bit(bit, (unsigned long *) &status); 1711 clear_bit(bit, (unsigned long *) &status);
1697 if (!test_bit(bit, cpuc->active_mask)) 1712 if (!test_bit(bit, cpuc->active_mask))
1698 continue; 1713 continue;
1699 1714
1700 if (!intel_pmu_save_and_restart(counter)) 1715 if (!intel_pmu_save_and_restart(event))
1701 continue; 1716 continue;
1702 1717
1703 data.period = counter->hw.last_period; 1718 data.period = event->hw.last_period;
1704 1719
1705 if (perf_counter_overflow(counter, 1, &data)) 1720 if (perf_event_overflow(event, 1, &data, regs))
1706 intel_pmu_disable_counter(&counter->hw, bit); 1721 intel_pmu_disable_event(&event->hw, bit);
1707 } 1722 }
1708 1723
1709 intel_pmu_ack_status(ack); 1724 intel_pmu_ack_status(ack);
@@ -1723,39 +1738,38 @@ again:
1723static int amd_pmu_handle_irq(struct pt_regs *regs) 1738static int amd_pmu_handle_irq(struct pt_regs *regs)
1724{ 1739{
1725 struct perf_sample_data data; 1740 struct perf_sample_data data;
1726 struct cpu_hw_counters *cpuc; 1741 struct cpu_hw_events *cpuc;
1727 struct perf_counter *counter; 1742 struct perf_event *event;
1728 struct hw_perf_counter *hwc; 1743 struct hw_perf_event *hwc;
1729 int idx, handled = 0; 1744 int idx, handled = 0;
1730 u64 val; 1745 u64 val;
1731 1746
1732 data.regs = regs;
1733 data.addr = 0; 1747 data.addr = 0;
1734 1748
1735 cpuc = &__get_cpu_var(cpu_hw_counters); 1749 cpuc = &__get_cpu_var(cpu_hw_events);
1736 1750
1737 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1751 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1738 if (!test_bit(idx, cpuc->active_mask)) 1752 if (!test_bit(idx, cpuc->active_mask))
1739 continue; 1753 continue;
1740 1754
1741 counter = cpuc->counters[idx]; 1755 event = cpuc->events[idx];
1742 hwc = &counter->hw; 1756 hwc = &event->hw;
1743 1757
1744 val = x86_perf_counter_update(counter, hwc, idx); 1758 val = x86_perf_event_update(event, hwc, idx);
1745 if (val & (1ULL << (x86_pmu.counter_bits - 1))) 1759 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1746 continue; 1760 continue;
1747 1761
1748 /* 1762 /*
1749 * counter overflow 1763 * event overflow
1750 */ 1764 */
1751 handled = 1; 1765 handled = 1;
1752 data.period = counter->hw.last_period; 1766 data.period = event->hw.last_period;
1753 1767
1754 if (!x86_perf_counter_set_period(counter, hwc, idx)) 1768 if (!x86_perf_event_set_period(event, hwc, idx))
1755 continue; 1769 continue;
1756 1770
1757 if (perf_counter_overflow(counter, 1, &data)) 1771 if (perf_event_overflow(event, 1, &data, regs))
1758 amd_pmu_disable_counter(hwc, idx); 1772 amd_pmu_disable_event(hwc, idx);
1759 } 1773 }
1760 1774
1761 if (handled) 1775 if (handled)
@@ -1769,18 +1783,21 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1769 irq_enter(); 1783 irq_enter();
1770 ack_APIC_irq(); 1784 ack_APIC_irq();
1771 inc_irq_stat(apic_pending_irqs); 1785 inc_irq_stat(apic_pending_irqs);
1772 perf_counter_do_pending(); 1786 perf_event_do_pending();
1773 irq_exit(); 1787 irq_exit();
1774} 1788}
1775 1789
1776void set_perf_counter_pending(void) 1790void set_perf_event_pending(void)
1777{ 1791{
1778#ifdef CONFIG_X86_LOCAL_APIC 1792#ifdef CONFIG_X86_LOCAL_APIC
1793 if (!x86_pmu.apic || !x86_pmu_initialized())
1794 return;
1795
1779 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1796 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1780#endif 1797#endif
1781} 1798}
1782 1799
1783void perf_counters_lapic_init(void) 1800void perf_events_lapic_init(void)
1784{ 1801{
1785#ifdef CONFIG_X86_LOCAL_APIC 1802#ifdef CONFIG_X86_LOCAL_APIC
1786 if (!x86_pmu.apic || !x86_pmu_initialized()) 1803 if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1794,13 +1811,13 @@ void perf_counters_lapic_init(void)
1794} 1811}
1795 1812
1796static int __kprobes 1813static int __kprobes
1797perf_counter_nmi_handler(struct notifier_block *self, 1814perf_event_nmi_handler(struct notifier_block *self,
1798 unsigned long cmd, void *__args) 1815 unsigned long cmd, void *__args)
1799{ 1816{
1800 struct die_args *args = __args; 1817 struct die_args *args = __args;
1801 struct pt_regs *regs; 1818 struct pt_regs *regs;
1802 1819
1803 if (!atomic_read(&active_counters)) 1820 if (!atomic_read(&active_events))
1804 return NOTIFY_DONE; 1821 return NOTIFY_DONE;
1805 1822
1806 switch (cmd) { 1823 switch (cmd) {
@@ -1819,7 +1836,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
1819#endif 1836#endif
1820 /* 1837 /*
1821 * Can't rely on the handled return value to say it was our NMI, two 1838 * Can't rely on the handled return value to say it was our NMI, two
1822 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1839 * events could trigger 'simultaneously' raising two back-to-back NMIs.
1823 * 1840 *
1824 * If the first NMI handles both, the latter will be empty and daze 1841 * If the first NMI handles both, the latter will be empty and daze
1825 * the CPU. 1842 * the CPU.
@@ -1829,8 +1846,8 @@ perf_counter_nmi_handler(struct notifier_block *self,
1829 return NOTIFY_STOP; 1846 return NOTIFY_STOP;
1830} 1847}
1831 1848
1832static __read_mostly struct notifier_block perf_counter_nmi_notifier = { 1849static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1833 .notifier_call = perf_counter_nmi_handler, 1850 .notifier_call = perf_event_nmi_handler,
1834 .next = NULL, 1851 .next = NULL,
1835 .priority = 1 1852 .priority = 1
1836}; 1853};
@@ -1840,8 +1857,8 @@ static struct x86_pmu p6_pmu = {
1840 .handle_irq = p6_pmu_handle_irq, 1857 .handle_irq = p6_pmu_handle_irq,
1841 .disable_all = p6_pmu_disable_all, 1858 .disable_all = p6_pmu_disable_all,
1842 .enable_all = p6_pmu_enable_all, 1859 .enable_all = p6_pmu_enable_all,
1843 .enable = p6_pmu_enable_counter, 1860 .enable = p6_pmu_enable_event,
1844 .disable = p6_pmu_disable_counter, 1861 .disable = p6_pmu_disable_event,
1845 .eventsel = MSR_P6_EVNTSEL0, 1862 .eventsel = MSR_P6_EVNTSEL0,
1846 .perfctr = MSR_P6_PERFCTR0, 1863 .perfctr = MSR_P6_PERFCTR0,
1847 .event_map = p6_pmu_event_map, 1864 .event_map = p6_pmu_event_map,
@@ -1850,16 +1867,16 @@ static struct x86_pmu p6_pmu = {
1850 .apic = 1, 1867 .apic = 1,
1851 .max_period = (1ULL << 31) - 1, 1868 .max_period = (1ULL << 31) - 1,
1852 .version = 0, 1869 .version = 0,
1853 .num_counters = 2, 1870 .num_events = 2,
1854 /* 1871 /*
1855 * Counters have 40 bits implemented. However they are designed such 1872 * Events have 40 bits implemented. However they are designed such
1856 * that bits [32-39] are sign extensions of bit 31. As such the 1873 * that bits [32-39] are sign extensions of bit 31. As such the
1857 * effective width of a counter for P6-like PMU is 32 bits only. 1874 * effective width of a event for P6-like PMU is 32 bits only.
1858 * 1875 *
1859 * See IA-32 Intel Architecture Software developer manual Vol 3B 1876 * See IA-32 Intel Architecture Software developer manual Vol 3B
1860 */ 1877 */
1861 .counter_bits = 32, 1878 .event_bits = 32,
1862 .counter_mask = (1ULL << 32) - 1, 1879 .event_mask = (1ULL << 32) - 1,
1863}; 1880};
1864 1881
1865static struct x86_pmu intel_pmu = { 1882static struct x86_pmu intel_pmu = {
@@ -1867,8 +1884,8 @@ static struct x86_pmu intel_pmu = {
1867 .handle_irq = intel_pmu_handle_irq, 1884 .handle_irq = intel_pmu_handle_irq,
1868 .disable_all = intel_pmu_disable_all, 1885 .disable_all = intel_pmu_disable_all,
1869 .enable_all = intel_pmu_enable_all, 1886 .enable_all = intel_pmu_enable_all,
1870 .enable = intel_pmu_enable_counter, 1887 .enable = intel_pmu_enable_event,
1871 .disable = intel_pmu_disable_counter, 1888 .disable = intel_pmu_disable_event,
1872 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 1889 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1873 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 1890 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1874 .event_map = intel_pmu_event_map, 1891 .event_map = intel_pmu_event_map,
@@ -1878,7 +1895,7 @@ static struct x86_pmu intel_pmu = {
1878 /* 1895 /*
1879 * Intel PMCs cannot be accessed sanely above 32 bit width, 1896 * Intel PMCs cannot be accessed sanely above 32 bit width,
1880 * so we install an artificial 1<<31 period regardless of 1897 * so we install an artificial 1<<31 period regardless of
1881 * the generic counter period: 1898 * the generic event period:
1882 */ 1899 */
1883 .max_period = (1ULL << 31) - 1, 1900 .max_period = (1ULL << 31) - 1,
1884 .enable_bts = intel_pmu_enable_bts, 1901 .enable_bts = intel_pmu_enable_bts,
@@ -1890,16 +1907,16 @@ static struct x86_pmu amd_pmu = {
1890 .handle_irq = amd_pmu_handle_irq, 1907 .handle_irq = amd_pmu_handle_irq,
1891 .disable_all = amd_pmu_disable_all, 1908 .disable_all = amd_pmu_disable_all,
1892 .enable_all = amd_pmu_enable_all, 1909 .enable_all = amd_pmu_enable_all,
1893 .enable = amd_pmu_enable_counter, 1910 .enable = amd_pmu_enable_event,
1894 .disable = amd_pmu_disable_counter, 1911 .disable = amd_pmu_disable_event,
1895 .eventsel = MSR_K7_EVNTSEL0, 1912 .eventsel = MSR_K7_EVNTSEL0,
1896 .perfctr = MSR_K7_PERFCTR0, 1913 .perfctr = MSR_K7_PERFCTR0,
1897 .event_map = amd_pmu_event_map, 1914 .event_map = amd_pmu_event_map,
1898 .raw_event = amd_pmu_raw_event, 1915 .raw_event = amd_pmu_raw_event,
1899 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 1916 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1900 .num_counters = 4, 1917 .num_events = 4,
1901 .counter_bits = 48, 1918 .event_bits = 48,
1902 .counter_mask = (1ULL << 48) - 1, 1919 .event_mask = (1ULL << 48) - 1,
1903 .apic = 1, 1920 .apic = 1,
1904 /* use highest bit to detect overflow */ 1921 /* use highest bit to detect overflow */
1905 .max_period = (1ULL << 47) - 1, 1922 .max_period = (1ULL << 47) - 1,
@@ -1956,7 +1973,7 @@ static int intel_pmu_init(void)
1956 1973
1957 /* 1974 /*
1958 * Check whether the Architectural PerfMon supports 1975 * Check whether the Architectural PerfMon supports
1959 * Branch Misses Retired Event or not. 1976 * Branch Misses Retired hw_event or not.
1960 */ 1977 */
1961 cpuid(10, &eax.full, &ebx, &unused, &edx.full); 1978 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1962 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) 1979 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
@@ -1968,15 +1985,15 @@ static int intel_pmu_init(void)
1968 1985
1969 x86_pmu = intel_pmu; 1986 x86_pmu = intel_pmu;
1970 x86_pmu.version = version; 1987 x86_pmu.version = version;
1971 x86_pmu.num_counters = eax.split.num_counters; 1988 x86_pmu.num_events = eax.split.num_events;
1972 x86_pmu.counter_bits = eax.split.bit_width; 1989 x86_pmu.event_bits = eax.split.bit_width;
1973 x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; 1990 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
1974 1991
1975 /* 1992 /*
1976 * Quirk: v2 perfmon does not report fixed-purpose counters, so 1993 * Quirk: v2 perfmon does not report fixed-purpose events, so
1977 * assume at least 3 counters: 1994 * assume at least 3 events:
1978 */ 1995 */
1979 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 1996 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
1980 1997
1981 /* 1998 /*
1982 * Install the hw-cache-events table: 1999 * Install the hw-cache-events table:
@@ -2023,11 +2040,11 @@ static int amd_pmu_init(void)
2023 return 0; 2040 return 0;
2024} 2041}
2025 2042
2026void __init init_hw_perf_counters(void) 2043void __init init_hw_perf_events(void)
2027{ 2044{
2028 int err; 2045 int err;
2029 2046
2030 pr_info("Performance Counters: "); 2047 pr_info("Performance Events: ");
2031 2048
2032 switch (boot_cpu_data.x86_vendor) { 2049 switch (boot_cpu_data.x86_vendor) {
2033 case X86_VENDOR_INTEL: 2050 case X86_VENDOR_INTEL:
@@ -2040,45 +2057,45 @@ void __init init_hw_perf_counters(void)
2040 return; 2057 return;
2041 } 2058 }
2042 if (err != 0) { 2059 if (err != 0) {
2043 pr_cont("no PMU driver, software counters only.\n"); 2060 pr_cont("no PMU driver, software events only.\n");
2044 return; 2061 return;
2045 } 2062 }
2046 2063
2047 pr_cont("%s PMU driver.\n", x86_pmu.name); 2064 pr_cont("%s PMU driver.\n", x86_pmu.name);
2048 2065
2049 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 2066 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
2050 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", 2067 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2051 x86_pmu.num_counters, X86_PMC_MAX_GENERIC); 2068 x86_pmu.num_events, X86_PMC_MAX_GENERIC);
2052 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 2069 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
2053 } 2070 }
2054 perf_counter_mask = (1 << x86_pmu.num_counters) - 1; 2071 perf_event_mask = (1 << x86_pmu.num_events) - 1;
2055 perf_max_counters = x86_pmu.num_counters; 2072 perf_max_events = x86_pmu.num_events;
2056 2073
2057 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 2074 if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
2058 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", 2075 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2059 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); 2076 x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
2060 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; 2077 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2061 } 2078 }
2062 2079
2063 perf_counter_mask |= 2080 perf_event_mask |=
2064 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 2081 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
2065 x86_pmu.intel_ctrl = perf_counter_mask; 2082 x86_pmu.intel_ctrl = perf_event_mask;
2066 2083
2067 perf_counters_lapic_init(); 2084 perf_events_lapic_init();
2068 register_die_notifier(&perf_counter_nmi_notifier); 2085 register_die_notifier(&perf_event_nmi_notifier);
2069 2086
2070 pr_info("... version: %d\n", x86_pmu.version); 2087 pr_info("... version: %d\n", x86_pmu.version);
2071 pr_info("... bit width: %d\n", x86_pmu.counter_bits); 2088 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2072 pr_info("... generic counters: %d\n", x86_pmu.num_counters); 2089 pr_info("... generic registers: %d\n", x86_pmu.num_events);
2073 pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); 2090 pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
2074 pr_info("... max period: %016Lx\n", x86_pmu.max_period); 2091 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
2075 pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); 2092 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
2076 pr_info("... counter mask: %016Lx\n", perf_counter_mask); 2093 pr_info("... event mask: %016Lx\n", perf_event_mask);
2077} 2094}
2078 2095
2079static inline void x86_pmu_read(struct perf_counter *counter) 2096static inline void x86_pmu_read(struct perf_event *event)
2080{ 2097{
2081 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); 2098 x86_perf_event_update(event, &event->hw, event->hw.idx);
2082} 2099}
2083 2100
2084static const struct pmu pmu = { 2101static const struct pmu pmu = {
@@ -2088,13 +2105,16 @@ static const struct pmu pmu = {
2088 .unthrottle = x86_pmu_unthrottle, 2105 .unthrottle = x86_pmu_unthrottle,
2089}; 2106};
2090 2107
2091const struct pmu *hw_perf_counter_init(struct perf_counter *counter) 2108const struct pmu *hw_perf_event_init(struct perf_event *event)
2092{ 2109{
2093 int err; 2110 int err;
2094 2111
2095 err = __hw_perf_counter_init(counter); 2112 err = __hw_perf_event_init(event);
2096 if (err) 2113 if (err) {
2114 if (event->destroy)
2115 event->destroy(event);
2097 return ERR_PTR(err); 2116 return ERR_PTR(err);
2117 }
2098 2118
2099 return &pmu; 2119 return &pmu;
2100} 2120}
@@ -2275,7 +2295,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2275 return entry; 2295 return entry;
2276} 2296}
2277 2297
2278void hw_perf_counter_setup_online(int cpu) 2298void hw_perf_event_setup_online(int cpu)
2279{ 2299{
2280 init_debug_store_on_cpu(cpu); 2300 init_debug_store_on_cpu(cpu);
2281} 2301}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 392bea43b890..fab786f60ed6 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -20,7 +20,7 @@
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_counter.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk { 25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr; 26 unsigned int cccr_msr;
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0a46b4df5d80..1cbed97b59cf 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -58,6 +58,9 @@ static unsigned long vmware_get_tsc_khz(void)
58 tsc_hz = eax | (((uint64_t)ebx) << 32); 58 tsc_hz = eax | (((uint64_t)ebx) << 32);
59 do_div(tsc_hz, 1000); 59 do_div(tsc_hz, 1000);
60 BUG_ON(tsc_hz >> 32); 60 BUG_ON(tsc_hz >> 32);
61 printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
62 (unsigned long) tsc_hz / 1000,
63 (unsigned long) tsc_hz % 1000);
61 return tsc_hz; 64 return tsc_hz;
62} 65}
63 66
@@ -69,6 +72,9 @@ void __init vmware_platform_setup(void)
69 72
70 if (ebx != UINT_MAX) 73 if (ebx != UINT_MAX)
71 x86_platform.calibrate_tsc = vmware_get_tsc_khz; 74 x86_platform.calibrate_tsc = vmware_get_tsc_khz;
75 else
76 printk(KERN_WARNING
77 "Failed to get TSC freq from the hypervisor\n");
72} 78}
73 79
74/* 80/*
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index b07af8861244..6a52d4b36a30 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -182,7 +182,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
182 .notifier_call = cpuid_class_cpu_callback, 182 .notifier_call = cpuid_class_cpu_callback,
183}; 183};
184 184
185static char *cpuid_nodename(struct device *dev) 185static char *cpuid_devnode(struct device *dev, mode_t *mode)
186{ 186{
187 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); 187 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
188} 188}
@@ -203,7 +203,7 @@ static int __init cpuid_init(void)
203 err = PTR_ERR(cpuid_class); 203 err = PTR_ERR(cpuid_class);
204 goto out_chrdev; 204 goto out_chrdev;
205 } 205 }
206 cpuid_class->nodename = cpuid_nodename; 206 cpuid_class->devnode = cpuid_devnode;
207 for_each_online_cpu(i) { 207 for_each_online_cpu(i) {
208 err = cpuid_device_create(i); 208 err = cpuid_device_create(i);
209 if (err != 0) 209 if (err != 0)
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index f7cdb3b457aa..cd97ce18c29d 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -16,6 +16,22 @@ static void *kdump_buf_page;
16/* Stores the physical address of elf header of crash image. */ 16/* Stores the physical address of elf header of crash image. */
17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; 17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
18 18
19static inline bool is_crashed_pfn_valid(unsigned long pfn)
20{
21#ifndef CONFIG_X86_PAE
22 /*
23 * non-PAE kdump kernel executed from a PAE one will crop high pte
24 * bits and poke unwanted space counting again from address 0, we
25 * don't want that. pte must fit into unsigned long. In fact the
26 * test checks high 12 bits for being zero (pfn will be shifted left
27 * by PAGE_SHIFT).
28 */
29 return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn;
30#else
31 return true;
32#endif
33}
34
19/** 35/**
20 * copy_oldmem_page - copy one page from "oldmem" 36 * copy_oldmem_page - copy one page from "oldmem"
21 * @pfn: page frame number to be copied 37 * @pfn: page frame number to be copied
@@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
41 if (!csize) 57 if (!csize)
42 return 0; 58 return 0;
43 59
60 if (!is_crashed_pfn_valid(pfn))
61 return -EFAULT;
62
44 vaddr = kmap_atomic_pfn(pfn, KM_PTE0); 63 vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
45 64
46 if (!userbuf) { 65 if (!userbuf) {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index bca5fba91c9e..f7dd2a7c3bf4 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -5,7 +5,6 @@
5#include <linux/kallsyms.h> 5#include <linux/kallsyms.h>
6#include <linux/kprobes.h> 6#include <linux/kprobes.h>
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h> 8#include <linux/hardirq.h>
10#include <linux/kdebug.h> 9#include <linux/kdebug.h>
11#include <linux/module.h> 10#include <linux/module.h>
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 54b0a3276766..a071e6be177e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -5,7 +5,6 @@
5#include <linux/kallsyms.h> 5#include <linux/kallsyms.h>
6#include <linux/kprobes.h> 6#include <linux/kprobes.h>
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h> 8#include <linux/hardirq.h>
10#include <linux/kdebug.h> 9#include <linux/kdebug.h>
11#include <linux/module.h> 10#include <linux/module.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a3210ce1eccd..d17d482a04f4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void)
1331 struct resource *res; 1331 struct resource *res;
1332 u64 end; 1332 u64 end;
1333 1333
1334 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); 1334 res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
1335 e820_res = res; 1335 e820_res = res;
1336 for (i = 0; i < e820.nr_map; i++) { 1336 for (i = 0; i < e820.nr_map; i++) {
1337 end = e820.map[i].addr + e820.map[i].size - 1; 1337 end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
1378 if (mb < 16) 1378 if (mb < 16)
1379 return 1024*1024; 1379 return 1024*1024;
1380 1380
1381 /* To 32MB for anything above that */ 1381 /* To 64MB for anything above that */
1382 return 32*1024*1024; 1382 return 64*1024*1024;
1383} 1383}
1384 1384
1385#define MAX_RESOURCE_SIZE ((resource_size_t)-1) 1385#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 335f049d110f..b9c830c12b4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -160,721 +160,6 @@ static struct console early_serial_console = {
160 .index = -1, 160 .index = -1,
161}; 161};
162 162
163#ifdef CONFIG_EARLY_PRINTK_DBGP
164
165static struct ehci_caps __iomem *ehci_caps;
166static struct ehci_regs __iomem *ehci_regs;
167static struct ehci_dbg_port __iomem *ehci_debug;
168static unsigned int dbgp_endpoint_out;
169
170struct ehci_dev {
171 u32 bus;
172 u32 slot;
173 u32 func;
174};
175
176static struct ehci_dev ehci_dev;
177
178#define USB_DEBUG_DEVNUM 127
179
180#define DBGP_DATA_TOGGLE 0x8800
181
182static inline u32 dbgp_pid_update(u32 x, u32 tok)
183{
184 return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
185}
186
187static inline u32 dbgp_len_update(u32 x, u32 len)
188{
189 return (x & ~0x0f) | (len & 0x0f);
190}
191
192/*
193 * USB Packet IDs (PIDs)
194 */
195
196/* token */
197#define USB_PID_OUT 0xe1
198#define USB_PID_IN 0x69
199#define USB_PID_SOF 0xa5
200#define USB_PID_SETUP 0x2d
201/* handshake */
202#define USB_PID_ACK 0xd2
203#define USB_PID_NAK 0x5a
204#define USB_PID_STALL 0x1e
205#define USB_PID_NYET 0x96
206/* data */
207#define USB_PID_DATA0 0xc3
208#define USB_PID_DATA1 0x4b
209#define USB_PID_DATA2 0x87
210#define USB_PID_MDATA 0x0f
211/* Special */
212#define USB_PID_PREAMBLE 0x3c
213#define USB_PID_ERR 0x3c
214#define USB_PID_SPLIT 0x78
215#define USB_PID_PING 0xb4
216#define USB_PID_UNDEF_0 0xf0
217
218#define USB_PID_DATA_TOGGLE 0x88
219#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
220
221#define PCI_CAP_ID_EHCI_DEBUG 0xa
222
223#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
224#define HUB_SHORT_RESET_TIME 10
225#define HUB_LONG_RESET_TIME 200
226#define HUB_RESET_TIMEOUT 500
227
228#define DBGP_MAX_PACKET 8
229
230static int dbgp_wait_until_complete(void)
231{
232 u32 ctrl;
233 int loop = 0x100000;
234
235 do {
236 ctrl = readl(&ehci_debug->control);
237 /* Stop when the transaction is finished */
238 if (ctrl & DBGP_DONE)
239 break;
240 } while (--loop > 0);
241
242 if (!loop)
243 return -1;
244
245 /*
246 * Now that we have observed the completed transaction,
247 * clear the done bit.
248 */
249 writel(ctrl | DBGP_DONE, &ehci_debug->control);
250 return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
251}
252
253static void __init dbgp_mdelay(int ms)
254{
255 int i;
256
257 while (ms--) {
258 for (i = 0; i < 1000; i++)
259 outb(0x1, 0x80);
260 }
261}
262
263static void dbgp_breath(void)
264{
265 /* Sleep to give the debug port a chance to breathe */
266}
267
268static int dbgp_wait_until_done(unsigned ctrl)
269{
270 u32 pids, lpid;
271 int ret;
272 int loop = 3;
273
274retry:
275 writel(ctrl | DBGP_GO, &ehci_debug->control);
276 ret = dbgp_wait_until_complete();
277 pids = readl(&ehci_debug->pids);
278 lpid = DBGP_PID_GET(pids);
279
280 if (ret < 0)
281 return ret;
282
283 /*
284 * If the port is getting full or it has dropped data
285 * start pacing ourselves, not necessary but it's friendly.
286 */
287 if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
288 dbgp_breath();
289
290 /* If I get a NACK reissue the transmission */
291 if (lpid == USB_PID_NAK) {
292 if (--loop > 0)
293 goto retry;
294 }
295
296 return ret;
297}
298
299static void dbgp_set_data(const void *buf, int size)
300{
301 const unsigned char *bytes = buf;
302 u32 lo, hi;
303 int i;
304
305 lo = hi = 0;
306 for (i = 0; i < 4 && i < size; i++)
307 lo |= bytes[i] << (8*i);
308 for (; i < 8 && i < size; i++)
309 hi |= bytes[i] << (8*(i - 4));
310 writel(lo, &ehci_debug->data03);
311 writel(hi, &ehci_debug->data47);
312}
313
314static void __init dbgp_get_data(void *buf, int size)
315{
316 unsigned char *bytes = buf;
317 u32 lo, hi;
318 int i;
319
320 lo = readl(&ehci_debug->data03);
321 hi = readl(&ehci_debug->data47);
322 for (i = 0; i < 4 && i < size; i++)
323 bytes[i] = (lo >> (8*i)) & 0xff;
324 for (; i < 8 && i < size; i++)
325 bytes[i] = (hi >> (8*(i - 4))) & 0xff;
326}
327
328static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
329 const char *bytes, int size)
330{
331 u32 pids, addr, ctrl;
332 int ret;
333
334 if (size > DBGP_MAX_PACKET)
335 return -1;
336
337 addr = DBGP_EPADDR(devnum, endpoint);
338
339 pids = readl(&ehci_debug->pids);
340 pids = dbgp_pid_update(pids, USB_PID_OUT);
341
342 ctrl = readl(&ehci_debug->control);
343 ctrl = dbgp_len_update(ctrl, size);
344 ctrl |= DBGP_OUT;
345 ctrl |= DBGP_GO;
346
347 dbgp_set_data(bytes, size);
348 writel(addr, &ehci_debug->address);
349 writel(pids, &ehci_debug->pids);
350
351 ret = dbgp_wait_until_done(ctrl);
352 if (ret < 0)
353 return ret;
354
355 return ret;
356}
357
358static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
359 int size)
360{
361 u32 pids, addr, ctrl;
362 int ret;
363
364 if (size > DBGP_MAX_PACKET)
365 return -1;
366
367 addr = DBGP_EPADDR(devnum, endpoint);
368
369 pids = readl(&ehci_debug->pids);
370 pids = dbgp_pid_update(pids, USB_PID_IN);
371
372 ctrl = readl(&ehci_debug->control);
373 ctrl = dbgp_len_update(ctrl, size);
374 ctrl &= ~DBGP_OUT;
375 ctrl |= DBGP_GO;
376
377 writel(addr, &ehci_debug->address);
378 writel(pids, &ehci_debug->pids);
379 ret = dbgp_wait_until_done(ctrl);
380 if (ret < 0)
381 return ret;
382
383 if (size > ret)
384 size = ret;
385 dbgp_get_data(data, size);
386 return ret;
387}
388
389static int __init dbgp_control_msg(unsigned devnum, int requesttype,
390 int request, int value, int index, void *data, int size)
391{
392 u32 pids, addr, ctrl;
393 struct usb_ctrlrequest req;
394 int read;
395 int ret;
396
397 read = (requesttype & USB_DIR_IN) != 0;
398 if (size > (read ? DBGP_MAX_PACKET:0))
399 return -1;
400
401 /* Compute the control message */
402 req.bRequestType = requesttype;
403 req.bRequest = request;
404 req.wValue = cpu_to_le16(value);
405 req.wIndex = cpu_to_le16(index);
406 req.wLength = cpu_to_le16(size);
407
408 pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
409 addr = DBGP_EPADDR(devnum, 0);
410
411 ctrl = readl(&ehci_debug->control);
412 ctrl = dbgp_len_update(ctrl, sizeof(req));
413 ctrl |= DBGP_OUT;
414 ctrl |= DBGP_GO;
415
416 /* Send the setup message */
417 dbgp_set_data(&req, sizeof(req));
418 writel(addr, &ehci_debug->address);
419 writel(pids, &ehci_debug->pids);
420 ret = dbgp_wait_until_done(ctrl);
421 if (ret < 0)
422 return ret;
423
424 /* Read the result */
425 return dbgp_bulk_read(devnum, 0, data, size);
426}
427
428
429/* Find a PCI capability */
430static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
431{
432 u8 pos;
433 int bytes;
434
435 if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
436 PCI_STATUS_CAP_LIST))
437 return 0;
438
439 pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
440 for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
441 u8 id;
442
443 pos &= ~3;
444 id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
445 if (id == 0xff)
446 break;
447 if (id == cap)
448 return pos;
449
450 pos = read_pci_config_byte(num, slot, func,
451 pos+PCI_CAP_LIST_NEXT);
452 }
453 return 0;
454}
455
456static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
457{
458 u32 class;
459
460 class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
461 if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
462 return 0;
463
464 return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
465}
466
467static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
468{
469 u32 bus, slot, func;
470
471 for (bus = 0; bus < 256; bus++) {
472 for (slot = 0; slot < 32; slot++) {
473 for (func = 0; func < 8; func++) {
474 unsigned cap;
475
476 cap = __find_dbgp(bus, slot, func);
477
478 if (!cap)
479 continue;
480 if (ehci_num-- != 0)
481 continue;
482 *rbus = bus;
483 *rslot = slot;
484 *rfunc = func;
485 return cap;
486 }
487 }
488 }
489 return 0;
490}
491
492static int __init ehci_reset_port(int port)
493{
494 u32 portsc;
495 u32 delay_time, delay;
496 int loop;
497
498 /* Reset the usb debug port */
499 portsc = readl(&ehci_regs->port_status[port - 1]);
500 portsc &= ~PORT_PE;
501 portsc |= PORT_RESET;
502 writel(portsc, &ehci_regs->port_status[port - 1]);
503
504 delay = HUB_ROOT_RESET_TIME;
505 for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
506 delay_time += delay) {
507 dbgp_mdelay(delay);
508
509 portsc = readl(&ehci_regs->port_status[port - 1]);
510 if (portsc & PORT_RESET) {
511 /* force reset to complete */
512 loop = 2;
513 writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
514 &ehci_regs->port_status[port - 1]);
515 do {
516 portsc = readl(&ehci_regs->port_status[port-1]);
517 } while ((portsc & PORT_RESET) && (--loop > 0));
518 }
519
520 /* Device went away? */
521 if (!(portsc & PORT_CONNECT))
522 return -ENOTCONN;
523
524 /* bomb out completely if something weird happend */
525 if ((portsc & PORT_CSC))
526 return -EINVAL;
527
528 /* If we've finished resetting, then break out of the loop */
529 if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
530 return 0;
531 }
532 return -EBUSY;
533}
534
535static int __init ehci_wait_for_port(int port)
536{
537 u32 status;
538 int ret, reps;
539
540 for (reps = 0; reps < 3; reps++) {
541 dbgp_mdelay(100);
542 status = readl(&ehci_regs->status);
543 if (status & STS_PCD) {
544 ret = ehci_reset_port(port);
545 if (ret == 0)
546 return 0;
547 }
548 }
549 return -ENOTCONN;
550}
551
552#ifdef DBGP_DEBUG
553# define dbgp_printk early_printk
554#else
555static inline void dbgp_printk(const char *fmt, ...) { }
556#endif
557
558typedef void (*set_debug_port_t)(int port);
559
560static void __init default_set_debug_port(int port)
561{
562}
563
564static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
565
566static void __init nvidia_set_debug_port(int port)
567{
568 u32 dword;
569 dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
570 0x74);
571 dword &= ~(0x0f<<12);
572 dword |= ((port & 0x0f)<<12);
573 write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
574 dword);
575 dbgp_printk("set debug port to %d\n", port);
576}
577
578static void __init detect_set_debug_port(void)
579{
580 u32 vendorid;
581
582 vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
583 0x00);
584
585 if ((vendorid & 0xffff) == 0x10de) {
586 dbgp_printk("using nvidia set_debug_port\n");
587 set_debug_port = nvidia_set_debug_port;
588 }
589}
590
591static int __init ehci_setup(void)
592{
593 struct usb_debug_descriptor dbgp_desc;
594 u32 cmd, ctrl, status, portsc, hcs_params;
595 u32 debug_port, new_debug_port = 0, n_ports;
596 u32 devnum;
597 int ret, i;
598 int loop;
599 int port_map_tried;
600 int playtimes = 3;
601
602try_next_time:
603 port_map_tried = 0;
604
605try_next_port:
606
607 hcs_params = readl(&ehci_caps->hcs_params);
608 debug_port = HCS_DEBUG_PORT(hcs_params);
609 n_ports = HCS_N_PORTS(hcs_params);
610
611 dbgp_printk("debug_port: %d\n", debug_port);
612 dbgp_printk("n_ports: %d\n", n_ports);
613
614 for (i = 1; i <= n_ports; i++) {
615 portsc = readl(&ehci_regs->port_status[i-1]);
616 dbgp_printk("portstatus%d: %08x\n", i, portsc);
617 }
618
619 if (port_map_tried && (new_debug_port != debug_port)) {
620 if (--playtimes) {
621 set_debug_port(new_debug_port);
622 goto try_next_time;
623 }
624 return -1;
625 }
626
627 loop = 10;
628 /* Reset the EHCI controller */
629 cmd = readl(&ehci_regs->command);
630 cmd |= CMD_RESET;
631 writel(cmd, &ehci_regs->command);
632 do {
633 cmd = readl(&ehci_regs->command);
634 } while ((cmd & CMD_RESET) && (--loop > 0));
635
636 if (!loop) {
637 dbgp_printk("can not reset ehci\n");
638 return -1;
639 }
640 dbgp_printk("ehci reset done\n");
641
642 /* Claim ownership, but do not enable yet */
643 ctrl = readl(&ehci_debug->control);
644 ctrl |= DBGP_OWNER;
645 ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
646 writel(ctrl, &ehci_debug->control);
647
648 /* Start the ehci running */
649 cmd = readl(&ehci_regs->command);
650 cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
651 cmd |= CMD_RUN;
652 writel(cmd, &ehci_regs->command);
653
654 /* Ensure everything is routed to the EHCI */
655 writel(FLAG_CF, &ehci_regs->configured_flag);
656
657 /* Wait until the controller is no longer halted */
658 loop = 10;
659 do {
660 status = readl(&ehci_regs->status);
661 } while ((status & STS_HALT) && (--loop > 0));
662
663 if (!loop) {
664 dbgp_printk("ehci can be started\n");
665 return -1;
666 }
667 dbgp_printk("ehci started\n");
668
669 /* Wait for a device to show up in the debug port */
670 ret = ehci_wait_for_port(debug_port);
671 if (ret < 0) {
672 dbgp_printk("No device found in debug port\n");
673 goto next_debug_port;
674 }
675 dbgp_printk("ehci wait for port done\n");
676
677 /* Enable the debug port */
678 ctrl = readl(&ehci_debug->control);
679 ctrl |= DBGP_CLAIM;
680 writel(ctrl, &ehci_debug->control);
681 ctrl = readl(&ehci_debug->control);
682 if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
683 dbgp_printk("No device in debug port\n");
684 writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
685 goto err;
686 }
687 dbgp_printk("debug ported enabled\n");
688
689 /* Completely transfer the debug device to the debug controller */
690 portsc = readl(&ehci_regs->port_status[debug_port - 1]);
691 portsc &= ~PORT_PE;
692 writel(portsc, &ehci_regs->port_status[debug_port - 1]);
693
694 dbgp_mdelay(100);
695
696 /* Find the debug device and make it device number 127 */
697 for (devnum = 0; devnum <= 127; devnum++) {
698 ret = dbgp_control_msg(devnum,
699 USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
700 USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
701 &dbgp_desc, sizeof(dbgp_desc));
702 if (ret > 0)
703 break;
704 }
705 if (devnum > 127) {
706 dbgp_printk("Could not find attached debug device\n");
707 goto err;
708 }
709 if (ret < 0) {
710 dbgp_printk("Attached device is not a debug device\n");
711 goto err;
712 }
713 dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
714
715 /* Move the device to 127 if it isn't already there */
716 if (devnum != USB_DEBUG_DEVNUM) {
717 ret = dbgp_control_msg(devnum,
718 USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
719 USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
720 if (ret < 0) {
721 dbgp_printk("Could not move attached device to %d\n",
722 USB_DEBUG_DEVNUM);
723 goto err;
724 }
725 devnum = USB_DEBUG_DEVNUM;
726 dbgp_printk("debug device renamed to 127\n");
727 }
728
729 /* Enable the debug interface */
730 ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
731 USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
732 USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
733 if (ret < 0) {
734 dbgp_printk(" Could not enable the debug device\n");
735 goto err;
736 }
737 dbgp_printk("debug interface enabled\n");
738
739 /* Perform a small write to get the even/odd data state in sync
740 */
741 ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
742 if (ret < 0) {
743 dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
744 goto err;
745 }
746 dbgp_printk("small write doned\n");
747
748 return 0;
749err:
750 /* Things didn't work so remove my claim */
751 ctrl = readl(&ehci_debug->control);
752 ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
753 writel(ctrl, &ehci_debug->control);
754 return -1;
755
756next_debug_port:
757 port_map_tried |= (1<<(debug_port - 1));
758 new_debug_port = ((debug_port-1+1)%n_ports) + 1;
759 if (port_map_tried != ((1<<n_ports) - 1)) {
760 set_debug_port(new_debug_port);
761 goto try_next_port;
762 }
763 if (--playtimes) {
764 set_debug_port(new_debug_port);
765 goto try_next_time;
766 }
767
768 return -1;
769}
770
771static int __init early_dbgp_init(char *s)
772{
773 u32 debug_port, bar, offset;
774 u32 bus, slot, func, cap;
775 void __iomem *ehci_bar;
776 u32 dbgp_num;
777 u32 bar_val;
778 char *e;
779 int ret;
780 u8 byte;
781
782 if (!early_pci_allowed())
783 return -1;
784
785 dbgp_num = 0;
786 if (*s)
787 dbgp_num = simple_strtoul(s, &e, 10);
788 dbgp_printk("dbgp_num: %d\n", dbgp_num);
789
790 cap = find_dbgp(dbgp_num, &bus, &slot, &func);
791 if (!cap)
792 return -1;
793
794 dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
795 func);
796
797 debug_port = read_pci_config(bus, slot, func, cap);
798 bar = (debug_port >> 29) & 0x7;
799 bar = (bar * 4) + 0xc;
800 offset = (debug_port >> 16) & 0xfff;
801 dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
802 if (bar != PCI_BASE_ADDRESS_0) {
803 dbgp_printk("only debug ports on bar 1 handled.\n");
804
805 return -1;
806 }
807
808 bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
809 dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
810 if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
811 dbgp_printk("only simple 32bit mmio bars supported\n");
812
813 return -1;
814 }
815
816 /* double check if the mem space is enabled */
817 byte = read_pci_config_byte(bus, slot, func, 0x04);
818 if (!(byte & 0x2)) {
819 byte |= 0x02;
820 write_pci_config_byte(bus, slot, func, 0x04, byte);
821 dbgp_printk("mmio for ehci enabled\n");
822 }
823
824 /*
825 * FIXME I don't have the bar size so just guess PAGE_SIZE is more
826 * than enough. 1K is the biggest I have seen.
827 */
828 set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
829 ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
830 ehci_bar += bar_val & ~PAGE_MASK;
831 dbgp_printk("ehci_bar: %p\n", ehci_bar);
832
833 ehci_caps = ehci_bar;
834 ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
835 ehci_debug = ehci_bar + offset;
836 ehci_dev.bus = bus;
837 ehci_dev.slot = slot;
838 ehci_dev.func = func;
839
840 detect_set_debug_port();
841
842 ret = ehci_setup();
843 if (ret < 0) {
844 dbgp_printk("ehci_setup failed\n");
845 ehci_debug = NULL;
846
847 return -1;
848 }
849
850 return 0;
851}
852
853static void early_dbgp_write(struct console *con, const char *str, u32 n)
854{
855 int chunk, ret;
856
857 if (!ehci_debug)
858 return;
859 while (n > 0) {
860 chunk = n;
861 if (chunk > DBGP_MAX_PACKET)
862 chunk = DBGP_MAX_PACKET;
863 ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
864 dbgp_endpoint_out, str, chunk);
865 str += chunk;
866 n -= chunk;
867 }
868}
869
870static struct console early_dbgp_console = {
871 .name = "earlydbg",
872 .write = early_dbgp_write,
873 .flags = CON_PRINTBUFFER,
874 .index = -1,
875};
876#endif
877
878/* Direct interface for emergencies */ 163/* Direct interface for emergencies */
879static struct console *early_console = &early_vga_console; 164static struct console *early_console = &early_vga_console;
880static int __initdata early_console_initialized; 165static int __initdata early_console_initialized;
@@ -891,10 +176,24 @@ asmlinkage void early_printk(const char *fmt, ...)
891 va_end(ap); 176 va_end(ap);
892} 177}
893 178
179static inline void early_console_register(struct console *con, int keep_early)
180{
181 if (early_console->index != -1) {
182 printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
183 con->name);
184 return;
185 }
186 early_console = con;
187 if (keep_early)
188 early_console->flags &= ~CON_BOOT;
189 else
190 early_console->flags |= CON_BOOT;
191 register_console(early_console);
192}
894 193
895static int __init setup_early_printk(char *buf) 194static int __init setup_early_printk(char *buf)
896{ 195{
897 int keep_early; 196 int keep;
898 197
899 if (!buf) 198 if (!buf)
900 return 0; 199 return 0;
@@ -903,42 +202,37 @@ static int __init setup_early_printk(char *buf)
903 return 0; 202 return 0;
904 early_console_initialized = 1; 203 early_console_initialized = 1;
905 204
906 keep_early = (strstr(buf, "keep") != NULL); 205 keep = (strstr(buf, "keep") != NULL);
907 206
908 if (!strncmp(buf, "serial", 6)) { 207 while (*buf != '\0') {
909 early_serial_init(buf + 6); 208 if (!strncmp(buf, "serial", 6)) {
910 early_console = &early_serial_console; 209 buf += 6;
911 } else if (!strncmp(buf, "ttyS", 4)) { 210 early_serial_init(buf);
912 early_serial_init(buf); 211 early_console_register(&early_serial_console, keep);
913 early_console = &early_serial_console; 212 if (!strncmp(buf, ",ttyS", 5))
914 } else if (!strncmp(buf, "vga", 3) 213 buf += 5;
915 && boot_params.screen_info.orig_video_isVGA == 1) { 214 }
916 max_xpos = boot_params.screen_info.orig_video_cols; 215 if (!strncmp(buf, "ttyS", 4)) {
917 max_ypos = boot_params.screen_info.orig_video_lines; 216 early_serial_init(buf + 4);
918 current_ypos = boot_params.screen_info.orig_y; 217 early_console_register(&early_serial_console, keep);
919 early_console = &early_vga_console; 218 }
219 if (!strncmp(buf, "vga", 3) &&
220 boot_params.screen_info.orig_video_isVGA == 1) {
221 max_xpos = boot_params.screen_info.orig_video_cols;
222 max_ypos = boot_params.screen_info.orig_video_lines;
223 current_ypos = boot_params.screen_info.orig_y;
224 early_console_register(&early_vga_console, keep);
225 }
920#ifdef CONFIG_EARLY_PRINTK_DBGP 226#ifdef CONFIG_EARLY_PRINTK_DBGP
921 } else if (!strncmp(buf, "dbgp", 4)) { 227 if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
922 if (early_dbgp_init(buf+4) < 0) 228 early_console_register(&early_dbgp_console, keep);
923 return 0;
924 early_console = &early_dbgp_console;
925 /*
926 * usb subsys will reset ehci controller, so don't keep
927 * that early console
928 */
929 keep_early = 0;
930#endif 229#endif
931#ifdef CONFIG_HVC_XEN 230#ifdef CONFIG_HVC_XEN
932 } else if (!strncmp(buf, "xen", 3)) { 231 if (!strncmp(buf, "xen", 3))
933 early_console = &xenboot_console; 232 early_console_register(&xenboot_console, keep);
934#endif 233#endif
234 buf++;
935 } 235 }
936
937 if (keep_early)
938 early_console->flags &= ~CON_BOOT;
939 else
940 early_console->flags |= CON_BOOT;
941 register_console(early_console);
942 return 0; 236 return 0;
943} 237}
944 238
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index ad5bd988fb79..cdcfb122f256 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -454,8 +454,10 @@ void __init efi_init(void)
454 if (add_efi_memmap) 454 if (add_efi_memmap)
455 do_add_efi_memmap(); 455 do_add_efi_memmap();
456 456
457#ifdef CONFIG_X86_32
457 x86_platform.get_wallclock = efi_get_time; 458 x86_platform.get_wallclock = efi_get_time;
458 x86_platform.set_wallclock = efi_set_rtc_mmss; 459 x86_platform.set_wallclock = efi_set_rtc_mmss;
460#endif
459 461
460 /* Setup for EFI runtime service */ 462 /* Setup for EFI runtime service */
461 reboot_type = BOOT_EFI; 463 reboot_type = BOOT_EFI;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d59fe323807e..b5c061f8f358 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -536,20 +536,13 @@ sysret_signal:
536 bt $TIF_SYSCALL_AUDIT,%edx 536 bt $TIF_SYSCALL_AUDIT,%edx
537 jc sysret_audit 537 jc sysret_audit
538#endif 538#endif
539 /* edx: work flags (arg3) */ 539 /*
540 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 540 * We have a signal, or exit tracing or single-step.
541 xorl %esi,%esi # oldset -> arg2 541 * These all wind up with the iret return path anyway,
542 SAVE_REST 542 * so just join that path right now.
543 FIXUP_TOP_OF_STACK %r11 543 */
544 call do_notify_resume 544 FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
545 RESTORE_TOP_OF_STACK %r11 545 jmp int_check_syscall_exit_work
546 RESTORE_REST
547 movl $_TIF_WORK_MASK,%edi
548 /* Use IRET because user could have changed frame. This
549 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
550 DISABLE_INTERRUPTS(CLBR_NONE)
551 TRACE_IRQS_OFF
552 jmp int_with_check
553 546
554badsys: 547badsys:
555 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 548 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -654,6 +647,7 @@ int_careful:
654int_very_careful: 647int_very_careful:
655 TRACE_IRQS_ON 648 TRACE_IRQS_ON
656 ENABLE_INTERRUPTS(CLBR_NONE) 649 ENABLE_INTERRUPTS(CLBR_NONE)
650int_check_syscall_exit_work:
657 SAVE_REST 651 SAVE_REST
658 /* Check for syscall exit trace */ 652 /* Check for syscall exit trace */
659 testl $_TIF_WORK_SYSCALL_EXIT,%edx 653 testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@ -1021,7 +1015,7 @@ apicinterrupt ERROR_APIC_VECTOR \
1021apicinterrupt SPURIOUS_APIC_VECTOR \ 1015apicinterrupt SPURIOUS_APIC_VECTOR \
1022 spurious_interrupt smp_spurious_interrupt 1016 spurious_interrupt smp_spurious_interrupt
1023 1017
1024#ifdef CONFIG_PERF_COUNTERS 1018#ifdef CONFIG_PERF_EVENTS
1025apicinterrupt LOCAL_PENDING_VECTOR \ 1019apicinterrupt LOCAL_PENDING_VECTOR \
1026 perf_pending_interrupt smp_perf_pending_interrupt 1020 perf_pending_interrupt smp_perf_pending_interrupt
1027#endif 1021#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b766e8c7252d..050c278481b1 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
79 * any particular GDT layout, because we load our own as soon as we 79 * any particular GDT layout, because we load our own as soon as we
80 * can. 80 * can.
81 */ 81 */
82.section .text.head,"ax",@progbits 82__HEAD
83ENTRY(startup_32) 83ENTRY(startup_32)
84 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 84 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
85 us to not reload segments */ 85 us to not reload segments */
@@ -608,7 +608,7 @@ ENTRY(initial_code)
608/* 608/*
609 * BSS section 609 * BSS section
610 */ 610 */
611.section ".bss.page_aligned","wa" 611__PAGE_ALIGNED_BSS
612 .align PAGE_SIZE_asm 612 .align PAGE_SIZE_asm
613#ifdef CONFIG_X86_PAE 613#ifdef CONFIG_X86_PAE
614swapper_pg_pmd: 614swapper_pg_pmd:
@@ -626,7 +626,7 @@ ENTRY(empty_zero_page)
626 * This starts the data section. 626 * This starts the data section.
627 */ 627 */
628#ifdef CONFIG_X86_PAE 628#ifdef CONFIG_X86_PAE
629.section ".data.page_aligned","wa" 629__PAGE_ALIGNED_DATA
630 /* Page-aligned for the benefit of paravirt? */ 630 /* Page-aligned for the benefit of paravirt? */
631 .align PAGE_SIZE_asm 631 .align PAGE_SIZE_asm
632ENTRY(swapper_pg_dir) 632ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fa54f78e2a05..780cd928fcd5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map)
40L3_START_KERNEL = pud_index(__START_KERNEL_map) 40L3_START_KERNEL = pud_index(__START_KERNEL_map)
41 41
42 .text 42 .text
43 .section .text.head 43 __HEAD
44 .code64 44 .code64
45 .globl startup_64 45 .globl startup_64
46startup_64: 46startup_64:
@@ -418,7 +418,7 @@ ENTRY(phys_base)
418ENTRY(idt_table) 418ENTRY(idt_table)
419 .skip IDT_ENTRIES * 16 419 .skip IDT_ENTRIES * 16
420 420
421 .section .bss.page_aligned, "aw", @nobits 421 __PAGE_ALIGNED_BSS
422 .align PAGE_SIZE 422 .align PAGE_SIZE
423ENTRY(empty_zero_page) 423ENTRY(empty_zero_page)
424 .skip PAGE_SIZE 424 .skip PAGE_SIZE
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 43cec6bdda63..9c3bd4a2050e 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -10,6 +10,16 @@
10EXPORT_SYMBOL(mcount); 10EXPORT_SYMBOL(mcount);
11#endif 11#endif
12 12
13/*
14 * Note, this is a prototype to get at the symbol for
15 * the export, but dont use it from C code, it is used
16 * by assembly code and is not using C calling convention!
17 */
18#ifndef CONFIG_X86_CMPXCHG64
19extern void cmpxchg8b_emu(void);
20EXPORT_SYMBOL(cmpxchg8b_emu);
21#endif
22
13/* Networking helper routines. */ 23/* Networking helper routines. */
14EXPORT_SYMBOL(csum_partial_copy_generic); 24EXPORT_SYMBOL(csum_partial_copy_generic);
15 25
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 270ff83efc11..3a54dcb9cd0e 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
20 * way process stacks are handled. This is done by having a special 20 * way process stacks are handled. This is done by having a special
21 * "init_task" linker map entry.. 21 * "init_task" linker map entry..
22 */ 22 */
23union thread_union init_thread_union 23union thread_union init_thread_union __init_task_data =
24 __attribute__((__section__(".data.init_task"))) = 24 { INIT_THREAD_INFO(init_task) };
25 { INIT_THREAD_INFO(init_task) };
26 25
27/* 26/*
28 * Initial task structure. 27 * Initial task structure.
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 74656d1d4e30..04bbd5278568 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
63 for_each_online_cpu(j) 63 for_each_online_cpu(j)
64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
65 seq_printf(p, " Spurious interrupts\n"); 65 seq_printf(p, " Spurious interrupts\n");
66 seq_printf(p, "%*s: ", prec, "CNT"); 66 seq_printf(p, "%*s: ", prec, "PMI");
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance counter interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "PND");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 300883112e3d..40f30773fb29 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -208,7 +208,7 @@ static void __init apic_intr_init(void)
208 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 208 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
209 209
210 /* Performance monitoring interrupts: */ 210 /* Performance monitoring interrupts: */
211# ifdef CONFIG_PERF_COUNTERS 211# ifdef CONFIG_PERF_EVENTS
212 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); 212 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
213# endif 213# endif
214 214
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 71f1d99a635d..ec6ef60cbd17 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
67#ifdef CONFIG_SMP 67#ifdef CONFIG_SMP
68 preempt_disable(); 68 preempt_disable();
69 load_LDT(pc); 69 load_LDT(pc);
70 if (!cpus_equal(current->mm->cpu_vm_mask, 70 if (!cpumask_equal(mm_cpumask(current->mm),
71 cpumask_of_cpu(smp_processor_id()))) 71 cpumask_of(smp_processor_id())))
72 smp_call_function(flush_ldt, current->mm, 1); 72 smp_call_function(flush_ldt, current->mm, 1);
73 preempt_enable(); 73 preempt_enable();
74#else 74#else
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 366baa179913..f4c538b681ca 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -317,6 +317,12 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
317 return UCODE_NFOUND; 317 return UCODE_NFOUND;
318 } 318 }
319 319
320 if (*(u32 *)firmware->data != UCODE_MAGIC) {
321 printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n",
322 *(u32 *)firmware->data);
323 return UCODE_ERROR;
324 }
325
320 ret = generic_load_microcode(cpu, firmware->data, firmware->size); 326 ret = generic_load_microcode(cpu, firmware->data, firmware->size);
321 327
322 release_firmware(firmware); 328 release_firmware(firmware);
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9371448290ac..378e9a8f1bf8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
210{ 210{
211 ssize_t ret = -EINVAL; 211 ssize_t ret = -EINVAL;
212 212
213 if ((len >> PAGE_SHIFT) > num_physpages) { 213 if ((len >> PAGE_SHIFT) > totalram_pages) {
214 pr_err("microcode: too much data (max %ld pages)\n", num_physpages); 214 pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
215 return ret; 215 return ret;
216 } 216 }
217 217
@@ -236,7 +236,7 @@ static const struct file_operations microcode_fops = {
236static struct miscdevice microcode_dev = { 236static struct miscdevice microcode_dev = {
237 .minor = MICROCODE_MINOR, 237 .minor = MICROCODE_MINOR,
238 .name = "microcode", 238 .name = "microcode",
239 .devnode = "cpu/microcode", 239 .nodename = "cpu/microcode",
240 .fops = &microcode_fops, 240 .fops = &microcode_fops,
241}; 241};
242 242
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7dd950094178..6a3cefc7dda1 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -241,7 +241,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
241 .notifier_call = msr_class_cpu_callback, 241 .notifier_call = msr_class_cpu_callback,
242}; 242};
243 243
244static char *msr_nodename(struct device *dev) 244static char *msr_devnode(struct device *dev, mode_t *mode)
245{ 245{
246 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); 246 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
247} 247}
@@ -262,7 +262,7 @@ static int __init msr_init(void)
262 err = PTR_ERR(msr_class); 262 err = PTR_ERR(msr_class);
263 goto out_chrdev; 263 goto out_chrdev;
264 } 264 }
265 msr_class->nodename = msr_nodename; 265 msr_class->devnode = msr_devnode;
266 for_each_online_cpu(i) { 266 for_each_online_cpu(i) {
267 err = msr_device_create(i); 267 err = msr_device_create(i);
268 if (err != 0) 268 if (err != 0)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838eac18c..a6e804d16c35 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
35 35
36/* 36/*
37 * This variable becomes 1 if iommu=pt is passed on the kernel command line. 37 * This variable becomes 1 if iommu=pt is passed on the kernel command line.
38 * If this variable is 1, IOMMU implementations do no DMA ranslation for 38 * If this variable is 1, IOMMU implementations do no DMA translation for
39 * devices and allow every device to access to whole physical memory. This is 39 * devices and allow every device to access to whole physical memory. This is
40 * useful if a user want to use an IOMMU only for KVM device assignment to 40 * useful if a user want to use an IOMMU only for KVM device assignment to
41 * guests and not for driver dma translation. 41 * guests and not for driver dma translation.
@@ -45,12 +45,10 @@ int iommu_pass_through __read_mostly;
45dma_addr_t bad_dma_address __read_mostly = 0; 45dma_addr_t bad_dma_address __read_mostly = 0;
46EXPORT_SYMBOL(bad_dma_address); 46EXPORT_SYMBOL(bad_dma_address);
47 47
48/* Dummy device used for NULL arguments (normally ISA). Better would 48/* Dummy device used for NULL arguments (normally ISA). */
49 be probably a smaller DMA mask, but this is bug-to-bug compatible
50 to older i386. */
51struct device x86_dma_fallback_dev = { 49struct device x86_dma_fallback_dev = {
52 .init_name = "fallback device", 50 .init_name = "fallback device",
53 .coherent_dma_mask = DMA_BIT_MASK(32), 51 .coherent_dma_mask = ISA_DMA_BIT_MASK,
54 .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, 52 .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
55}; 53};
56EXPORT_SYMBOL(x86_dma_fallback_dev); 54EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -311,7 +309,7 @@ void pci_iommu_shutdown(void)
311 amd_iommu_shutdown(); 309 amd_iommu_shutdown();
312} 310}
313/* Must execute after PCI subsystem */ 311/* Must execute after PCI subsystem */
314fs_initcall(pci_iommu_init); 312rootfs_initcall(pci_iommu_init);
315 313
316#ifdef CONFIG_PCI 314#ifdef CONFIG_PCI
317/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ 315/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827ee9ed7..a7f1b64f86e0 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
16#include <linux/agp_backend.h> 16#include <linux/agp_backend.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/sched.h>
19#include <linux/string.h> 20#include <linux/string.h>
20#include <linux/spinlock.h> 21#include <linux/spinlock.h>
21#include <linux/pci.h> 22#include <linux/pci.h>
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index e8a35016115f..aaa6b7839f1e 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -46,9 +46,8 @@ void __init pci_swiotlb_init(void)
46{ 46{
47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
48#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
49 if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || 49 if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
50 iommu_pass_through) 50 swiotlb = 1;
51 swiotlb = 1;
52#endif 51#endif
53 if (swiotlb_force) 52 if (swiotlb_force)
54 swiotlb = 1; 53 swiotlb = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a4ba83..5284cd2b5776 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
9#include <linux/pm.h> 9#include <linux/pm.h>
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <trace/power.h> 12#include <trace/events/power.h>
13#include <asm/system.h> 13#include <asm/system.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15#include <asm/syscalls.h> 15#include <asm/syscalls.h>
@@ -25,9 +25,6 @@ EXPORT_SYMBOL(idle_nomwait);
25 25
26struct kmem_cache *task_xstate_cachep; 26struct kmem_cache *task_xstate_cachep;
27 27
28DEFINE_TRACE(power_start);
29DEFINE_TRACE(power_end);
30
31int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 28int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
32{ 29{
33 *dst = *src; 30 *dst = *src;
@@ -299,9 +296,7 @@ static inline int hlt_use_halt(void)
299void default_idle(void) 296void default_idle(void)
300{ 297{
301 if (hlt_use_halt()) { 298 if (hlt_use_halt()) {
302 struct power_trace it; 299 trace_power_start(POWER_CSTATE, 1);
303
304 trace_power_start(&it, POWER_CSTATE, 1);
305 current_thread_info()->status &= ~TS_POLLING; 300 current_thread_info()->status &= ~TS_POLLING;
306 /* 301 /*
307 * TS_POLLING-cleared state must be visible before we 302 * TS_POLLING-cleared state must be visible before we
@@ -314,7 +309,6 @@ void default_idle(void)
314 else 309 else
315 local_irq_enable(); 310 local_irq_enable();
316 current_thread_info()->status |= TS_POLLING; 311 current_thread_info()->status |= TS_POLLING;
317 trace_power_end(&it);
318 } else { 312 } else {
319 local_irq_enable(); 313 local_irq_enable();
320 /* loop is done by the caller */ 314 /* loop is done by the caller */
@@ -372,9 +366,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
372 */ 366 */
373void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 367void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
374{ 368{
375 struct power_trace it; 369 trace_power_start(POWER_CSTATE, (ax>>4)+1);
376
377 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
378 if (!need_resched()) { 370 if (!need_resched()) {
379 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 371 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
380 clflush((void *)&current_thread_info()->flags); 372 clflush((void *)&current_thread_info()->flags);
@@ -384,15 +376,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
384 if (!need_resched()) 376 if (!need_resched())
385 __mwait(ax, cx); 377 __mwait(ax, cx);
386 } 378 }
387 trace_power_end(&it);
388} 379}
389 380
390/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 381/* Default MONITOR/MWAIT with no hints, used for default C1 state */
391static void mwait_idle(void) 382static void mwait_idle(void)
392{ 383{
393 struct power_trace it;
394 if (!need_resched()) { 384 if (!need_resched()) {
395 trace_power_start(&it, POWER_CSTATE, 1); 385 trace_power_start(POWER_CSTATE, 1);
396 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 386 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
397 clflush((void *)&current_thread_info()->flags); 387 clflush((void *)&current_thread_info()->flags);
398 388
@@ -402,7 +392,6 @@ static void mwait_idle(void)
402 __sti_mwait(0, 0); 392 __sti_mwait(0, 0);
403 else 393 else
404 local_irq_enable(); 394 local_irq_enable();
405 trace_power_end(&it);
406 } else 395 } else
407 local_irq_enable(); 396 local_irq_enable();
408} 397}
@@ -414,13 +403,11 @@ static void mwait_idle(void)
414 */ 403 */
415static void poll_idle(void) 404static void poll_idle(void)
416{ 405{
417 struct power_trace it; 406 trace_power_start(POWER_CSTATE, 0);
418
419 trace_power_start(&it, POWER_CSTATE, 0);
420 local_irq_enable(); 407 local_irq_enable();
421 while (!need_resched()) 408 while (!need_resched())
422 cpu_relax(); 409 cpu_relax();
423 trace_power_end(&it); 410 trace_power_end(0);
424} 411}
425 412
426/* 413/*
@@ -568,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
568void __init init_c1e_mask(void) 555void __init init_c1e_mask(void)
569{ 556{
570 /* If we're using c1e_idle, we need to allocate c1e_mask. */ 557 /* If we're using c1e_idle, we need to allocate c1e_mask. */
571 if (pm_idle == c1e_idle) { 558 if (pm_idle == c1e_idle)
572 alloc_cpumask_var(&c1e_mask, GFP_KERNEL); 559 zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
573 cpumask_clear(c1e_mask);
574 }
575} 560}
576 561
577static int __init idle_setup(char *str) 562static int __init idle_setup(char *str)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b683170..eb62cbcaa490 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -664,3 +664,8 @@ long sys_arch_prctl(int code, unsigned long addr)
664 return do_arch_prctl(current, code, addr); 664 return do_arch_prctl(current, code, addr);
665} 665}
666 666
667unsigned long KSTK_ESP(struct task_struct *task)
668{
669 return (test_tsk_thread_flag(task, TIF_IA32)) ?
670 (task_pt_regs(task)->sp) : ((task)->thread.usersp);
671}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8d7d5c9c1be3..7b058a2dc66a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -325,16 +325,6 @@ static int putreg(struct task_struct *child,
325 return set_flags(child, value); 325 return set_flags(child, value);
326 326
327#ifdef CONFIG_X86_64 327#ifdef CONFIG_X86_64
328 /*
329 * Orig_ax is really just a flag with small positive and
330 * negative values, so make sure to always sign-extend it
331 * from 32 bits so that it works correctly regardless of
332 * whether we come from a 32-bit environment or not.
333 */
334 case offsetof(struct user_regs_struct, orig_ax):
335 value = (long) (s32) value;
336 break;
337
338 case offsetof(struct user_regs_struct,fs_base): 328 case offsetof(struct user_regs_struct,fs_base):
339 if (value >= TASK_SIZE_OF(child)) 329 if (value >= TASK_SIZE_OF(child))
340 return -EIO; 330 return -EIO;
@@ -1126,10 +1116,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
1126 1116
1127 case offsetof(struct user32, regs.orig_eax): 1117 case offsetof(struct user32, regs.orig_eax):
1128 /* 1118 /*
1129 * Sign-extend the value so that orig_eax = -1 1119 * A 32-bit debugger setting orig_eax means to restore
1130 * causes (long)orig_ax < 0 tests to fire correctly. 1120 * the state of the task restarting a 32-bit syscall.
1121 * Make sure we interpret the -ERESTART* codes correctly
1122 * in case the task is not actually still sitting at the
1123 * exit from a 32-bit syscall with TS_COMPAT still set.
1131 */ 1124 */
1132 regs->orig_ax = (long) (s32) value; 1125 regs->orig_ax = value;
1126 if (syscall_get_nr(child, regs) >= 0)
1127 task_thread_info(child)->status |= TS_COMPAT;
1133 break; 1128 break;
1134 1129
1135 case offsetof(struct user32, regs.eflags): 1130 case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f92a6d7..f93078746e00 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/efi.h> 5#include <linux/efi.h>
6#include <linux/dmi.h> 6#include <linux/dmi.h>
7#include <linux/sched.h>
7#include <linux/tboot.h> 8#include <linux/tboot.h>
8#include <acpi/reboot.h> 9#include <acpi/reboot.h>
9#include <asm/io.h> 10#include <asm/io.h>
@@ -435,6 +436,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
435 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), 436 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
436 }, 437 },
437 }, 438 },
439 { /* Handle problems with rebooting on Apple Macmini3,1 */
440 .callback = set_pci_reboot,
441 .ident = "Apple Macmini3,1",
442 .matches = {
443 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
444 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
445 },
446 },
438 { } 447 { }
439}; 448};
440 449
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a55f6609fe1f..2a34f9c5be21 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -27,6 +27,7 @@
27#include <linux/screen_info.h> 27#include <linux/screen_info.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/acpi.h> 29#include <linux/acpi.h>
30#include <linux/sfi.h>
30#include <linux/apm_bios.h> 31#include <linux/apm_bios.h>
31#include <linux/initrd.h> 32#include <linux/initrd.h>
32#include <linux/bootmem.h> 33#include <linux/bootmem.h>
@@ -659,6 +660,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
659 }, 660 },
660 }, 661 },
661 { 662 {
663 .callback = dmi_low_memory_corruption,
664 .ident = "Phoenix/MSC BIOS",
665 .matches = {
666 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
667 },
668 },
669 {
662 /* 670 /*
663 * AMI BIOS with low memory corruption was found on Intel DG45ID board. 671 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
664 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will 672 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
@@ -697,21 +705,6 @@ void __init setup_arch(char **cmdline_p)
697 printk(KERN_INFO "Command line: %s\n", boot_command_line); 705 printk(KERN_INFO "Command line: %s\n", boot_command_line);
698#endif 706#endif
699 707
700 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
701 *cmdline_p = command_line;
702
703#ifdef CONFIG_X86_64
704 /*
705 * Must call this twice: Once just to detect whether hardware doesn't
706 * support NX (so that the early EHCI debug console setup can safely
707 * call set_fixmap(), and then again after parsing early parameters to
708 * honor the respective command line option.
709 */
710 check_efer();
711#endif
712
713 parse_early_param();
714
715 /* VMI may relocate the fixmap; do this before touching ioremap area */ 708 /* VMI may relocate the fixmap; do this before touching ioremap area */
716 vmi_init(); 709 vmi_init();
717 710
@@ -794,6 +787,21 @@ void __init setup_arch(char **cmdline_p)
794#endif 787#endif
795#endif 788#endif
796 789
790 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
791 *cmdline_p = command_line;
792
793#ifdef CONFIG_X86_64
794 /*
795 * Must call this twice: Once just to detect whether hardware doesn't
796 * support NX (so that the early EHCI debug console setup can safely
797 * call set_fixmap(), and then again after parsing early parameters to
798 * honor the respective command line option.
799 */
800 check_efer();
801#endif
802
803 parse_early_param();
804
797#ifdef CONFIG_X86_64 805#ifdef CONFIG_X86_64
798 check_efer(); 806 check_efer();
799#endif 807#endif
@@ -985,6 +993,8 @@ void __init setup_arch(char **cmdline_p)
985 */ 993 */
986 acpi_boot_init(); 994 acpi_boot_init();
987 995
996 sfi_init();
997
988 /* 998 /*
989 * get boot-time SMP configuration: 999 * get boot-time SMP configuration:
990 */ 1000 */
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
new file mode 100644
index 000000000000..34e099382651
--- /dev/null
+++ b/arch/x86/kernel/sfi.c
@@ -0,0 +1,122 @@
1/*
2 * sfi.c - x86 architecture SFI support.
3 *
4 * Copyright (c) 2009, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20
21#define KMSG_COMPONENT "SFI"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/acpi.h>
25#include <linux/init.h>
26#include <linux/sfi.h>
27#include <linux/io.h>
28
29#include <asm/io_apic.h>
30#include <asm/mpspec.h>
31#include <asm/setup.h>
32#include <asm/apic.h>
33
34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36
37void __init mp_sfi_register_lapic_address(unsigned long address)
38{
39 mp_lapic_addr = address;
40
41 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
42 if (boot_cpu_physical_apicid == -1U)
43 boot_cpu_physical_apicid = read_apic_id();
44
45 pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
46}
47
48/* All CPUs enumerated by SFI must be present and enabled */
49void __cpuinit mp_sfi_register_lapic(u8 id)
50{
51 if (MAX_APICS - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS);
54 return;
55 }
56
57 pr_info("registering lapic[%d]\n", id);
58
59 generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
60}
61
62static int __init sfi_parse_cpus(struct sfi_table_header *table)
63{
64 struct sfi_table_simple *sb;
65 struct sfi_cpu_table_entry *pentry;
66 int i;
67 int cpu_num;
68
69 sb = (struct sfi_table_simple *)table;
70 cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
71 pentry = (struct sfi_cpu_table_entry *)sb->pentry;
72
73 for (i = 0; i < cpu_num; i++) {
74 mp_sfi_register_lapic(pentry->apic_id);
75 pentry++;
76 }
77
78 smp_found_config = 1;
79 return 0;
80}
81#endif /* CONFIG_X86_LOCAL_APIC */
82
83#ifdef CONFIG_X86_IO_APIC
84static u32 gsi_base;
85
86static int __init sfi_parse_ioapic(struct sfi_table_header *table)
87{
88 struct sfi_table_simple *sb;
89 struct sfi_apic_table_entry *pentry;
90 int i, num;
91
92 sb = (struct sfi_table_simple *)table;
93 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
94 pentry = (struct sfi_apic_table_entry *)sb->pentry;
95
96 for (i = 0; i < num; i++) {
97 mp_register_ioapic(i, pentry->phys_addr, gsi_base);
98 gsi_base += io_apic_get_redir_entries(i);
99 pentry++;
100 }
101
102 WARN(pic_mode, KERN_WARNING
103 "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
104 pic_mode = 0;
105 return 0;
106}
107#endif /* CONFIG_X86_IO_APIC */
108
109/*
110 * sfi_platform_init(): register lapics & io-apics
111 */
112int __init sfi_platform_init(void)
113{
114#ifdef CONFIG_X86_LOCAL_APIC
115 mp_sfi_register_lapic_address(sfi_lapic_addr);
116 sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
117#endif
118#ifdef CONFIG_X86_IO_APIC
119 sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
120#endif
121 return 0;
122}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 09c5e077dff7..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1059#endif 1059#endif
1060 current_thread_info()->cpu = 0; /* needed? */ 1060 current_thread_info()->cpu = 0; /* needed? */
1061 for_each_possible_cpu(i) { 1061 for_each_possible_cpu(i) {
1062 alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); 1062 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
1063 alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); 1063 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
1064 alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); 1064 zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
1065 cpumask_clear(per_cpu(cpu_core_map, i));
1066 cpumask_clear(per_cpu(cpu_sibling_map, i));
1067 cpumask_clear(cpu_data(i).llc_shared_map);
1068 } 1065 }
1069 set_cpu_sibling_map(0); 1066 set_cpu_sibling_map(0);
1070 1067
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d51321ddafda..0157cd26d7cc 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -335,4 +335,4 @@ ENTRY(sys_call_table)
335 .long sys_preadv 335 .long sys_preadv
336 .long sys_pwritev 336 .long sys_pwritev
337 .long sys_rt_tgsigqueueinfo /* 335 */ 337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_counter_open 338 .long sys_perf_event_open
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e293ac56c723..be2573448ed9 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
38#ifdef CONFIG_FRAME_POINTER 38#ifdef CONFIG_FRAME_POINTER
39 return *(unsigned long *)(regs->bp + sizeof(long)); 39 return *(unsigned long *)(regs->bp + sizeof(long));
40#else 40#else
41 unsigned long *sp = (unsigned long *)regs->sp; 41 unsigned long *sp =
42 (unsigned long *)kernel_stack_pointer(regs);
42 /* 43 /*
43 * Return address is either directly at stack pointer 44 * Return address is either directly at stack pointer
44 * or above a saved flags. Eflags has bits 22-31 zero, 45 * or above a saved flags. Eflags has bits 22-31 zero,
@@ -93,7 +94,6 @@ static struct irqaction irq0 = {
93 94
94void __init setup_default_timer_irq(void) 95void __init setup_default_timer_irq(void)
95{ 96{
96 irq0.mask = cpumask_of_cpu(0);
97 setup_irq(0, &irq0); 97 setup_irq(0, &irq0);
98} 98}
99 99
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 503c1f2e8835..1740c85e24bb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -23,8 +23,6 @@
23static struct bau_control **uv_bau_table_bases __read_mostly; 23static struct bau_control **uv_bau_table_bases __read_mostly;
24static int uv_bau_retry_limit __read_mostly; 24static int uv_bau_retry_limit __read_mostly;
25 25
26/* position of pnode (which is nasid>>1): */
27static int uv_nshift __read_mostly;
28/* base pnode in this partition */ 26/* base pnode in this partition */
29static int uv_partition_base_pnode __read_mostly; 27static int uv_partition_base_pnode __read_mostly;
30 28
@@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode)
723 BUG_ON(!adp); 721 BUG_ON(!adp);
724 722
725 pa = uv_gpa(adp); /* need the real nasid*/ 723 pa = uv_gpa(adp); /* need the real nasid*/
726 n = pa >> uv_nshift; 724 n = uv_gpa_to_pnode(pa);
727 m = pa & uv_mmask; 725 m = pa & uv_mmask;
728 726
729 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 727 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
@@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
778 * need the pnode of where the memory was really allocated 776 * need the pnode of where the memory was really allocated
779 */ 777 */
780 pa = uv_gpa(pqp); 778 pa = uv_gpa(pqp);
781 pn = pa >> uv_nshift; 779 pn = uv_gpa_to_pnode(pa);
782 uv_write_global_mmr64(pnode, 780 uv_write_global_mmr64(pnode,
783 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 781 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
784 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 782 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
@@ -843,8 +841,7 @@ static int __init uv_bau_init(void)
843 GFP_KERNEL, cpu_to_node(cur_cpu)); 841 GFP_KERNEL, cpu_to_node(cur_cpu));
844 842
845 uv_bau_retry_limit = 1; 843 uv_bau_retry_limit = 1;
846 uv_nshift = uv_hub_info->n_val; 844 uv_mmask = (1UL << uv_hub_info->m_val) - 1;
847 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
848 nblades = uv_num_possible_blades(); 845 nblades = uv_num_possible_blades();
849 846
850 uv_bau_table_bases = (struct bau_control **) 847 uv_bau_table_bases = (struct bau_control **)
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 808031a5ba19..cd022121cab6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4#include <asm/e820.h> 4#include <asm/e820.h>
5 5
6#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
7#define __trampinit
8#define __trampinitdata
9#else
10#define __trampinit __cpuinit
11#define __trampinitdata __cpuinitdata
12#endif
13
6/* ready for x86_64 and x86 */ 14/* ready for x86_64 and x86 */
7unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); 15unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
8 16
9void __init reserve_trampoline_memory(void) 17void __init reserve_trampoline_memory(void)
10{ 18{
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
26 * bootstrap into the page concerned. The caller 34 * bootstrap into the page concerned. The caller
27 * has made sure it's suitably aligned. 35 * has made sure it's suitably aligned.
28 */ 36 */
29unsigned long setup_trampoline(void) 37unsigned long __trampinit setup_trampoline(void)
30{ 38{
31 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); 39 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
32 return virt_to_phys(trampoline_base); 40 return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 66d874e5404c..8508237e8e43 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -28,16 +28,12 @@
28 */ 28 */
29 29
30#include <linux/linkage.h> 30#include <linux/linkage.h>
31#include <linux/init.h>
31#include <asm/segment.h> 32#include <asm/segment.h>
32#include <asm/page_types.h> 33#include <asm/page_types.h>
33 34
34/* We can free up trampoline after bootup if cpu hotplug is not supported. */ 35/* We can free up trampoline after bootup if cpu hotplug is not supported. */
35#ifndef CONFIG_HOTPLUG_CPU 36__CPUINITRODATA
36.section ".cpuinit.data","aw",@progbits
37#else
38.section .rodata,"a",@progbits
39#endif
40
41.code16 37.code16
42 38
43ENTRY(trampoline_data) 39ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index cddfb8d386b9..3af2dff58b21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,14 +25,19 @@
25 */ 25 */
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <linux/init.h>
28#include <asm/pgtable_types.h> 29#include <asm/pgtable_types.h>
29#include <asm/page_types.h> 30#include <asm/page_types.h>
30#include <asm/msr.h> 31#include <asm/msr.h>
31#include <asm/segment.h> 32#include <asm/segment.h>
32#include <asm/processor-flags.h> 33#include <asm/processor-flags.h>
33 34
35#ifdef CONFIG_ACPI_SLEEP
34.section .rodata, "a", @progbits 36.section .rodata, "a", @progbits
35 37#else
38/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
39__CPUINITRODATA
40#endif
36.code16 41.code16
37 42
38ENTRY(trampoline_data) 43ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7dc0de9d1ed9..7e37dcee0cc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -14,7 +14,6 @@
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/kprobes.h> 15#include <linux/kprobes.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/utsname.h>
18#include <linux/kdebug.h> 17#include <linux/kdebug.h>
19#include <linux/kernel.h> 18#include <linux/kernel.h>
20#include <linux/module.h> 19#include <linux/module.h>
@@ -65,7 +64,6 @@
65#else 64#else
66#include <asm/processor-flags.h> 65#include <asm/processor-flags.h>
67#include <asm/setup.h> 66#include <asm/setup.h>
68#include <asm/traps.h>
69 67
70asmlinkage int system_call(void); 68asmlinkage int system_call(void);
71 69
@@ -74,11 +72,9 @@ char ignore_fpu_irq;
74 72
75/* 73/*
76 * The IDT has to be page-aligned to simplify the Pentium 74 * The IDT has to be page-aligned to simplify the Pentium
77 * F0 0F bug workaround.. We have a special link segment 75 * F0 0F bug workaround.
78 * for this.
79 */ 76 */
80gate_desc idt_table[NR_VECTORS] 77gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
81 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
82#endif 78#endif
83 79
84DECLARE_BITMAP(used_vectors, NR_VECTORS); 80DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 17409e8d1097..cd982f48e23e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -666,7 +666,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
666 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 666 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
667 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || 667 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
668 (val == CPUFREQ_RESUMECHANGE)) { 668 (val == CPUFREQ_RESUMECHANGE)) {
669 *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 669 *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
670 670
671 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 671 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
672 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 672 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 027b5b498993..f37930954d15 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
114 return; 114 return;
115 115
116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
117 pr_info("Skipping synchronization checks as TSC is reliable.\n"); 117 printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
118 return; 118 return;
119 } 119 }
120 120
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6cfe53e..d430e4c30193 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
648 648
649 pv_info.paravirt_enabled = 1; 649 pv_info.paravirt_enabled = 1;
650 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; 650 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
651 pv_info.name = "vmi"; 651 pv_info.name = "vmi [deprecated]";
652 652
653 pv_init_ops.patch = vmi_patch; 653 pv_init_ops.patch = vmi_patch;
654 654
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0ccb57d5ee35..3c68fe2d46cf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -45,9 +45,9 @@ PHDRS {
45 text PT_LOAD FLAGS(5); /* R_E */ 45 text PT_LOAD FLAGS(5); /* R_E */
46 data PT_LOAD FLAGS(7); /* RWE */ 46 data PT_LOAD FLAGS(7); /* RWE */
47#ifdef CONFIG_X86_64 47#ifdef CONFIG_X86_64
48 user PT_LOAD FLAGS(7); /* RWE */ 48 user PT_LOAD FLAGS(5); /* R_E */
49#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
50 percpu PT_LOAD FLAGS(7); /* RWE */ 50 percpu PT_LOAD FLAGS(6); /* RW_ */
51#endif 51#endif
52 init PT_LOAD FLAGS(7); /* RWE */ 52 init PT_LOAD FLAGS(7); /* RWE */
53#endif 53#endif
@@ -65,17 +65,11 @@ SECTIONS
65#endif 65#endif
66 66
67 /* Text and read-only data */ 67 /* Text and read-only data */
68
69 /* bootstrapping code */
70 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
71 _text = .;
72 *(.text.head)
73 } :text = 0x9090
74
75 /* The rest of the text */
76 .text : AT(ADDR(.text) - LOAD_OFFSET) { 68 .text : AT(ADDR(.text) - LOAD_OFFSET) {
69 _text = .;
70 /* bootstrapping code */
71 HEAD_TEXT
77#ifdef CONFIG_X86_32 72#ifdef CONFIG_X86_32
78 /* not really needed, already page aligned */
79 . = ALIGN(PAGE_SIZE); 73 . = ALIGN(PAGE_SIZE);
80 *(.text.page_aligned) 74 *(.text.page_aligned)
81#endif 75#endif
@@ -94,13 +88,7 @@ SECTIONS
94 88
95 NOTES :text :note 89 NOTES :text :note
96 90
97 /* Exception table */ 91 EXCEPTION_TABLE(16) :text = 0x9090
98 . = ALIGN(16);
99 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
100 __start___ex_table = .;
101 *(__ex_table)
102 __stop___ex_table = .;
103 } :text = 0x9090
104 92
105 RO_DATA(PAGE_SIZE) 93 RO_DATA(PAGE_SIZE)
106 94
@@ -118,7 +106,6 @@ SECTIONS
118#endif 106#endif
119 107
120 PAGE_ALIGNED_DATA(PAGE_SIZE) 108 PAGE_ALIGNED_DATA(PAGE_SIZE)
121 *(.data.idt)
122 109
123 CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) 110 CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
124 111
@@ -135,24 +122,21 @@ SECTIONS
135#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
136 123
137#define VSYSCALL_ADDR (-10*1024*1024) 124#define VSYSCALL_ADDR (-10*1024*1024)
138#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
139 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
140#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
141 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
142 125
143#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) 126#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
144#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) 127#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
145 128
146#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) 129#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
147#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) 130#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
148 131
132 . = ALIGN(4096);
133 __vsyscall_0 = .;
134
149 . = VSYSCALL_ADDR; 135 . = VSYSCALL_ADDR;
150 .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { 136 .vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
151 *(.vsyscall_0) 137 *(.vsyscall_0)
152 } :user 138 } :user
153 139
154 __vsyscall_0 = VSYSCALL_VIRT_ADDR;
155
156 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 140 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
157 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { 141 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
158 *(.vsyscall_fn) 142 *(.vsyscall_fn)
@@ -192,11 +176,9 @@ SECTIONS
192 *(.vsyscall_3) 176 *(.vsyscall_3)
193 } 177 }
194 178
195 . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; 179 . = __vsyscall_0 + PAGE_SIZE;
196 180
197#undef VSYSCALL_ADDR 181#undef VSYSCALL_ADDR
198#undef VSYSCALL_PHYS_ADDR
199#undef VSYSCALL_VIRT_ADDR
200#undef VLOAD_OFFSET 182#undef VLOAD_OFFSET
201#undef VLOAD 183#undef VLOAD
202#undef VVIRT_OFFSET 184#undef VVIRT_OFFSET
@@ -219,36 +201,12 @@ SECTIONS
219 PERCPU_VADDR(0, :percpu) 201 PERCPU_VADDR(0, :percpu)
220#endif 202#endif
221 203
222 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 204 INIT_TEXT_SECTION(PAGE_SIZE)
223 _sinittext = .;
224 INIT_TEXT
225 _einittext = .;
226 }
227#ifdef CONFIG_X86_64 205#ifdef CONFIG_X86_64
228 :init 206 :init
229#endif 207#endif
230 208
231 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { 209 INIT_DATA_SECTION(16)
232 INIT_DATA
233 }
234
235 . = ALIGN(16);
236 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
237 __setup_start = .;
238 *(.init.setup)
239 __setup_end = .;
240 }
241 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
242 __initcall_start = .;
243 INITCALLS
244 __initcall_end = .;
245 }
246
247 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
248 __con_initcall_start = .;
249 *(.con_initcall.init)
250 __con_initcall_end = .;
251 }
252 210
253 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { 211 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
254 __x86_cpu_dev_start = .; 212 __x86_cpu_dev_start = .;
@@ -256,8 +214,6 @@ SECTIONS
256 __x86_cpu_dev_end = .; 214 __x86_cpu_dev_end = .;
257 } 215 }
258 216
259 SECURITY_INIT
260
261 . = ALIGN(8); 217 . = ALIGN(8);
262 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 218 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
263 __parainstructions = .; 219 __parainstructions = .;
@@ -288,15 +244,6 @@ SECTIONS
288 EXIT_DATA 244 EXIT_DATA
289 } 245 }
290 246
291#ifdef CONFIG_BLK_DEV_INITRD
292 . = ALIGN(PAGE_SIZE);
293 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
294 __initramfs_start = .;
295 *(.init.ramfs)
296 __initramfs_end = .;
297 }
298#endif
299
300#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 247#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
301 PERCPU(PAGE_SIZE) 248 PERCPU(PAGE_SIZE)
302#endif 249#endif
@@ -358,6 +305,9 @@ SECTIONS
358 305
359 306
360#ifdef CONFIG_X86_32 307#ifdef CONFIG_X86_32
308/*
309 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
310 */
361. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), 311. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
362 "kernel image bigger than KERNEL_IMAGE_SIZE"); 312 "kernel image bigger than KERNEL_IMAGE_SIZE");
363#else 313#else
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index cf53a78e2dcf..8cb4974ff599 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
228} 228}
229 229
230#ifdef CONFIG_SYSCTL 230#ifdef CONFIG_SYSCTL
231
232static int
233vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
234 void __user *buffer, size_t *lenp, loff_t *ppos)
235{
236 return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
237}
238
239static ctl_table kernel_table2[] = { 231static ctl_table kernel_table2[] = {
240 { .procname = "vsyscall64", 232 { .procname = "vsyscall64",
241 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), 233 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
242 .mode = 0644, 234 .mode = 0644,
243 .proc_handler = vsyscall_sysctl_change }, 235 .proc_handler = proc_dointvec },
244 {} 236 {}
245}; 237};
246 238
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523b4901..144e7f60b5e2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
116 * itself with the initial count and continues counting 116 * itself with the initial count and continues counting
117 * from there. 117 * from there.
118 */ 118 */
119 remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); 119 remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
120 elapsed = ps->pit_timer.period - ktime_to_ns(remaining); 120 elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
121 elapsed = mod_64(elapsed, ps->pit_timer.period); 121 elapsed = mod_64(elapsed, ps->pit_timer.period);
122 122
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb2..23c217692ea9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
521 if (apic_get_reg(apic, APIC_TMICT) == 0) 521 if (apic_get_reg(apic, APIC_TMICT) == 0)
522 return 0; 522 return 0;
523 523
524 remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); 524 remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
525 if (ktime_to_ns(remaining) < 0) 525 if (ktime_to_ns(remaining) < 0)
526 remaining = ktime_set(0, 0); 526 remaining = ktime_set(0, 0);
527 527
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
664{ 664{
665 ktime_t now = apic->lapic_timer.timer.base->get_time(); 665 ktime_t now = apic->lapic_timer.timer.base->get_time();
666 666
667 apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * 667 apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
668 APIC_BUS_CYCLE_NS * apic->divide_count; 668 APIC_BUS_CYCLE_NS * apic->divide_count;
669 atomic_set(&apic->lapic_timer.pending, 0); 669 atomic_set(&apic->lapic_timer.pending, 0);
670 670
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f453..818b92ad82cf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
156#define CREATE_TRACE_POINTS 156#define CREATE_TRACE_POINTS
157#include "mmutrace.h" 157#include "mmutrace.h"
158 158
159#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
160
159#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 161#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
160 162
161struct kvm_rmap_desc { 163struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
634 if (*spte & shadow_accessed_mask) 636 if (*spte & shadow_accessed_mask)
635 kvm_set_pfn_accessed(pfn); 637 kvm_set_pfn_accessed(pfn);
636 if (is_writeble_pte(*spte)) 638 if (is_writeble_pte(*spte))
637 kvm_release_pfn_dirty(pfn); 639 kvm_set_pfn_dirty(pfn);
638 else
639 kvm_release_pfn_clean(pfn);
640 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 640 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
641 if (!*rmapp) { 641 if (!*rmapp) {
642 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 642 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
748 return write_protected; 748 return write_protected;
749} 749}
750 750
751static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 751static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
752 unsigned long data)
752{ 753{
753 u64 *spte; 754 u64 *spte;
754 int need_tlb_flush = 0; 755 int need_tlb_flush = 0;
@@ -763,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
763 return need_tlb_flush; 764 return need_tlb_flush;
764} 765}
765 766
767static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
768 unsigned long data)
769{
770 int need_flush = 0;
771 u64 *spte, new_spte;
772 pte_t *ptep = (pte_t *)data;
773 pfn_t new_pfn;
774
775 WARN_ON(pte_huge(*ptep));
776 new_pfn = pte_pfn(*ptep);
777 spte = rmap_next(kvm, rmapp, NULL);
778 while (spte) {
779 BUG_ON(!is_shadow_present_pte(*spte));
780 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
781 need_flush = 1;
782 if (pte_write(*ptep)) {
783 rmap_remove(kvm, spte);
784 __set_spte(spte, shadow_trap_nonpresent_pte);
785 spte = rmap_next(kvm, rmapp, NULL);
786 } else {
787 new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
788 new_spte |= (u64)new_pfn << PAGE_SHIFT;
789
790 new_spte &= ~PT_WRITABLE_MASK;
791 new_spte &= ~SPTE_HOST_WRITEABLE;
792 if (is_writeble_pte(*spte))
793 kvm_set_pfn_dirty(spte_to_pfn(*spte));
794 __set_spte(spte, new_spte);
795 spte = rmap_next(kvm, rmapp, spte);
796 }
797 }
798 if (need_flush)
799 kvm_flush_remote_tlbs(kvm);
800
801 return 0;
802}
803
766static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 804static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
767 int (*handler)(struct kvm *kvm, unsigned long *rmapp)) 805 unsigned long data,
806 int (*handler)(struct kvm *kvm, unsigned long *rmapp,
807 unsigned long data))
768{ 808{
769 int i, j; 809 int i, j;
770 int retval = 0; 810 int retval = 0;
@@ -786,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
786 if (hva >= start && hva < end) { 826 if (hva >= start && hva < end) {
787 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 827 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
788 828
789 retval |= handler(kvm, &memslot->rmap[gfn_offset]); 829 retval |= handler(kvm, &memslot->rmap[gfn_offset],
830 data);
790 831
791 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 832 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
792 int idx = gfn_offset; 833 int idx = gfn_offset;
793 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 834 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
794 retval |= handler(kvm, 835 retval |= handler(kvm,
795 &memslot->lpage_info[j][idx].rmap_pde); 836 &memslot->lpage_info[j][idx].rmap_pde,
837 data);
796 } 838 }
797 } 839 }
798 } 840 }
@@ -802,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
802 844
803int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 845int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
804{ 846{
805 return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 847 return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
806} 848}
807 849
808static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) 850void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
851{
852 kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
853}
854
855static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
856 unsigned long data)
809{ 857{
810 u64 *spte; 858 u64 *spte;
811 int young = 0; 859 int young = 0;
@@ -841,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
841 gfn = unalias_gfn(vcpu->kvm, gfn); 889 gfn = unalias_gfn(vcpu->kvm, gfn);
842 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 890 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
843 891
844 kvm_unmap_rmapp(vcpu->kvm, rmapp); 892 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
845 kvm_flush_remote_tlbs(vcpu->kvm); 893 kvm_flush_remote_tlbs(vcpu->kvm);
846} 894}
847 895
848int kvm_age_hva(struct kvm *kvm, unsigned long hva) 896int kvm_age_hva(struct kvm *kvm, unsigned long hva)
849{ 897{
850 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 898 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
851} 899}
852 900
853#ifdef MMU_DEBUG 901#ifdef MMU_DEBUG
@@ -1756,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1756 unsigned pte_access, int user_fault, 1804 unsigned pte_access, int user_fault,
1757 int write_fault, int dirty, int level, 1805 int write_fault, int dirty, int level,
1758 gfn_t gfn, pfn_t pfn, bool speculative, 1806 gfn_t gfn, pfn_t pfn, bool speculative,
1759 bool can_unsync) 1807 bool can_unsync, bool reset_host_protection)
1760{ 1808{
1761 u64 spte; 1809 u64 spte;
1762 int ret = 0; 1810 int ret = 0;
@@ -1783,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1783 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, 1831 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
1784 kvm_is_mmio_pfn(pfn)); 1832 kvm_is_mmio_pfn(pfn));
1785 1833
1834 if (reset_host_protection)
1835 spte |= SPTE_HOST_WRITEABLE;
1836
1786 spte |= (u64)pfn << PAGE_SHIFT; 1837 spte |= (u64)pfn << PAGE_SHIFT;
1787 1838
1788 if ((pte_access & ACC_WRITE_MASK) 1839 if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1828 unsigned pt_access, unsigned pte_access, 1879 unsigned pt_access, unsigned pte_access,
1829 int user_fault, int write_fault, int dirty, 1880 int user_fault, int write_fault, int dirty,
1830 int *ptwrite, int level, gfn_t gfn, 1881 int *ptwrite, int level, gfn_t gfn,
1831 pfn_t pfn, bool speculative) 1882 pfn_t pfn, bool speculative,
1883 bool reset_host_protection)
1832{ 1884{
1833 int was_rmapped = 0; 1885 int was_rmapped = 0;
1834 int was_writeble = is_writeble_pte(*sptep); 1886 int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1860 } 1912 }
1861 1913
1862 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, 1914 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
1863 dirty, level, gfn, pfn, speculative, true)) { 1915 dirty, level, gfn, pfn, speculative, true,
1916 reset_host_protection)) {
1864 if (write_fault) 1917 if (write_fault)
1865 *ptwrite = 1; 1918 *ptwrite = 1;
1866 kvm_x86_ops->tlb_flush(vcpu); 1919 kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1877 page_header_update_slot(vcpu->kvm, sptep, gfn); 1930 page_header_update_slot(vcpu->kvm, sptep, gfn);
1878 if (!was_rmapped) { 1931 if (!was_rmapped) {
1879 rmap_count = rmap_add(vcpu, sptep, gfn); 1932 rmap_count = rmap_add(vcpu, sptep, gfn);
1880 if (!is_rmap_spte(*sptep)) 1933 kvm_release_pfn_clean(pfn);
1881 kvm_release_pfn_clean(pfn);
1882 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1934 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1883 rmap_recycle(vcpu, sptep, gfn); 1935 rmap_recycle(vcpu, sptep, gfn);
1884 } else { 1936 } else {
@@ -1909,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1909 if (iterator.level == level) { 1961 if (iterator.level == level) {
1910 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 1962 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1911 0, write, 1, &pt_write, 1963 0, write, 1, &pt_write,
1912 level, gfn, pfn, false); 1964 level, gfn, pfn, false, true);
1913 ++vcpu->stat.pf_fixed; 1965 ++vcpu->stat.pf_fixed;
1914 break; 1966 break;
1915 } 1967 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c12d22..72558f8ff3f5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
273 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) 273 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
274 return; 274 return;
275 kvm_get_pfn(pfn); 275 kvm_get_pfn(pfn);
276 /*
277 * we call mmu_set_spte() with reset_host_protection = true beacuse that
278 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
279 */
276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 280 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277 gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, 281 gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
278 gpte_to_gfn(gpte), pfn, true); 282 gpte_to_gfn(gpte), pfn, true, true);
279} 283}
280 284
281/* 285/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
308 user_fault, write_fault, 312 user_fault, write_fault,
309 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 313 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
310 ptwrite, level, 314 ptwrite, level,
311 gw->gfn, pfn, false); 315 gw->gfn, pfn, false, true);
312 break; 316 break;
313 } 317 }
314 318
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
558static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 562static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
559{ 563{
560 int i, offset, nr_present; 564 int i, offset, nr_present;
565 bool reset_host_protection;
561 566
562 offset = nr_present = 0; 567 offset = nr_present = 0;
563 568
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
595 600
596 nr_present++; 601 nr_present++;
597 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 602 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
603 if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
604 pte_access &= ~ACC_WRITE_MASK;
605 reset_host_protection = 0;
606 } else {
607 reset_host_protection = 1;
608 }
598 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 609 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
599 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, 610 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
600 spte_to_pfn(sp->spt[i]), true, false); 611 spte_to_pfn(sp->spt[i]), true, false,
612 reset_host_protection);
601 } 613 }
602 614
603 return !nr_present; 615 return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c04b3c..c17404add91f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
767 rdtscll(tsc_this); 767 rdtscll(tsc_this);
768 delta = vcpu->arch.host_tsc - tsc_this; 768 delta = vcpu->arch.host_tsc - tsc_this;
769 svm->vmcb->control.tsc_offset += delta; 769 svm->vmcb->control.tsc_offset += delta;
770 if (is_nested(svm))
771 svm->nested.hsave->control.tsc_offset += delta;
770 vcpu->cpu = cpu; 772 vcpu->cpu = cpu;
771 kvm_migrate_timers(vcpu); 773 kvm_migrate_timers(vcpu);
772 svm->asid_generation = 0; 774 svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2057 2059
2058 switch (ecx) { 2060 switch (ecx) {
2059 case MSR_IA32_TSC: { 2061 case MSR_IA32_TSC: {
2060 u64 tsc; 2062 u64 tsc_offset;
2063
2064 if (is_nested(svm))
2065 tsc_offset = svm->nested.hsave->control.tsc_offset;
2066 else
2067 tsc_offset = svm->vmcb->control.tsc_offset;
2061 2068
2062 rdtscll(tsc); 2069 *data = tsc_offset + native_read_tsc();
2063 *data = svm->vmcb->control.tsc_offset + tsc;
2064 break; 2070 break;
2065 } 2071 }
2066 case MSR_K6_STAR: 2072 case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2146 2152
2147 switch (ecx) { 2153 switch (ecx) {
2148 case MSR_IA32_TSC: { 2154 case MSR_IA32_TSC: {
2149 u64 tsc; 2155 u64 tsc_offset = data - native_read_tsc();
2156 u64 g_tsc_offset = 0;
2157
2158 if (is_nested(svm)) {
2159 g_tsc_offset = svm->vmcb->control.tsc_offset -
2160 svm->nested.hsave->control.tsc_offset;
2161 svm->nested.hsave->control.tsc_offset = tsc_offset;
2162 }
2163
2164 svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
2150 2165
2151 rdtscll(tsc);
2152 svm->vmcb->control.tsc_offset = data - tsc;
2153 break; 2166 break;
2154 } 2167 }
2155 case MSR_K6_STAR: 2168 case MSR_K6_STAR:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3812014bd0b..ed53b42caba1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
709 if (vcpu->cpu != cpu) { 709 if (vcpu->cpu != cpu) {
710 vcpu_clear(vmx); 710 vcpu_clear(vmx);
711 kvm_migrate_timers(vcpu); 711 kvm_migrate_timers(vcpu);
712 vpid_sync_vcpu_all(vmx); 712 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
713 local_irq_disable(); 713 local_irq_disable();
714 list_add(&vmx->local_vcpus_link, 714 list_add(&vmx->local_vcpus_link,
715 &per_cpu(vcpus_on_cpu, cpu)); 715 &per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee44249..ae07d261527c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1591 1591
1592 if (cpuid->nent < 1) 1592 if (cpuid->nent < 1)
1593 goto out; 1593 goto out;
1594 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1595 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
1594 r = -ENOMEM; 1596 r = -ENOMEM;
1595 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 1597 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
1596 if (!cpuid_entries) 1598 if (!cpuid_entries)
@@ -1690,7 +1692,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
1690 unsigned bank_num = mcg_cap & 0xff, bank; 1692 unsigned bank_num = mcg_cap & 0xff, bank;
1691 1693
1692 r = -EINVAL; 1694 r = -EINVAL;
1693 if (!bank_num) 1695 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
1694 goto out; 1696 goto out;
1695 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) 1697 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
1696 goto out; 1698 goto out;
@@ -4049,7 +4051,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4049 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4051 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
4050} 4052}
4051 4053
4052static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, 4054static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
4053 struct desc_struct *seg_desc) 4055 struct desc_struct *seg_desc)
4054{ 4056{
4055 u32 base_addr = get_desc_base(seg_desc); 4057 u32 base_addr = get_desc_base(seg_desc);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4cb7d5d18b8e..7e59dc1d3fc2 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1135,11 +1135,6 @@ static struct notifier_block paniced = {
1135/* Setting up memory is fairly easy. */ 1135/* Setting up memory is fairly easy. */
1136static __init char *lguest_memory_setup(void) 1136static __init char *lguest_memory_setup(void)
1137{ 1137{
1138 /* We do this here and not earlier because lockcheck used to barf if we
1139 * did it before start_kernel(). I think we fixed that, so it'd be
1140 * nice to move it back to lguest_init. Patch welcome... */
1141 atomic_notifier_chain_register(&panic_notifier_list, &paniced);
1142
1143 /* 1138 /*
1144 *The Linux bootloader header contains an "e820" memory map: the 1139 *The Linux bootloader header contains an "e820" memory map: the
1145 * Launcher populated the first entry with our memory limit. 1140 * Launcher populated the first entry with our memory limit.
@@ -1364,10 +1359,13 @@ __init void lguest_init(void)
1364 1359
1365 /* 1360 /*
1366 * If we don't initialize the lock dependency checker now, it crashes 1361 * If we don't initialize the lock dependency checker now, it crashes
1367 * paravirt_disable_iospace. 1362 * atomic_notifier_chain_register, then paravirt_disable_iospace.
1368 */ 1363 */
1369 lockdep_init(); 1364 lockdep_init();
1370 1365
1366 /* Hook in our special panic hypercall code. */
1367 atomic_notifier_chain_register(&panic_notifier_list, &paniced);
1368
1371 /* 1369 /*
1372 * The IDE code spends about 3 seconds probing for disks: if we reserve 1370 * The IDE code spends about 3 seconds probing for disks: if we reserve
1373 * all the I/O ports up front it can't get them and so doesn't probe. 1371 * all the I/O ports up front it can't get them and so doesn't probe.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 9e609206fac9..85f5db95c60f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,9 @@ ifeq ($(CONFIG_X86_32),y)
16 lib-y += checksum_32.o 16 lib-y += checksum_32.o
17 lib-y += strstr_32.o 17 lib-y += strstr_32.o
18 lib-y += semaphore_32.o string_32.o 18 lib-y += semaphore_32.o string_32.o
19 19ifneq ($(CONFIG_X86_CMPXCHG64),y)
20 lib-y += cmpxchg8b_emu.o
21endif
20 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 22 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
21else 23else
22 obj-y += io_64.o iomap_copy_64.o 24 obj-y += io_64.o iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
new file mode 100644
index 000000000000..828cb710dec2
--- /dev/null
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -0,0 +1,57 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; version 2
5 * of the License.
6 *
7 */
8
9#include <linux/linkage.h>
10#include <asm/alternative-asm.h>
11#include <asm/frame.h>
12#include <asm/dwarf2.h>
13
14
15.text
16
17/*
18 * Inputs:
19 * %esi : memory location to compare
20 * %eax : low 32 bits of old value
21 * %edx : high 32 bits of old value
22 * %ebx : low 32 bits of new value
23 * %ecx : high 32 bits of new value
24 */
25ENTRY(cmpxchg8b_emu)
26CFI_STARTPROC
27
28#
29# Emulate 'cmpxchg8b (%esi)' on UP except we don't
30# set the whole ZF thing (caller will just compare
31# eax:edx with the expected value)
32#
33cmpxchg8b_emu:
34 pushfl
35 cli
36
37 cmpl (%esi), %eax
38 jne not_same
39 cmpl 4(%esi), %edx
40 jne half_same
41
42 movl %ebx, (%esi)
43 movl %ecx, 4(%esi)
44
45 popfl
46 ret
47
48 not_same:
49 movl (%esi), %eax
50 half_same:
51 movl 4(%esi), %edx
52
53 popfl
54 ret
55
56CFI_ENDPROC
57ENDPROC(cmpxchg8b_emu)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9b5a9f59a478..06630d26e56d 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,9 +1,10 @@
1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o physaddr.o gup.o 2 pat.o pgtable.o physaddr.o gup.o setup_nx.o
3 3
4# Make sure __phys_addr has no stackprotector 4# Make sure __phys_addr has no stackprotector
5nostackp := $(call cc-option, -fno-stack-protector) 5nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_physaddr.o := $(nostackp) 6CFLAGS_physaddr.o := $(nostackp)
7CFLAGS_setup_nx.o := $(nostackp)
7 8
8obj-$(CONFIG_SMP) += tlb.o 9obj-$(CONFIG_SMP) += tlb.o
9 10
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 775a020990a5..f4cee9028cf0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,7 +10,7 @@
10#include <linux/bootmem.h> /* max_low_pfn */ 10#include <linux/bootmem.h> /* max_low_pfn */
11#include <linux/kprobes.h> /* __kprobes, ... */ 11#include <linux/kprobes.h> /* __kprobes, ... */
12#include <linux/mmiotrace.h> /* kmmio_handler, ... */ 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/perf_counter.h> /* perf_swcounter_event */ 13#include <linux/perf_event.h> /* perf_sw_event */
14 14
15#include <asm/traps.h> /* dotraplinkage, ... */ 15#include <asm/traps.h> /* dotraplinkage, ... */
16#include <asm/pgalloc.h> /* pgd_*(), ... */ 16#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
167 info.si_errno = 0; 167 info.si_errno = 0;
168 info.si_code = si_code; 168 info.si_code = si_code;
169 info.si_addr = (void __user *)address; 169 info.si_addr = (void __user *)address;
170 info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
170 171
171 force_sig_info(si_signo, &info, tsk); 172 force_sig_info(si_signo, &info, tsk);
172} 173}
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
790} 791}
791 792
792static void 793static void
793do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) 794do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
795 unsigned int fault)
794{ 796{
795 struct task_struct *tsk = current; 797 struct task_struct *tsk = current;
796 struct mm_struct *mm = tsk->mm; 798 struct mm_struct *mm = tsk->mm;
799 int code = BUS_ADRERR;
797 800
798 up_read(&mm->mmap_sem); 801 up_read(&mm->mmap_sem);
799 802
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
809 tsk->thread.error_code = error_code; 812 tsk->thread.error_code = error_code;
810 tsk->thread.trap_no = 14; 813 tsk->thread.trap_no = 14;
811 814
812 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 815#ifdef CONFIG_MEMORY_FAILURE
816 if (fault & VM_FAULT_HWPOISON) {
817 printk(KERN_ERR
818 "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
819 tsk->comm, tsk->pid, address);
820 code = BUS_MCEERR_AR;
821 }
822#endif
823 force_sig_info_fault(SIGBUS, code, address, tsk);
813} 824}
814 825
815static noinline void 826static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
819 if (fault & VM_FAULT_OOM) { 830 if (fault & VM_FAULT_OOM) {
820 out_of_memory(regs, error_code, address); 831 out_of_memory(regs, error_code, address);
821 } else { 832 } else {
822 if (fault & VM_FAULT_SIGBUS) 833 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
823 do_sigbus(regs, error_code, address); 834 do_sigbus(regs, error_code, address, fault);
824 else 835 else
825 BUG(); 836 BUG();
826 } 837 }
@@ -1017,7 +1028,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1017 if (unlikely(error_code & PF_RSVD)) 1028 if (unlikely(error_code & PF_RSVD))
1018 pgtable_bad(regs, error_code, address); 1029 pgtable_bad(regs, error_code, address);
1019 1030
1020 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 1031 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
1021 1032
1022 /* 1033 /*
1023 * If we're in an interrupt, have no user context or are running 1034 * If we're in an interrupt, have no user context or are running
@@ -1114,11 +1125,11 @@ good_area:
1114 1125
1115 if (fault & VM_FAULT_MAJOR) { 1126 if (fault & VM_FAULT_MAJOR) {
1116 tsk->maj_flt++; 1127 tsk->maj_flt++;
1117 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 1128 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
1118 regs, address); 1129 regs, address);
1119 } else { 1130 } else {
1120 tsk->min_flt++; 1131 tsk->min_flt++;
1121 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 1132 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
1122 regs, address); 1133 regs, address);
1123 } 1134 }
1124 1135
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 0607119cef94..73ffd5536f62 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,69 +28,6 @@ int direct_gbpages
28#endif 28#endif
29; 29;
30 30
31int nx_enabled;
32
33#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
34static int disable_nx __cpuinitdata;
35
36/*
37 * noexec = on|off
38 *
39 * Control non-executable mappings for processes.
40 *
41 * on Enable
42 * off Disable
43 */
44static int __init noexec_setup(char *str)
45{
46 if (!str)
47 return -EINVAL;
48 if (!strncmp(str, "on", 2)) {
49 __supported_pte_mask |= _PAGE_NX;
50 disable_nx = 0;
51 } else if (!strncmp(str, "off", 3)) {
52 disable_nx = 1;
53 __supported_pte_mask &= ~_PAGE_NX;
54 }
55 return 0;
56}
57early_param("noexec", noexec_setup);
58#endif
59
60#ifdef CONFIG_X86_PAE
61static void __init set_nx(void)
62{
63 unsigned int v[4], l, h;
64
65 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
66 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
67
68 if ((v[3] & (1 << 20)) && !disable_nx) {
69 rdmsr(MSR_EFER, l, h);
70 l |= EFER_NX;
71 wrmsr(MSR_EFER, l, h);
72 nx_enabled = 1;
73 __supported_pte_mask |= _PAGE_NX;
74 }
75 }
76}
77#else
78static inline void set_nx(void)
79{
80}
81#endif
82
83#ifdef CONFIG_X86_64
84void __cpuinit check_efer(void)
85{
86 unsigned long efer;
87
88 rdmsrl(MSR_EFER, efer);
89 if (!(efer & EFER_NX) || disable_nx)
90 __supported_pte_mask &= ~_PAGE_NX;
91}
92#endif
93
94static void __init find_early_table_space(unsigned long end, int use_pse, 31static void __init find_early_table_space(unsigned long end, int use_pse,
95 int use_gbpages) 32 int use_gbpages)
96{ 33{
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711bb949..30938c1d8d5d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
84#ifdef CONFIG_X86_PAE 84#ifdef CONFIG_X86_PAE
85 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 85 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
86 if (after_bootmem) 86 if (after_bootmem)
87 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 87 pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
88 else 88 else
89 pmd_table = (pmd_t *)alloc_low_page(); 89 pmd_table = (pmd_t *)alloc_low_page();
90 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 90 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
116#endif 116#endif
117 if (!page_table) 117 if (!page_table)
118 page_table = 118 page_table =
119 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 119 (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
120 } else 120 } else
121 page_table = (pte_t *)alloc_low_page(); 121 page_table = (pte_t *)alloc_low_page();
122 122
@@ -857,8 +857,6 @@ static void __init test_wp_bit(void)
857 } 857 }
858} 858}
859 859
860static struct kcore_list kcore_mem, kcore_vmalloc;
861
862void __init mem_init(void) 860void __init mem_init(void)
863{ 861{
864 int codesize, reservedpages, datasize, initsize; 862 int codesize, reservedpages, datasize, initsize;
@@ -886,13 +884,9 @@ void __init mem_init(void)
886 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 884 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
887 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 885 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
888 886
889 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
890 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
891 VMALLOC_END-VMALLOC_START);
892
893 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " 887 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
894 "%dk reserved, %dk data, %dk init, %ldk highmem)\n", 888 "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
895 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 889 nr_free_pages() << (PAGE_SHIFT-10),
896 num_physpages << (PAGE_SHIFT-10), 890 num_physpages << (PAGE_SHIFT-10),
897 codesize >> 10, 891 codesize >> 10,
898 reservedpages << (PAGE_SHIFT-10), 892 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ea56b8cbb6a6..5a4398a6006b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,8 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
647 647
648#endif /* CONFIG_MEMORY_HOTPLUG */ 648#endif /* CONFIG_MEMORY_HOTPLUG */
649 649
650static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 650static struct kcore_list kcore_vsyscall;
651 kcore_modules, kcore_vsyscall;
652 651
653void __init mem_init(void) 652void __init mem_init(void)
654{ 653{
@@ -677,17 +676,12 @@ void __init mem_init(void)
677 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 676 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
678 677
679 /* Register memory areas for /proc/kcore */ 678 /* Register memory areas for /proc/kcore */
680 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
681 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
682 VMALLOC_END-VMALLOC_START);
683 kclist_add(&kcore_kernel, &_stext, _end - _stext);
684 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
685 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 679 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
686 VSYSCALL_END - VSYSCALL_START); 680 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
687 681
688 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 682 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
689 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", 683 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
690 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 684 nr_free_pages() << (PAGE_SHIFT-10),
691 max_pfn << (PAGE_SHIFT-10), 685 max_pfn << (PAGE_SHIFT-10),
692 codesize >> 10, 686 codesize >> 10,
693 absent_pages << (PAGE_SHIFT-10), 687 absent_pages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 334e63ca7b2b..2feb9bdedaaf 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
170 (unsigned long long)phys_addr, 170 (unsigned long long)phys_addr,
171 (unsigned long long)(phys_addr + size), 171 (unsigned long long)(phys_addr + size),
172 prot_val, new_prot_val); 172 prot_val, new_prot_val);
173 free_memtype(phys_addr, phys_addr + size); 173 goto err_free_memtype;
174 return NULL;
175 } 174 }
176 prot_val = new_prot_val; 175 prot_val = new_prot_val;
177 } 176 }
@@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
197 */ 196 */
198 area = get_vm_area_caller(size, VM_IOREMAP, caller); 197 area = get_vm_area_caller(size, VM_IOREMAP, caller);
199 if (!area) 198 if (!area)
200 return NULL; 199 goto err_free_memtype;
201 area->phys_addr = phys_addr; 200 area->phys_addr = phys_addr;
202 vaddr = (unsigned long) area->addr; 201 vaddr = (unsigned long) area->addr;
203 202
204 if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { 203 if (kernel_map_sync_memtype(phys_addr, size, prot_val))
205 free_memtype(phys_addr, phys_addr + size); 204 goto err_free_area;
206 free_vm_area(area);
207 return NULL;
208 }
209 205
210 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { 206 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
211 free_memtype(phys_addr, phys_addr + size); 207 goto err_free_area;
212 free_vm_area(area);
213 return NULL;
214 }
215 208
216 ret_addr = (void __iomem *) (vaddr + offset); 209 ret_addr = (void __iomem *) (vaddr + offset);
217 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); 210 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
218 211
219 return ret_addr; 212 return ret_addr;
213err_free_area:
214 free_vm_area(area);
215err_free_memtype:
216 free_memtype(phys_addr, phys_addr + size);
217 return NULL;
220} 218}
221 219
222/** 220/**
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 528bf954eb74..8cc183344140 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs)
225 225
226 BUG_ON(!irqs_disabled()); 226 BUG_ON(!irqs_disabled());
227 227
228 if (data->balance == 0)
229 return;
230
231 if (unlikely(data->balance != 1)) { 228 if (unlikely(data->balance != 1)) {
232 kmemcheck_show_all(); 229 kmemcheck_show_all();
233 kmemcheck_error_save_bug(regs); 230 kmemcheck_error_save_bug(regs);
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index e773b6bd0079..3f66b82076a3 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,7 +1,6 @@
1#include <linux/kmemcheck.h> 1#include <linux/kmemcheck.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <linux/module.h>
5 4
6#include <asm/page.h> 5#include <asm/page.h>
7#include <asm/pgtable.h> 6#include <asm/pgtable.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 24952fdc7e40..dd38bfbefd1f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
144 144
145 mb(); 145 mb();
146} 146}
147EXPORT_SYMBOL_GPL(clflush_cache_range);
147 148
148static void __cpa_flush_all(void *arg) 149static void __cpa_flush_all(void *arg)
149{ 150{
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7257cf3decf9..e78cd0ec2bcf 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -81,6 +81,7 @@ enum {
81void pat_init(void) 81void pat_init(void)
82{ 82{
83 u64 pat; 83 u64 pat;
84 bool boot_cpu = !boot_pat_state;
84 85
85 if (!pat_enabled) 86 if (!pat_enabled)
86 return; 87 return;
@@ -122,8 +123,10 @@ void pat_init(void)
122 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); 123 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
123 124
124 wrmsrl(MSR_IA32_CR_PAT, pat); 125 wrmsrl(MSR_IA32_CR_PAT, pat);
125 printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", 126
126 smp_processor_id(), boot_pat_state, pat); 127 if (boot_cpu)
128 printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
129 smp_processor_id(), boot_pat_state, pat);
127} 130}
128 131
129#undef PAT 132#undef PAT
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
new file mode 100644
index 000000000000..513d8ed5d2ec
--- /dev/null
+++ b/arch/x86/mm/setup_nx.c
@@ -0,0 +1,69 @@
1#include <linux/spinlock.h>
2#include <linux/errno.h>
3#include <linux/init.h>
4
5#include <asm/pgtable.h>
6
7int nx_enabled;
8
9#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
10static int disable_nx __cpuinitdata;
11
12/*
13 * noexec = on|off
14 *
15 * Control non-executable mappings for processes.
16 *
17 * on Enable
18 * off Disable
19 */
20static int __init noexec_setup(char *str)
21{
22 if (!str)
23 return -EINVAL;
24 if (!strncmp(str, "on", 2)) {
25 __supported_pte_mask |= _PAGE_NX;
26 disable_nx = 0;
27 } else if (!strncmp(str, "off", 3)) {
28 disable_nx = 1;
29 __supported_pte_mask &= ~_PAGE_NX;
30 }
31 return 0;
32}
33early_param("noexec", noexec_setup);
34#endif
35
36#ifdef CONFIG_X86_PAE
37void __init set_nx(void)
38{
39 unsigned int v[4], l, h;
40
41 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
42 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
43
44 if ((v[3] & (1 << 20)) && !disable_nx) {
45 rdmsr(MSR_EFER, l, h);
46 l |= EFER_NX;
47 wrmsr(MSR_EFER, l, h);
48 nx_enabled = 1;
49 __supported_pte_mask |= _PAGE_NX;
50 }
51 }
52}
53#else
54void set_nx(void)
55{
56}
57#endif
58
59#ifdef CONFIG_X86_64
60void __cpuinit check_efer(void)
61{
62 unsigned long efer;
63
64 rdmsrl(MSR_EFER, efer);
65 if (!(efer & EFER_NX) || disable_nx)
66 __supported_pte_mask &= ~_PAGE_NX;
67}
68#endif
69
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c814e144a3f0..36fe08eeb5c3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -59,7 +59,8 @@ void leave_mm(int cpu)
59{ 59{
60 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) 60 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
61 BUG(); 61 BUG();
62 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); 62 cpumask_clear_cpu(cpu,
63 mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
63 load_cr3(swapper_pg_dir); 64 load_cr3(swapper_pg_dir);
64} 65}
65EXPORT_SYMBOL_GPL(leave_mm); 66EXPORT_SYMBOL_GPL(leave_mm);
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void)
234 preempt_disable(); 235 preempt_disable();
235 236
236 local_flush_tlb(); 237 local_flush_tlb();
237 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) 238 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
238 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); 239 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
239 preempt_enable(); 240 preempt_enable();
240} 241}
241 242
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm)
249 else 250 else
250 leave_mm(smp_processor_id()); 251 leave_mm(smp_processor_id());
251 } 252 }
252 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) 253 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
253 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); 254 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
254 255
255 preempt_enable(); 256 preempt_enable();
256} 257}
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
268 leave_mm(smp_processor_id()); 269 leave_mm(smp_processor_id());
269 } 270 }
270 271
271 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) 272 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
272 flush_tlb_others(&mm->cpu_vm_mask, mm, va); 273 flush_tlb_others(mm_cpumask(mm), mm, va);
273 274
274 preempt_enable(); 275 preempt_enable();
275} 276}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4899215999de..8eb05878554c 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void)
234 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && 234 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
235 current_cpu_data.x86_model == 15) { 235 current_cpu_data.x86_model == 15) {
236 eax.split.version_id = 2; 236 eax.split.version_id = 2;
237 eax.split.num_counters = 2; 237 eax.split.num_events = 2;
238 eax.split.bit_width = 40; 238 eax.split.bit_width = 40;
239 } 239 }
240 240
241 num_counters = eax.split.num_counters; 241 num_counters = eax.split.num_events;
242 242
243 op_arch_perfmon_spec.num_counters = num_counters; 243 op_arch_perfmon_spec.num_counters = num_counters;
244 op_arch_perfmon_spec.num_controls = num_counters; 244 op_arch_perfmon_spec.num_controls = num_counters;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index b83776180c7f..7b8e75d16081 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -13,7 +13,7 @@
13#define OP_X86_MODEL_H 13#define OP_X86_MODEL_H
14 14
15#include <asm/types.h> 15#include <asm/types.h>
16#include <asm/perf_counter.h> 16#include <asm/perf_event.h>
17 17
18struct op_msr { 18struct op_msr {
19 unsigned long addr; 19 unsigned long addr;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 5db96d4304de..1331fcf26143 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
646 646
647#else /* CONFIG_X86_32 */ 647#else /* CONFIG_X86_32 */
648 648
649static unsigned char mp_bus_to_node[BUS_NR] = { 649static int mp_bus_to_node[BUS_NR] = {
650 [0 ... BUS_NR - 1] = -1 650 [0 ... BUS_NR - 1] = -1
651}; 651};
652 652
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 52e62e57fedd..b22d13b0c71d 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev)
266 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); 266 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
267} 267}
268 268
269static struct vm_operations_struct pci_mmap_ops = { 269static const struct vm_operations_struct pci_mmap_ops = {
270 .access = generic_access_phys, 270 .access = generic_access_phys,
271}; 271};
272 272
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 712443ec6d43..602c172d3bd5 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -13,10 +13,14 @@
13#include <linux/pci.h> 13#include <linux/pci.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/acpi.h> 15#include <linux/acpi.h>
16#include <linux/sfi_acpi.h>
16#include <linux/bitmap.h> 17#include <linux/bitmap.h>
17#include <linux/sort.h> 18#include <linux/sort.h>
18#include <asm/e820.h> 19#include <asm/e820.h>
19#include <asm/pci_x86.h> 20#include <asm/pci_x86.h>
21#include <asm/acpi.h>
22
23#define PREFIX "PCI: "
20 24
21/* aperture is up to 256MB but BIOS may reserve less */ 25/* aperture is up to 256MB but BIOS may reserve less */
22#define MMCONFIG_APER_MIN (2 * 1024*1024) 26#define MMCONFIG_APER_MIN (2 * 1024*1024)
@@ -491,7 +495,7 @@ static void __init pci_mmcfg_reject_broken(int early)
491 (unsigned int)cfg->start_bus_number, 495 (unsigned int)cfg->start_bus_number,
492 (unsigned int)cfg->end_bus_number); 496 (unsigned int)cfg->end_bus_number);
493 497
494 if (!early) 498 if (!early && !acpi_disabled)
495 valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); 499 valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
496 500
497 if (valid) 501 if (valid)
@@ -606,7 +610,7 @@ static void __init __pci_mmcfg_init(int early)
606 } 610 }
607 611
608 if (!known_bridge) 612 if (!known_bridge)
609 acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); 613 acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
610 614
611 pci_mmcfg_reject_broken(early); 615 pci_mmcfg_reject_broken(early);
612 616
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 8b2d561046a3..f10a7e94a84c 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -11,9 +11,9 @@
11 11
12#include <linux/pci.h> 12#include <linux/pci.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/acpi.h>
15#include <asm/e820.h> 14#include <asm/e820.h>
16#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <acpi/acpi.h>
17 17
18/* Assume systems with more busses have correct MCFG */ 18/* Assume systems with more busses have correct MCFG */
19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) 19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 417c9f5b4afa..8aa85f17667e 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -243,10 +243,6 @@ static void __restore_processor_state(struct saved_context *ctxt)
243 243
244 do_fpu_end(); 244 do_fpu_end();
245 mtrr_bp_restore(); 245 mtrr_bp_restore();
246
247#ifdef CONFIG_X86_OLD_MCE
248 mcheck_init(&boot_cpu_data);
249#endif
250} 246}
251 247
252/* Needed by apm.c */ 248/* Needed by apm.c */
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 88112b49f02c..6b4ffedb93c9 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@
122 $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ 122 $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
123 -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) 123 -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
124 124
125VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) 125VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
126GCOV_PROFILE := n 126GCOV_PROFILE := n
127 127
128# 128#
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index b53225d2cac3..e133ce25e290 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file)
100 return 0; 100 return 0;
101} 101}
102 102
103static struct file_operations u32_array_fops = { 103static const struct file_operations u32_array_fops = {
104 .owner = THIS_MODULE, 104 .owner = THIS_MODULE,
105 .open = u32_array_open, 105 .open = u32_array_open,
106 .release= xen_array_release, 106 .release= xen_array_release,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 544eb7496531..dfbf70e65860 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -178,6 +178,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
178static void xen_cpuid(unsigned int *ax, unsigned int *bx, 178static void xen_cpuid(unsigned int *ax, unsigned int *bx,
179 unsigned int *cx, unsigned int *dx) 179 unsigned int *cx, unsigned int *dx)
180{ 180{
181 unsigned maskebx = ~0;
181 unsigned maskecx = ~0; 182 unsigned maskecx = ~0;
182 unsigned maskedx = ~0; 183 unsigned maskedx = ~0;
183 184
@@ -185,9 +186,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
185 * Mask out inconvenient features, to try and disable as many 186 * Mask out inconvenient features, to try and disable as many
186 * unsupported kernel subsystems as possible. 187 * unsupported kernel subsystems as possible.
187 */ 188 */
188 if (*ax == 1) { 189 switch (*ax) {
190 case 1:
189 maskecx = cpuid_leaf1_ecx_mask; 191 maskecx = cpuid_leaf1_ecx_mask;
190 maskedx = cpuid_leaf1_edx_mask; 192 maskedx = cpuid_leaf1_edx_mask;
193 break;
194
195 case 0xb:
196 /* Suppress extended topology stuff */
197 maskebx = 0;
198 break;
191 } 199 }
192 200
193 asm(XEN_EMULATE_PREFIX "cpuid" 201 asm(XEN_EMULATE_PREFIX "cpuid"
@@ -197,6 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
197 "=d" (*dx) 205 "=d" (*dx)
198 : "0" (*ax), "2" (*cx)); 206 : "0" (*ax), "2" (*cx));
199 207
208 *bx &= maskebx;
200 *cx &= maskecx; 209 *cx &= maskecx;
201 *dx &= maskedx; 210 *dx &= maskedx;
202} 211}
@@ -1075,6 +1084,8 @@ asmlinkage void __init xen_start_kernel(void)
1075 * Set up some pagetable state before starting to set any ptes. 1084 * Set up some pagetable state before starting to set any ptes.
1076 */ 1085 */
1077 1086
1087 xen_init_mmu_ops();
1088
1078 /* Prevent unwanted bits from being set in PTEs. */ 1089 /* Prevent unwanted bits from being set in PTEs. */
1079 __supported_pte_mask &= ~_PAGE_GLOBAL; 1090 __supported_pte_mask &= ~_PAGE_GLOBAL;
1080 if (!xen_initial_domain()) 1091 if (!xen_initial_domain())
@@ -1082,6 +1093,11 @@ asmlinkage void __init xen_start_kernel(void)
1082 1093
1083 __supported_pte_mask |= _PAGE_IOMAP; 1094 __supported_pte_mask |= _PAGE_IOMAP;
1084 1095
1096#ifdef CONFIG_X86_64
1097 /* Work out if we support NX */
1098 check_efer();
1099#endif
1100
1085 xen_setup_features(); 1101 xen_setup_features();
1086 1102
1087 /* Get mfn list */ 1103 /* Get mfn list */
@@ -1094,7 +1110,6 @@ asmlinkage void __init xen_start_kernel(void)
1094 */ 1110 */
1095 xen_setup_stackprotector(); 1111 xen_setup_stackprotector();
1096 1112
1097 xen_init_mmu_ops();
1098 xen_init_irq_ops(); 1113 xen_init_irq_ops();
1099 xen_init_cpuid_mask(); 1114 xen_init_cpuid_mask();
1100 1115
@@ -1123,11 +1138,6 @@ asmlinkage void __init xen_start_kernel(void)
1123 1138
1124 pgd = (pgd_t *)xen_start_info->pt_base; 1139 pgd = (pgd_t *)xen_start_info->pt_base;
1125 1140
1126#ifdef CONFIG_X86_64
1127 /* Work out if we support NX */
1128 check_efer();
1129#endif
1130
1131 /* Don't do the full vcpu_info placement stuff until we have a 1141 /* Don't do the full vcpu_info placement stuff until we have a
1132 possible map and a non-dummy shared_info. */ 1142 possible map and a non-dummy shared_info. */
1133 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1143 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 093dd59b5385..3bf7b1d250ce 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1165 /* Get the "official" set of cpus referring to our pagetable. */ 1165 /* Get the "official" set of cpus referring to our pagetable. */
1166 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { 1166 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1167 for_each_online_cpu(cpu) { 1167 for_each_online_cpu(cpu) {
1168 if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) 1168 if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
1169 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) 1169 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1170 continue; 1170 continue;
1171 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); 1171 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1172 } 1172 }
1173 return; 1173 return;
1174 } 1174 }
1175 cpumask_copy(mask, &mm->cpu_vm_mask); 1175 cpumask_copy(mask, mm_cpumask(mm));
1176 1176
1177 /* It's possible that a vcpu may have a stale reference to our 1177 /* It's possible that a vcpu may have a stale reference to our
1178 cr3, because its in lazy mode, and it hasn't yet flushed 1178 cr3, because its in lazy mode, and it hasn't yet flushed