aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-12-29 03:45:15 -0500
committerIngo Molnar <mingo@elte.hu>2008-12-29 03:45:15 -0500
commite1df957670aef74ffd9a4ad93e6d2c90bf6b4845 (patch)
treebca1fcfef55b3e3e82c9a822b4ac6428fce2b419 /arch/x86
parent2b583d8bc8d7105b58d7481a4a0ceb718dac49c6 (diff)
parent3c92ec8ae91ecf59d88c798301833d7cf83f2179 (diff)
Merge branch 'linus' into perfcounters/core
Conflicts: fs/exec.c include/linux/init_task.h Simple context conflicts.
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig79
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/Kconfig.debug24
-rw-r--r--arch/x86/boot/video-vga.c4
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/configs/i386_defconfig4
-rw-r--r--arch/x86/configs/x86_64_defconfig4
-rw-r--r--arch/x86/crypto/crc32c-intel.c121
-rw-r--r--arch/x86/ia32/ia32_aout.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c43
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/bigsmp/apic.h2
-rw-r--r--arch/x86/include/asm/bug.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h5
-rw-r--r--arch/x86/include/asm/dma-mapping.h2
-rw-r--r--arch/x86/include/asm/ds.h312
-rw-r--r--arch/x86/include/asm/elf.h2
-rw-r--r--arch/x86/include/asm/emergency-restart.h4
-rw-r--r--arch/x86/include/asm/es7000/apic.h79
-rw-r--r--arch/x86/include/asm/es7000/wakecpu.h41
-rw-r--r--arch/x86/include/asm/ftrace.h61
-rw-r--r--arch/x86/include/asm/gart.h33
-rw-r--r--arch/x86/include/asm/genapic_32.h19
-rw-r--r--arch/x86/include/asm/genapic_64.h2
-rw-r--r--arch/x86/include/asm/hypervisor.h26
-rw-r--r--arch/x86/include/asm/ia32.h18
-rw-r--r--arch/x86/include/asm/idle.h5
-rw-r--r--arch/x86/include/asm/io.h37
-rw-r--r--arch/x86/include/asm/io_64.h2
-rw-r--r--arch/x86/include/asm/io_apic.h10
-rw-r--r--arch/x86/include/asm/iommu.h33
-rw-r--r--arch/x86/include/asm/kexec.h31
-rw-r--r--arch/x86/include/asm/mach-default/mach_apic.h2
-rw-r--r--arch/x86/include/asm/mach-default/mach_wakecpu.h24
-rw-r--r--arch/x86/include/asm/mach-default/smpboot_hooks.h8
-rw-r--r--arch/x86/include/asm/mach-generic/mach_apic.h1
-rw-r--r--arch/x86/include/asm/mach-generic/mach_wakecpu.h12
-rw-r--r--arch/x86/include/asm/mmu_context_32.h13
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/msr.h15
-rw-r--r--arch/x86/include/asm/numaq/wakecpu.h24
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h50
-rw-r--r--arch/x86/include/asm/pgtable-3level.h1
-rw-r--r--arch/x86/include/asm/pgtable.h28
-rw-r--r--arch/x86/include/asm/pgtable_32.h9
-rw-r--r--arch/x86/include/asm/pgtable_64.h28
-rw-r--r--arch/x86/include/asm/prctl.h3
-rw-r--r--arch/x86/include/asm/processor.h17
-rw-r--r--arch/x86/include/asm/ptrace.h43
-rw-r--r--arch/x86/include/asm/reboot.h5
-rw-r--r--arch/x86/include/asm/setup.h7
-rw-r--r--arch/x86/include/asm/sigframe.h70
-rw-r--r--arch/x86/include/asm/signal.h6
-rw-r--r--arch/x86/include/asm/sparsemem.h2
-rw-r--r--arch/x86/include/asm/syscalls.h14
-rw-r--r--arch/x86/include/asm/system.h6
-rw-r--r--arch/x86/include/asm/thread_info.h9
-rw-r--r--arch/x86/include/asm/trampoline.h7
-rw-r--r--arch/x86/include/asm/traps.h11
-rw-r--r--arch/x86/include/asm/uaccess.h4
-rw-r--r--arch/x86/include/asm/uv/bios.h34
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h103
-rw-r--r--arch/x86/include/asm/vmi.h8
-rw-r--r--arch/x86/include/asm/vmware.h27
-rw-r--r--arch/x86/include/asm/xen/hypercall.h6
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h39
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c11
-rw-r--r--arch/x86/kernel/amd_iommu.c6
-rw-r--r--arch/x86/kernel/amd_iommu_init.c8
-rw-r--r--arch/x86/kernel/aperture_64.c5
-rw-r--r--arch/x86/kernel/apic.c130
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/bios_uv.c58
-rw-r--r--arch/x86/kernel/check.c161
-rw-r--r--arch/x86/kernel/cpu/Makefile6
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c58
-rw-r--r--arch/x86/kernel/cpu/intel.c23
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c17
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c346
-rw-r--r--arch/x86/kernel/cpu/vmware.c112
-rw-r--r--arch/x86/kernel/crash.c70
-rw-r--r--arch/x86/kernel/ds.c1147
-rw-r--r--arch/x86/kernel/dumpstack.c351
-rw-r--r--arch/x86/kernel/dumpstack.h39
-rw-r--r--arch/x86/kernel/dumpstack_32.c307
-rw-r--r--arch/x86/kernel/dumpstack_64.c289
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/early-quirks.c1
-rw-r--r--arch/x86/kernel/early_printk.c47
-rw-r--r--arch/x86/kernel/entry_32.S51
-rw-r--r--arch/x86/kernel/entry_64.S98
-rw-r--r--arch/x86/kernel/es7000_32.c62
-rw-r--r--arch/x86/kernel/ftrace.c390
-rw-r--r--arch/x86/kernel/genapic_64.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c111
-rw-r--r--arch/x86/kernel/head.c1
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic.c3
-rw-r--r--arch/x86/kernel/irq_64.c3
-rw-r--r--arch/x86/kernel/machine_kexec_32.c104
-rw-r--r--arch/x86/kernel/microcode_amd.c232
-rw-r--r--arch/x86/kernel/microcode_core.c25
-rw-r--r--arch/x86/kernel/microcode_intel.c8
-rw-r--r--arch/x86/kernel/mpparse.c25
-rw-r--r--arch/x86/kernel/nmi.c58
-rw-r--r--arch/x86/kernel/numaq_32.c10
-rw-r--r--arch/x86/kernel/pci-dma.c11
-rw-r--r--arch/x86/kernel/pci-gart_64.c4
-rw-r--r--arch/x86/kernel/process.c35
-rw-r--r--arch/x86/kernel/process_32.c67
-rw-r--r--arch/x86/kernel/process_64.c58
-rw-r--r--arch/x86/kernel/ptrace.c432
-rw-r--r--arch/x86/kernel/reboot.c126
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S115
-rw-r--r--arch/x86/kernel/setup.c181
-rw-r--r--arch/x86/kernel/sigframe.h42
-rw-r--r--arch/x86/kernel/signal.c22
-rw-r--r--arch/x86/kernel/smp.c13
-rw-r--r--arch/x86/kernel/smpboot.c27
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/time_32.c2
-rw-r--r--arch/x86/kernel/time_64.c4
-rw-r--r--arch/x86/kernel/tlb_32.c13
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/trampoline.c19
-rw-r--r--arch/x86/kernel/traps.c32
-rw-r--r--arch/x86/kernel/tsc.c42
-rw-r--r--arch/x86/kernel/tsc_sync.c8
-rw-r--r--arch/x86/kernel/vmi_32.c135
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S1
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S1
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/mach-generic/bigsmp.c1
-rw-r--r--arch/x86/mach-generic/default.c1
-rw-r--r--arch/x86/mach-generic/es7000.c14
-rw-r--r--arch/x86/mach-generic/probe.c16
-rw-r--r--arch/x86/mach-generic/summit.c1
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c15
-rw-r--r--arch/x86/mm/init_32.c30
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/ioremap.c3
-rw-r--r--arch/x86/mm/pat.c236
-rw-r--r--arch/x86/pci/common.c17
-rw-r--r--arch/x86/pci/direct.c4
-rw-r--r--arch/x86/pci/pci.h1
-rw-r--r--arch/x86/scripts/strip-symbols1
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--arch/x86/vdso/vdso32-setup.c2
-rw-r--r--arch/x86/vdso/vma.c2
-rw-r--r--arch/x86/xen/enlighten.c17
-rw-r--r--arch/x86/xen/mmu.c17
-rw-r--r--arch/x86/xen/multicalls.c2
-rw-r--r--arch/x86/xen/setup.c9
170 files changed, 4611 insertions, 3360 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe94490bab61..f3921028038c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,8 @@ config X86_64
19config X86 19config X86
20 def_bool y 20 def_bool y
21 select HAVE_AOUT if X86_32 21 select HAVE_AOUT if X86_32
22 select HAVE_READQ
23 select HAVE_WRITEQ
22 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
23 select HAVE_IDE 25 select HAVE_IDE
24 select HAVE_OPROFILE 26 select HAVE_OPROFILE
@@ -29,11 +31,14 @@ config X86
29 select HAVE_FTRACE_MCOUNT_RECORD 31 select HAVE_FTRACE_MCOUNT_RECORD
30 select HAVE_DYNAMIC_FTRACE 32 select HAVE_DYNAMIC_FTRACE
31 select HAVE_FUNCTION_TRACER 33 select HAVE_FUNCTION_TRACER
34 select HAVE_FUNCTION_GRAPH_TRACER
35 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
32 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 36 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
33 select HAVE_ARCH_KGDB if !X86_VOYAGER 37 select HAVE_ARCH_KGDB if !X86_VOYAGER
34 select HAVE_ARCH_TRACEHOOK 38 select HAVE_ARCH_TRACEHOOK
35 select HAVE_GENERIC_DMA_COHERENT if X86_32 39 select HAVE_GENERIC_DMA_COHERENT if X86_32
36 select HAVE_EFFICIENT_UNALIGNED_ACCESS 40 select HAVE_EFFICIENT_UNALIGNED_ACCESS
41 select USER_STACKTRACE_SUPPORT
37 42
38config ARCH_DEFCONFIG 43config ARCH_DEFCONFIG
39 string 44 string
@@ -87,6 +92,10 @@ config GENERIC_IOMAP
87config GENERIC_BUG 92config GENERIC_BUG
88 def_bool y 93 def_bool y
89 depends on BUG 94 depends on BUG
95 select GENERIC_BUG_RELATIVE_POINTERS if X86_64
96
97config GENERIC_BUG_RELATIVE_POINTERS
98 bool
90 99
91config GENERIC_HWEIGHT 100config GENERIC_HWEIGHT
92 def_bool y 101 def_bool y
@@ -359,10 +368,10 @@ config X86_RDC321X
359 as R-8610-(G). 368 as R-8610-(G).
360 If you don't have one of these chips, you should say N here. 369 If you don't have one of these chips, you should say N here.
361 370
362config SCHED_NO_NO_OMIT_FRAME_POINTER 371config SCHED_OMIT_FRAME_POINTER
363 def_bool y 372 def_bool y
364 prompt "Single-depth WCHAN output" 373 prompt "Single-depth WCHAN output"
365 depends on X86_32 374 depends on X86
366 help 375 help
367 Calculate simpler /proc/<PID>/wchan values. If this option 376 Calculate simpler /proc/<PID>/wchan values. If this option
368 is disabled then wchan values will recurse back to the 377 is disabled then wchan values will recurse back to the
@@ -457,10 +466,6 @@ config X86_CYCLONE_TIMER
457 def_bool y 466 def_bool y
458 depends on X86_GENERICARCH 467 depends on X86_GENERICARCH
459 468
460config ES7000_CLUSTERED_APIC
461 def_bool y
462 depends on SMP && X86_ES7000 && MPENTIUMIII
463
464source "arch/x86/Kconfig.cpu" 469source "arch/x86/Kconfig.cpu"
465 470
466config HPET_TIMER 471config HPET_TIMER
@@ -561,7 +566,7 @@ config AMD_IOMMU
561 566
562# need this always selected by IOMMU for the VIA workaround 567# need this always selected by IOMMU for the VIA workaround
563config SWIOTLB 568config SWIOTLB
564 bool 569 def_bool y if X86_64
565 help 570 help
566 Support for software bounce buffers used on x86-64 systems 571 Support for software bounce buffers used on x86-64 systems
567 which don't have a hardware IOMMU (e.g. the current generation 572 which don't have a hardware IOMMU (e.g. the current generation
@@ -653,6 +658,30 @@ config X86_VISWS_APIC
653 def_bool y 658 def_bool y
654 depends on X86_32 && X86_VISWS 659 depends on X86_32 && X86_VISWS
655 660
661config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
662 bool "Reroute for broken boot IRQs"
663 default n
664 depends on X86_IO_APIC
665 help
666 This option enables a workaround that fixes a source of
667 spurious interrupts. This is recommended when threaded
668 interrupt handling is used on systems where the generation of
669 superfluous "boot interrupts" cannot be disabled.
670
671 Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
672 entry in the chipset's IO-APIC is masked (as, e.g. the RT
673 kernel does during interrupt handling). On chipsets where this
674 boot IRQ generation cannot be disabled, this workaround keeps
675 the original IRQ line masked so that only the equivalent "boot
676 IRQ" is delivered to the CPUs. The workaround also tells the
677 kernel to set up the IRQ handler on the boot IRQ line. In this
678 way only one interrupt is delivered to the kernel. Otherwise
679 the spurious second interrupt may cause the kernel to bring
680 down (vital) interrupt lines.
681
682 Only affects "broken" chipsets. Interrupt sharing may be
683 increased on these systems.
684
656config X86_MCE 685config X86_MCE
657 bool "Machine Check Exception" 686 bool "Machine Check Exception"
658 depends on !X86_VOYAGER 687 depends on !X86_VOYAGER
@@ -949,24 +978,37 @@ config X86_PAE
949config ARCH_PHYS_ADDR_T_64BIT 978config ARCH_PHYS_ADDR_T_64BIT
950 def_bool X86_64 || X86_PAE 979 def_bool X86_64 || X86_PAE
951 980
981config DIRECT_GBPAGES
982 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
983 default y
984 depends on X86_64
985 help
986 Allow the kernel linear mapping to use 1GB pages on CPUs that
987 support it. This can improve the kernel's performance a tiny bit by
988 reducing TLB pressure. If in doubt, say "Y".
989
952# Common NUMA Features 990# Common NUMA Features
953config NUMA 991config NUMA
954 bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" 992 bool "Numa Memory Allocation and Scheduler Support"
955 depends on SMP 993 depends on SMP
956 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) 994 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
957 default n if X86_PC 995 default n if X86_PC
958 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) 996 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
959 help 997 help
960 Enable NUMA (Non Uniform Memory Access) support. 998 Enable NUMA (Non Uniform Memory Access) support.
999
961 The kernel will try to allocate memory used by a CPU on the 1000 The kernel will try to allocate memory used by a CPU on the
962 local memory controller of the CPU and add some more 1001 local memory controller of the CPU and add some more
963 NUMA awareness to the kernel. 1002 NUMA awareness to the kernel.
964 1003
965 For 32-bit this is currently highly experimental and should be only 1004 For 64-bit this is recommended if the system is Intel Core i7
966 used for kernel development. It might also cause boot failures. 1005 (or later), AMD Opteron, or EM64T NUMA.
967 For 64-bit this is recommended on all multiprocessor Opteron systems. 1006
968 If the system is EM64T, you should say N unless your system is 1007 For 32-bit this is only needed on (rare) 32-bit-only platforms
969 EM64T NUMA. 1008 that support NUMA topologies, such as NUMAQ / Summit, or if you
1009 boot a 32-bit kernel on a 64-bit NUMA platform.
1010
1011 Otherwise, you should say N.
970 1012
971comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" 1013comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
972 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) 1014 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
@@ -1486,6 +1528,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
1486 def_bool y 1528 def_bool y
1487 depends on X86_64 || (X86_32 && HIGHMEM) 1529 depends on X86_64 || (X86_32 && HIGHMEM)
1488 1530
1531config ARCH_ENABLE_MEMORY_HOTREMOVE
1532 def_bool y
1533 depends on MEMORY_HOTPLUG
1534
1489config HAVE_ARCH_EARLY_PFN_TO_NID 1535config HAVE_ARCH_EARLY_PFN_TO_NID
1490 def_bool X86_64 1536 def_bool X86_64
1491 depends on NUMA 1537 depends on NUMA
@@ -1625,13 +1671,6 @@ config APM_ALLOW_INTS
1625 many of the newer IBM Thinkpads. If you experience hangs when you 1671 many of the newer IBM Thinkpads. If you experience hangs when you
1626 suspend, try setting this to Y. Otherwise, say N. 1672 suspend, try setting this to Y. Otherwise, say N.
1627 1673
1628config APM_REAL_MODE_POWER_OFF
1629 bool "Use real mode APM BIOS call to power off"
1630 help
1631 Use real mode APM BIOS calls to switch off the computer. This is
1632 a work-around for a number of buggy BIOSes. Switch this option on if
1633 your computer crashes instead of powering off properly.
1634
1635endif # APM 1674endif # APM
1636 1675
1637source "arch/x86/kernel/cpu/cpufreq/Kconfig" 1676source "arch/x86/kernel/cpu/cpufreq/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
515config X86_DS 515config X86_DS
516 def_bool X86_PTRACE_BTS 516 def_bool X86_PTRACE_BTS
517 depends on X86_DEBUGCTLMSR 517 depends on X86_DEBUGCTLMSR
518 select HAVE_HW_BRANCH_TRACER
518 519
519config X86_PTRACE_BTS 520config X86_PTRACE_BTS
520 bool "Branch Trace Store" 521 bool "Branch Trace Store"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..10d6cc3fd052 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -114,18 +114,6 @@ config DEBUG_RODATA
114 data. This is recommended so that we can catch kernel bugs sooner. 114 data. This is recommended so that we can catch kernel bugs sooner.
115 If in doubt, say "Y". 115 If in doubt, say "Y".
116 116
117config DIRECT_GBPAGES
118 bool "Enable gbpages-mapped kernel pagetables"
119 depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
120 help
121 Enable gigabyte pages support (if the CPU supports it). This can
122 improve the kernel's performance a tiny bit by reducing TLB
123 pressure.
124
125 This is experimental code.
126
127 If in doubt, say "N".
128
129config DEBUG_RODATA_TEST 117config DEBUG_RODATA_TEST
130 bool "Testcase for the DEBUG_RODATA feature" 118 bool "Testcase for the DEBUG_RODATA feature"
131 depends on DEBUG_RODATA 119 depends on DEBUG_RODATA
@@ -186,14 +174,10 @@ config IOMMU_LEAK
186 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
187 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
188 176
189config MMIOTRACE_HOOKS
190 bool
191
192config MMIOTRACE 177config MMIOTRACE
193 bool "Memory mapped IO tracing" 178 bool "Memory mapped IO tracing"
194 depends on DEBUG_KERNEL && PCI 179 depends on DEBUG_KERNEL && PCI
195 select TRACING 180 select TRACING
196 select MMIOTRACE_HOOKS
197 help 181 help
198 Mmiotrace traces Memory Mapped I/O access and is meant for 182 Mmiotrace traces Memory Mapped I/O access and is meant for
199 debugging and reverse engineering. It is called from the ioremap 183 debugging and reverse engineering. It is called from the ioremap
@@ -307,10 +291,10 @@ config OPTIMIZE_INLINING
307 developers have marked 'inline'. Doing so takes away freedom from gcc to 291 developers have marked 'inline'. Doing so takes away freedom from gcc to
308 do what it thinks is best, which is desirable for the gcc 3.x series of 292 do what it thinks is best, which is desirable for the gcc 3.x series of
309 compilers. The gcc 4.x series have a rewritten inlining algorithm and 293 compilers. The gcc 4.x series have a rewritten inlining algorithm and
310 disabling this option will generate a smaller kernel there. Hopefully 294 enabling this option will generate a smaller kernel there. Hopefully
311 this algorithm is so good that allowing gcc4 to make the decision can 295 this algorithm is so good that allowing gcc 4.x and above to make the
312 become the default in the future, until then this option is there to 296 decision will become the default in the future. Until then this option
313 test gcc for this. 297 is there to test gcc for this.
314 298
315 If unsure, say N. 299 If unsure, say N.
316 300
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index b939cb476dec..5d4742ed4aa2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -34,7 +34,7 @@ static struct mode_info cga_modes[] = {
34 { VIDEO_80x25, 80, 25, 0 }, 34 { VIDEO_80x25, 80, 25, 0 },
35}; 35};
36 36
37__videocard video_vga; 37static __videocard video_vga;
38 38
39/* Set basic 80x25 mode */ 39/* Set basic 80x25 mode */
40static u8 vga_set_basic_mode(void) 40static u8 vga_set_basic_mode(void)
@@ -259,7 +259,7 @@ static int vga_probe(void)
259 return mode_count[adapter]; 259 return mode_count[adapter];
260} 260}
261 261
262__videocard video_vga = { 262static __videocard video_vga = {
263 .card_name = "VGA", 263 .card_name = "VGA",
264 .probe = vga_probe, 264 .probe = vga_probe,
265 .set_mode = vga_set_mode, 265 .set_mode = vga_set_mode,
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 83598b23093a..3bef2c1febe9 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -226,7 +226,7 @@ static unsigned int mode_menu(void)
226 226
227#ifdef CONFIG_VIDEO_RETAIN 227#ifdef CONFIG_VIDEO_RETAIN
228/* Save screen content to the heap */ 228/* Save screen content to the heap */
229struct saved_screen { 229static struct saved_screen {
230 int x, y; 230 int x, y;
231 int curx, cury; 231 int curx, cury;
232 u16 *data; 232 u16 *data;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 13b8c86ae985..b30a08ed8eb4 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y
77CONFIG_AUDITSYSCALL=y 77CONFIG_AUDITSYSCALL=y
78CONFIG_AUDIT_TREE=y 78CONFIG_AUDIT_TREE=y
79# CONFIG_IKCONFIG is not set 79# CONFIG_IKCONFIG is not set
80CONFIG_LOG_BUF_SHIFT=17 80CONFIG_LOG_BUF_SHIFT=18
81CONFIG_CGROUPS=y 81CONFIG_CGROUPS=y
82# CONFIG_CGROUP_DEBUG is not set 82# CONFIG_CGROUP_DEBUG is not set
83CONFIG_CGROUP_NS=y 83CONFIG_CGROUP_NS=y
@@ -298,7 +298,7 @@ CONFIG_KEXEC=y
298CONFIG_CRASH_DUMP=y 298CONFIG_CRASH_DUMP=y
299# CONFIG_KEXEC_JUMP is not set 299# CONFIG_KEXEC_JUMP is not set
300CONFIG_PHYSICAL_START=0x1000000 300CONFIG_PHYSICAL_START=0x1000000
301CONFIG_RELOCATABLE=y 301# CONFIG_RELOCATABLE is not set
302CONFIG_PHYSICAL_ALIGN=0x200000 302CONFIG_PHYSICAL_ALIGN=0x200000
303CONFIG_HOTPLUG_CPU=y 303CONFIG_HOTPLUG_CPU=y
304# CONFIG_COMPAT_VDSO is not set 304# CONFIG_COMPAT_VDSO is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index f0a03d7a7d63..0e7dbc0a3e46 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y
77CONFIG_AUDITSYSCALL=y 77CONFIG_AUDITSYSCALL=y
78CONFIG_AUDIT_TREE=y 78CONFIG_AUDIT_TREE=y
79# CONFIG_IKCONFIG is not set 79# CONFIG_IKCONFIG is not set
80CONFIG_LOG_BUF_SHIFT=17 80CONFIG_LOG_BUF_SHIFT=18
81CONFIG_CGROUPS=y 81CONFIG_CGROUPS=y
82# CONFIG_CGROUP_DEBUG is not set 82# CONFIG_CGROUP_DEBUG is not set
83CONFIG_CGROUP_NS=y 83CONFIG_CGROUP_NS=y
@@ -298,7 +298,7 @@ CONFIG_SCHED_HRTICK=y
298CONFIG_KEXEC=y 298CONFIG_KEXEC=y
299CONFIG_CRASH_DUMP=y 299CONFIG_CRASH_DUMP=y
300CONFIG_PHYSICAL_START=0x1000000 300CONFIG_PHYSICAL_START=0x1000000
301CONFIG_RELOCATABLE=y 301# CONFIG_RELOCATABLE is not set
302CONFIG_PHYSICAL_ALIGN=0x200000 302CONFIG_PHYSICAL_ALIGN=0x200000
303CONFIG_HOTPLUG_CPU=y 303CONFIG_HOTPLUG_CPU=y
304# CONFIG_COMPAT_VDSO is not set 304# CONFIG_COMPAT_VDSO is not set
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
index 070afc5b6c94..b9d00261703c 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -6,13 +6,22 @@
6 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual 6 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
7 * Volume 2A: Instruction Set Reference, A-M 7 * Volume 2A: Instruction Set Reference, A-M
8 * 8 *
9 * Copyright (c) 2008 Austin Zhang <austin_zhang@linux.intel.com> 9 * Copyright (C) 2008 Intel Corporation
10 * Copyright (c) 2008 Kent Liu <kent.liu@intel.com> 10 * Authors: Austin Zhang <austin_zhang@linux.intel.com>
11 * Kent Liu <kent.liu@intel.com>
11 * 12 *
12 * This program is free software; you can redistribute it and/or modify it 13 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the Free 14 * under the terms and conditions of the GNU General Public License,
14 * Software Foundation; either version 2 of the License, or (at your option) 15 * version 2, as published by the Free Software Foundation.
15 * any later version. 16 *
17 * This program is distributed in the hope it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 * more details.
21 *
22 * You should have received a copy of the GNU General Public License along with
23 * this program; if not, write to the Free Software Foundation, Inc.,
24 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 * 25 *
17 */ 26 */
18#include <linux/init.h> 27#include <linux/init.h>
@@ -75,99 +84,92 @@ static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len
75 * If your algorithm starts with ~0, then XOR with ~0 before you set 84 * If your algorithm starts with ~0, then XOR with ~0 before you set
76 * the seed. 85 * the seed.
77 */ 86 */
78static int crc32c_intel_setkey(struct crypto_ahash *hash, const u8 *key, 87static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
79 unsigned int keylen) 88 unsigned int keylen)
80{ 89{
81 u32 *mctx = crypto_ahash_ctx(hash); 90 u32 *mctx = crypto_shash_ctx(hash);
82 91
83 if (keylen != sizeof(u32)) { 92 if (keylen != sizeof(u32)) {
84 crypto_ahash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); 93 crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
85 return -EINVAL; 94 return -EINVAL;
86 } 95 }
87 *mctx = le32_to_cpup((__le32 *)key); 96 *mctx = le32_to_cpup((__le32 *)key);
88 return 0; 97 return 0;
89} 98}
90 99
91static int crc32c_intel_init(struct ahash_request *req) 100static int crc32c_intel_init(struct shash_desc *desc)
92{ 101{
93 u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); 102 u32 *mctx = crypto_shash_ctx(desc->tfm);
94 u32 *crcp = ahash_request_ctx(req); 103 u32 *crcp = shash_desc_ctx(desc);
95 104
96 *crcp = *mctx; 105 *crcp = *mctx;
97 106
98 return 0; 107 return 0;
99} 108}
100 109
101static int crc32c_intel_update(struct ahash_request *req) 110static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
111 unsigned int len)
102{ 112{
103 struct crypto_hash_walk walk; 113 u32 *crcp = shash_desc_ctx(desc);
104 u32 *crcp = ahash_request_ctx(req);
105 u32 crc = *crcp;
106 int nbytes;
107
108 for (nbytes = crypto_hash_walk_first(req, &walk); nbytes;
109 nbytes = crypto_hash_walk_done(&walk, 0))
110 crc = crc32c_intel_le_hw(crc, walk.data, nbytes);
111 114
112 *crcp = crc; 115 *crcp = crc32c_intel_le_hw(*crcp, data, len);
113 return 0; 116 return 0;
114} 117}
115 118
116static int crc32c_intel_final(struct ahash_request *req) 119static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
120 u8 *out)
117{ 121{
118 u32 *crcp = ahash_request_ctx(req); 122 *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
119
120 *(__le32 *)req->result = ~cpu_to_le32p(crcp);
121 return 0; 123 return 0;
122} 124}
123 125
124static int crc32c_intel_digest(struct ahash_request *req) 126static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
127 unsigned int len, u8 *out)
125{ 128{
126 struct crypto_hash_walk walk; 129 return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
127 u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); 130}
128 u32 crc = *mctx;
129 int nbytes;
130 131
131 for (nbytes = crypto_hash_walk_first(req, &walk); nbytes; 132static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
132 nbytes = crypto_hash_walk_done(&walk, 0)) 133{
133 crc = crc32c_intel_le_hw(crc, walk.data, nbytes); 134 u32 *crcp = shash_desc_ctx(desc);
134 135
135 *(__le32 *)req->result = ~cpu_to_le32(crc); 136 *(__le32 *)out = ~cpu_to_le32p(crcp);
136 return 0; 137 return 0;
137} 138}
138 139
140static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
141 unsigned int len, u8 *out)
142{
143 return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
144 out);
145}
146
139static int crc32c_intel_cra_init(struct crypto_tfm *tfm) 147static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
140{ 148{
141 u32 *key = crypto_tfm_ctx(tfm); 149 u32 *key = crypto_tfm_ctx(tfm);
142 150
143 *key = ~0; 151 *key = ~0;
144 152
145 tfm->crt_ahash.reqsize = sizeof(u32);
146
147 return 0; 153 return 0;
148} 154}
149 155
150static struct crypto_alg alg = { 156static struct shash_alg alg = {
151 .cra_name = "crc32c", 157 .setkey = crc32c_intel_setkey,
152 .cra_driver_name = "crc32c-intel", 158 .init = crc32c_intel_init,
153 .cra_priority = 200, 159 .update = crc32c_intel_update,
154 .cra_flags = CRYPTO_ALG_TYPE_AHASH, 160 .final = crc32c_intel_final,
155 .cra_blocksize = CHKSUM_BLOCK_SIZE, 161 .finup = crc32c_intel_finup,
156 .cra_alignmask = 3, 162 .digest = crc32c_intel_digest,
157 .cra_ctxsize = sizeof(u32), 163 .descsize = sizeof(u32),
158 .cra_module = THIS_MODULE, 164 .digestsize = CHKSUM_DIGEST_SIZE,
159 .cra_list = LIST_HEAD_INIT(alg.cra_list), 165 .base = {
160 .cra_init = crc32c_intel_cra_init, 166 .cra_name = "crc32c",
161 .cra_type = &crypto_ahash_type, 167 .cra_driver_name = "crc32c-intel",
162 .cra_u = { 168 .cra_priority = 200,
163 .ahash = { 169 .cra_blocksize = CHKSUM_BLOCK_SIZE,
164 .digestsize = CHKSUM_DIGEST_SIZE, 170 .cra_ctxsize = sizeof(u32),
165 .setkey = crc32c_intel_setkey, 171 .cra_module = THIS_MODULE,
166 .init = crc32c_intel_init, 172 .cra_init = crc32c_intel_cra_init,
167 .update = crc32c_intel_update,
168 .final = crc32c_intel_final,
169 .digest = crc32c_intel_digest,
170 }
171 } 173 }
172}; 174};
173 175
@@ -175,14 +177,14 @@ static struct crypto_alg alg = {
175static int __init crc32c_intel_mod_init(void) 177static int __init crc32c_intel_mod_init(void)
176{ 178{
177 if (cpu_has_xmm4_2) 179 if (cpu_has_xmm4_2)
178 return crypto_register_alg(&alg); 180 return crypto_register_shash(&alg);
179 else 181 else
180 return -ENODEV; 182 return -ENODEV;
181} 183}
182 184
183static void __exit crc32c_intel_mod_fini(void) 185static void __exit crc32c_intel_mod_fini(void)
184{ 186{
185 crypto_unregister_alg(&alg); 187 crypto_unregister_shash(&alg);
186} 188}
187 189
188module_init(crc32c_intel_mod_init); 190module_init(crc32c_intel_mod_init);
@@ -194,4 +196,3 @@ MODULE_LICENSE("GPL");
194 196
195MODULE_ALIAS("crc32c"); 197MODULE_ALIAS("crc32c");
196MODULE_ALIAS("crc32c-intel"); 198MODULE_ALIAS("crc32c-intel");
197
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 127ec3f07214..2a4d073d2cf1 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -327,7 +327,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
327 current->mm->cached_hole_size = 0; 327 current->mm->cached_hole_size = 0;
328 328
329 current->mm->mmap = NULL; 329 current->mm->mmap = NULL;
330 compute_creds(bprm); 330 install_exec_creds(bprm);
331 current->flags &= ~PF_FORKNOEXEC; 331 current->flags &= ~PF_FORKNOEXEC;
332 332
333 if (N_MAGIC(ex) == OMAGIC) { 333 if (N_MAGIC(ex) == OMAGIC) {
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 9ddf2fa0129d..b195f85526e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -32,6 +32,8 @@
32#include <asm/proto.h> 32#include <asm/proto.h>
33#include <asm/vdso.h> 33#include <asm/vdso.h>
34 34
35#include <asm/sigframe.h>
36
35#define DEBUG_SIG 0 37#define DEBUG_SIG 0
36 38
37#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 39#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,7 +43,6 @@
41 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ 43 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
42 X86_EFLAGS_CF) 44 X86_EFLAGS_CF)
43 45
44asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
45void signal_fault(struct pt_regs *regs, void __user *frame, char *where); 46void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
46 47
47int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) 48int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
@@ -173,30 +174,6 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
173/* 174/*
174 * Do a signal return; undo the signal stack. 175 * Do a signal return; undo the signal stack.
175 */ 176 */
176
177struct sigframe
178{
179 u32 pretcode;
180 int sig;
181 struct sigcontext_ia32 sc;
182 struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
183 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
184 char retcode[8];
185 /* fp state follows here */
186};
187
188struct rt_sigframe
189{
190 u32 pretcode;
191 int sig;
192 u32 pinfo;
193 u32 puc;
194 compat_siginfo_t info;
195 struct ucontext_ia32 uc;
196 char retcode[8];
197 /* fp state follows here */
198};
199
200#define COPY(x) { \ 177#define COPY(x) { \
201 err |= __get_user(regs->x, &sc->x); \ 178 err |= __get_user(regs->x, &sc->x); \
202} 179}
@@ -271,7 +248,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
271 248
272asmlinkage long sys32_sigreturn(struct pt_regs *regs) 249asmlinkage long sys32_sigreturn(struct pt_regs *regs)
273{ 250{
274 struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8); 251 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
275 sigset_t set; 252 sigset_t set;
276 unsigned int ax; 253 unsigned int ax;
277 254
@@ -301,12 +278,12 @@ badframe:
301 278
302asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) 279asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
303{ 280{
304 struct rt_sigframe __user *frame; 281 struct rt_sigframe_ia32 __user *frame;
305 sigset_t set; 282 sigset_t set;
306 unsigned int ax; 283 unsigned int ax;
307 struct pt_regs tregs; 284 struct pt_regs tregs;
308 285
309 frame = (struct rt_sigframe __user *)(regs->sp - 4); 286 frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
310 287
311 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 288 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
312 goto badframe; 289 goto badframe;
@@ -396,7 +373,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
396 } 373 }
397 374
398 /* This is the legacy signal stack switching. */ 375 /* This is the legacy signal stack switching. */
399 else if ((regs->ss & 0xffff) != __USER_DS && 376 else if ((regs->ss & 0xffff) != __USER32_DS &&
400 !(ka->sa.sa_flags & SA_RESTORER) && 377 !(ka->sa.sa_flags & SA_RESTORER) &&
401 ka->sa.sa_restorer) 378 ka->sa.sa_restorer)
402 sp = (unsigned long) ka->sa.sa_restorer; 379 sp = (unsigned long) ka->sa.sa_restorer;
@@ -418,7 +395,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
418int ia32_setup_frame(int sig, struct k_sigaction *ka, 395int ia32_setup_frame(int sig, struct k_sigaction *ka,
419 compat_sigset_t *set, struct pt_regs *regs) 396 compat_sigset_t *set, struct pt_regs *regs)
420{ 397{
421 struct sigframe __user *frame; 398 struct sigframe_ia32 __user *frame;
422 void __user *restorer; 399 void __user *restorer;
423 int err = 0; 400 int err = 0;
424 void __user *fpstate = NULL; 401 void __user *fpstate = NULL;
@@ -467,7 +444,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
467 * These are actually not used anymore, but left because some 444 * These are actually not used anymore, but left because some
468 * gdb versions depend on them as a marker. 445 * gdb versions depend on them as a marker.
469 */ 446 */
470 err |= __copy_to_user(frame->retcode, &code, 8); 447 err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
471 if (err) 448 if (err)
472 return -EFAULT; 449 return -EFAULT;
473 450
@@ -497,7 +474,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
497int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 474int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
498 compat_sigset_t *set, struct pt_regs *regs) 475 compat_sigset_t *set, struct pt_regs *regs)
499{ 476{
500 struct rt_sigframe __user *frame; 477 struct rt_sigframe_ia32 __user *frame;
501 void __user *restorer; 478 void __user *restorer;
502 int err = 0; 479 int err = 0;
503 void __user *fpstate = NULL; 480 void __user *fpstate = NULL;
@@ -554,7 +531,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
554 * Not actually used anymore, but left because some gdb 531 * Not actually used anymore, but left because some gdb
555 * versions need it. 532 * versions need it.
556 */ 533 */
557 err |= __copy_to_user(frame->retcode, &code, 8); 534 err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
558 if (err) 535 if (err)
559 return -EFAULT; 536 return -EFAULT;
560 537
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3b1510b4fc57..25caa0738af5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
193static inline void lapic_shutdown(void) { } 193static inline void lapic_shutdown(void) { }
194#define local_apic_timer_c2_ok 1 194#define local_apic_timer_c2_ok 1
195static inline void init_apic_mappings(void) { } 195static inline void init_apic_mappings(void) { }
196static inline void disable_local_APIC(void) { }
196 197
197#endif /* !CONFIG_X86_LOCAL_APIC */ 198#endif /* !CONFIG_X86_LOCAL_APIC */
198 199
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..ce547f24a1cd 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
24#define INT_DELIVERY_MODE (dest_Fixed) 24#define INT_DELIVERY_MODE (dest_Fixed)
25#define INT_DEST_MODE (0) /* phys delivery to target proc */ 25#define INT_DEST_MODE (0) /* phys delivery to target proc */
26#define NO_BALANCE_IRQ (0) 26#define NO_BALANCE_IRQ (0)
27#define WAKE_SECONDARY_VIA_INIT
28
29 27
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 28static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 29{
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 3def2065fcea..d9cf1cd156d2 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -9,7 +9,7 @@
9#ifdef CONFIG_X86_32 9#ifdef CONFIG_X86_32
10# define __BUG_C0 "2:\t.long 1b, %c0\n" 10# define __BUG_C0 "2:\t.long 1b, %c0\n"
11#else 11#else
12# define __BUG_C0 "2:\t.quad 1b, %c0\n" 12# define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n"
13#endif 13#endif
14 14
15#define BUG() \ 15#define BUG() \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index cfdf8c2c5c31..ea408dcba513 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -80,7 +80,6 @@
80#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ 80#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
81#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ 81#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
82#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ 82#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
83#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
84#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 83#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
85#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 84#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
86#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ 85#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */
@@ -92,6 +91,8 @@
92#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ 91#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
93#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ 92#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */
94#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ 93#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
94#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
95#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
95 96
96/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 97/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
97#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 98#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
@@ -117,6 +118,7 @@
117#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ 118#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
118#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ 119#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
119#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 120#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
121#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */
120 122
121/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 123/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
122#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ 124#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
@@ -237,6 +239,7 @@ extern const char * const x86_power_flags[32];
237#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) 239#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
238#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) 240#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
239#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 241#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
242#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
240 243
241#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 244#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
242# define cpu_has_invlpg 1 245# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 097794ff6b79..dc22c0733282 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,12 +71,10 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
71/* Make sure we keep the same behaviour */ 71/* Make sure we keep the same behaviour */
72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 73{
74#ifdef CONFIG_X86_64
75 struct dma_mapping_ops *ops = get_dma_ops(dev); 74 struct dma_mapping_ops *ops = get_dma_ops(dev);
76 if (ops->mapping_error) 75 if (ops->mapping_error)
77 return ops->mapping_error(dev, dma_addr); 76 return ops->mapping_error(dev, dma_addr);
78 77
79#endif
80 return (dma_addr == bad_dma_address); 78 return (dma_addr == bad_dma_address);
81} 79}
82 80
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a95008457ea4..a8f672ba100c 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -6,14 +6,13 @@
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - DS and BTS hardware configuration
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It does not do:
15 * - get_task_struct on all parameter tasks 14 * - security checking (is the caller allowed to trace the task)
16 * - current is allowed to trace parameter tasks 15 * - buffer allocation (memory accounting)
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -26,11 +25,51 @@
26 25
27#include <linux/types.h> 26#include <linux/types.h>
28#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/err.h>
29 29
30 30
31#ifdef CONFIG_X86_DS 31#ifdef CONFIG_X86_DS
32 32
33struct task_struct; 33struct task_struct;
34struct ds_context;
35struct ds_tracer;
36struct bts_tracer;
37struct pebs_tracer;
38
39typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
40typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
41
42
43/*
44 * A list of features plus corresponding macros to talk about them in
45 * the ds_request function's flags parameter.
46 *
47 * We use the enum to index an array of corresponding control bits;
48 * we use the macro to index a flags bit-vector.
49 */
50enum ds_feature {
51 dsf_bts = 0,
52 dsf_bts_kernel,
53#define BTS_KERNEL (1 << dsf_bts_kernel)
54 /* trace kernel-mode branches */
55
56 dsf_bts_user,
57#define BTS_USER (1 << dsf_bts_user)
58 /* trace user-mode branches */
59
60 dsf_bts_overflow,
61 dsf_bts_max,
62 dsf_pebs = dsf_bts_max,
63
64 dsf_pebs_max,
65 dsf_ctl_max = dsf_pebs_max,
66 dsf_bts_timestamps = dsf_ctl_max,
67#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
68 /* add timestamps into BTS trace */
69
70#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
71};
72
34 73
35/* 74/*
36 * Request BTS or PEBS 75 * Request BTS or PEBS
@@ -38,163 +77,169 @@ struct task_struct;
38 * Due to alignement constraints, the actual buffer may be slightly 77 * Due to alignement constraints, the actual buffer may be slightly
39 * smaller than the requested or provided buffer. 78 * smaller than the requested or provided buffer.
40 * 79 *
41 * Returns 0 on success; -Eerrno otherwise 80 * Returns a pointer to a tracer structure on success, or
81 * ERR_PTR(errcode) on failure.
82 *
83 * The interrupt threshold is independent from the overflow callback
84 * to allow users to use their own overflow interrupt handling mechanism.
42 * 85 *
43 * task: the task to request recording for; 86 * task: the task to request recording for;
44 * NULL for per-cpu recording on the current cpu 87 * NULL for per-cpu recording on the current cpu
45 * base: the base pointer for the (non-pageable) buffer; 88 * base: the base pointer for the (non-pageable) buffer;
46 * NULL if buffer allocation requested 89 * size: the size of the provided buffer in bytes
47 * size: the size of the requested or provided buffer
48 * ovfl: pointer to a function to be called on buffer overflow; 90 * ovfl: pointer to a function to be called on buffer overflow;
49 * NULL if cyclic buffer requested 91 * NULL if cyclic buffer requested
92 * th: the interrupt threshold in records from the end of the buffer;
93 * -1 if no interrupt threshold is requested.
94 * flags: a bit-mask of the above flags
50 */ 95 */
51typedef void (*ds_ovfl_callback_t)(struct task_struct *); 96extern struct bts_tracer *ds_request_bts(struct task_struct *task,
52extern int ds_request_bts(struct task_struct *task, void *base, size_t size, 97 void *base, size_t size,
53 ds_ovfl_callback_t ovfl); 98 bts_ovfl_callback_t ovfl,
54extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, 99 size_t th, unsigned int flags);
55 ds_ovfl_callback_t ovfl); 100extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
101 void *base, size_t size,
102 pebs_ovfl_callback_t ovfl,
103 size_t th, unsigned int flags);
56 104
57/* 105/*
58 * Release BTS or PEBS resources 106 * Release BTS or PEBS resources
107 * Suspend and resume BTS or PEBS tracing
59 * 108 *
60 * Frees buffers allocated on ds_request. 109 * tracer: the tracer handle returned from ds_request_~()
61 *
62 * Returns 0 on success; -Eerrno otherwise
63 *
64 * task: the task to release resources for;
65 * NULL to release resources for the current cpu
66 */ 110 */
67extern int ds_release_bts(struct task_struct *task); 111extern void ds_release_bts(struct bts_tracer *tracer);
68extern int ds_release_pebs(struct task_struct *task); 112extern void ds_suspend_bts(struct bts_tracer *tracer);
113extern void ds_resume_bts(struct bts_tracer *tracer);
114extern void ds_release_pebs(struct pebs_tracer *tracer);
115extern void ds_suspend_pebs(struct pebs_tracer *tracer);
116extern void ds_resume_pebs(struct pebs_tracer *tracer);
69 117
70/*
71 * Return the (array) index of the write pointer.
72 * (assuming an array of BTS/PEBS records)
73 *
74 * Returns -Eerrno on error
75 *
76 * task: the task to access;
77 * NULL to access the current cpu
78 * pos (out): if not NULL, will hold the result
79 */
80extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
81extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
82 118
83/* 119/*
84 * Return the (array) index one record beyond the end of the array. 120 * The raw DS buffer state as it is used for BTS and PEBS recording.
85 * (assuming an array of BTS/PEBS records)
86 * 121 *
87 * Returns -Eerrno on error 122 * This is the low-level, arch-dependent interface for working
88 * 123 * directly on the raw trace data.
89 * task: the task to access;
90 * NULL to access the current cpu
91 * pos (out): if not NULL, will hold the result
92 */ 124 */
93extern int ds_get_bts_end(struct task_struct *task, size_t *pos); 125struct ds_trace {
94extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); 126 /* the number of bts/pebs records */
127 size_t n;
128 /* the size of a bts/pebs record in bytes */
129 size_t size;
130 /* pointers into the raw buffer:
131 - to the first entry */
132 void *begin;
133 /* - one beyond the last entry */
134 void *end;
135 /* - one beyond the newest entry */
136 void *top;
137 /* - the interrupt threshold */
138 void *ith;
139 /* flags given on ds_request() */
140 unsigned int flags;
141};
95 142
96/* 143/*
97 * Provide a pointer to the BTS/PEBS record at parameter index. 144 * An arch-independent view on branch trace data.
98 * (assuming an array of BTS/PEBS records)
99 *
100 * The pointer points directly into the buffer. The user is
101 * responsible for copying the record.
102 *
103 * Returns the size of a single record on success; -Eerrno on error
104 *
105 * task: the task to access;
106 * NULL to access the current cpu
107 * index: the index of the requested record
108 * record (out): pointer to the requested record
109 */ 145 */
110extern int ds_access_bts(struct task_struct *task, 146enum bts_qualifier {
111 size_t index, const void **record); 147 bts_invalid,
112extern int ds_access_pebs(struct task_struct *task, 148#define BTS_INVALID bts_invalid
113 size_t index, const void **record); 149
150 bts_branch,
151#define BTS_BRANCH bts_branch
152
153 bts_task_arrives,
154#define BTS_TASK_ARRIVES bts_task_arrives
155
156 bts_task_departs,
157#define BTS_TASK_DEPARTS bts_task_departs
158
159 bts_qual_bit_size = 4,
160 bts_qual_max = (1 << bts_qual_bit_size),
161};
162
163struct bts_struct {
164 __u64 qualifier;
165 union {
166 /* BTS_BRANCH */
167 struct {
168 __u64 from;
169 __u64 to;
170 } lbr;
171 /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
172 struct {
173 __u64 jiffies;
174 pid_t pid;
175 } timestamp;
176 } variant;
177};
114 178
115/*
116 * Write one or more BTS/PEBS records at the write pointer index and
117 * advance the write pointer.
118 *
119 * If size is not a multiple of the record size, trailing bytes are
120 * zeroed out.
121 *
122 * May result in one or more overflow notifications.
123 *
124 * If called during overflow handling, that is, with index >=
125 * interrupt threshold, the write will wrap around.
126 *
127 * An overflow notification is given if and when the interrupt
128 * threshold is reached during or after the write.
129 *
130 * Returns the number of bytes written or -Eerrno.
131 *
132 * task: the task to access;
133 * NULL to access the current cpu
134 * buffer: the buffer to write
135 * size: the size of the buffer
136 */
137extern int ds_write_bts(struct task_struct *task,
138 const void *buffer, size_t size);
139extern int ds_write_pebs(struct task_struct *task,
140 const void *buffer, size_t size);
141 179
142/* 180/*
143 * Same as ds_write_bts/pebs, but omit ownership checks. 181 * The BTS state.
144 * 182 *
145 * This is needed to have some other task than the owner of the 183 * This gives access to the raw DS state and adds functions to provide
146 * BTS/PEBS buffer or the parameter task itself write into the 184 * an arch-independent view of the BTS data.
147 * respective buffer.
148 */ 185 */
149extern int ds_unchecked_write_bts(struct task_struct *task, 186struct bts_trace {
150 const void *buffer, size_t size); 187 struct ds_trace ds;
151extern int ds_unchecked_write_pebs(struct task_struct *task, 188
152 const void *buffer, size_t size); 189 int (*read)(struct bts_tracer *tracer, const void *at,
190 struct bts_struct *out);
191 int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
192};
193
153 194
154/* 195/*
155 * Reset the write pointer of the BTS/PEBS buffer. 196 * The PEBS state.
156 * 197 *
157 * Returns 0 on success; -Eerrno on error 198 * This gives access to the raw DS state and the PEBS-specific counter
158 * 199 * reset value.
159 * task: the task to access;
160 * NULL to access the current cpu
161 */ 200 */
162extern int ds_reset_bts(struct task_struct *task); 201struct pebs_trace {
163extern int ds_reset_pebs(struct task_struct *task); 202 struct ds_trace ds;
203
204 /* the PEBS reset value */
205 unsigned long long reset_value;
206};
207
164 208
165/* 209/*
166 * Clear the BTS/PEBS buffer and reset the write pointer. 210 * Read the BTS or PEBS trace.
167 * The entire buffer will be zeroed out.
168 * 211 *
169 * Returns 0 on success; -Eerrno on error 212 * Returns a view on the trace collected for the parameter tracer.
213 *
214 * The view remains valid as long as the traced task is not running or
215 * the tracer is suspended.
216 * Writes into the trace buffer are not reflected.
170 * 217 *
171 * task: the task to access; 218 * tracer: the tracer handle returned from ds_request_~()
172 * NULL to access the current cpu
173 */ 219 */
174extern int ds_clear_bts(struct task_struct *task); 220extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
175extern int ds_clear_pebs(struct task_struct *task); 221extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
222
176 223
177/* 224/*
178 * Provide the PEBS counter reset value. 225 * Reset the write pointer of the BTS/PEBS buffer.
179 * 226 *
180 * Returns 0 on success; -Eerrno on error 227 * Returns 0 on success; -Eerrno on error
181 * 228 *
182 * task: the task to access; 229 * tracer: the tracer handle returned from ds_request_~()
183 * NULL to access the current cpu
184 * value (out): the counter reset value
185 */ 230 */
186extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); 231extern int ds_reset_bts(struct bts_tracer *tracer);
232extern int ds_reset_pebs(struct pebs_tracer *tracer);
187 233
188/* 234/*
189 * Set the PEBS counter reset value. 235 * Set the PEBS counter reset value.
190 * 236 *
191 * Returns 0 on success; -Eerrno on error 237 * Returns 0 on success; -Eerrno on error
192 * 238 *
193 * task: the task to access; 239 * tracer: the tracer handle returned from ds_request_pebs()
194 * NULL to access the current cpu
195 * value: the new counter reset value 240 * value: the new counter reset value
196 */ 241 */
197extern int ds_set_pebs_reset(struct task_struct *task, u64 value); 242extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
198 243
199/* 244/*
200 * Initialization 245 * Initialization
@@ -202,39 +247,26 @@ extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
202struct cpuinfo_x86; 247struct cpuinfo_x86;
203extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); 248extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
204 249
205
206
207/* 250/*
208 * The DS context - part of struct thread_struct. 251 * Context switch work
209 */ 252 */
210struct ds_context { 253extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
211 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
212 unsigned char *ds;
213 /* the owner of the BTS and PEBS configuration, respectively */
214 struct task_struct *owner[2];
215 /* buffer overflow notification function for BTS and PEBS */
216 ds_ovfl_callback_t callback[2];
217 /* the original buffer address */
218 void *buffer[2];
219 /* the number of allocated pages for on-request allocated buffers */
220 unsigned int pages[2];
221 /* use count */
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
230 254
231/* called by exit_thread() to free leftover contexts */ 255/*
232extern void ds_free(struct ds_context *context); 256 * Task clone/init and cleanup work
257 */
258extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
259extern void ds_exit_thread(struct task_struct *tsk);
233 260
234#else /* CONFIG_X86_DS */ 261#else /* CONFIG_X86_DS */
235 262
236struct cpuinfo_x86; 263struct cpuinfo_x86;
237static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} 264static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
265static inline void ds_switch_to(struct task_struct *prev,
266 struct task_struct *next) {}
267static inline void ds_copy_thread(struct task_struct *tsk,
268 struct task_struct *father) {}
269static inline void ds_exit_thread(struct task_struct *tsk) {}
238 270
239#endif /* CONFIG_X86_DS */ 271#endif /* CONFIG_X86_DS */
240#endif /* _ASM_X86_DS_H */ 272#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 40ca1bea7916..f51a3ddde01a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -325,7 +325,7 @@ struct linux_binprm;
325 325
326#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 326#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
327extern int arch_setup_additional_pages(struct linux_binprm *bprm, 327extern int arch_setup_additional_pages(struct linux_binprm *bprm,
328 int executable_stack); 328 int uses_interp);
329 329
330extern int syscall32_setup_pages(struct linux_binprm *, int exstack); 330extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
331#define compat_arch_setup_additional_pages syscall32_setup_pages 331#define compat_arch_setup_additional_pages syscall32_setup_pages
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 94826cf87455..cc70c1c78ca4 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -8,7 +8,9 @@ enum reboot_type {
8 BOOT_BIOS = 'b', 8 BOOT_BIOS = 'b',
9#endif 9#endif
10 BOOT_ACPI = 'a', 10 BOOT_ACPI = 'a',
11 BOOT_EFI = 'e' 11 BOOT_EFI = 'e',
12 BOOT_CF9 = 'p',
13 BOOT_CF9_COND = 'q',
12}; 14};
13 15
14extern enum reboot_type reboot_type; 16extern enum reboot_type reboot_type;
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..e24ef876915f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline cpumask_t target_cpus_cluster(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC
15 return CPU_MASK_ALL; 14 return CPU_MASK_ALL;
16#else 15}
16
17static inline cpumask_t target_cpus(void)
18{
17 return cpumask_of_cpu(smp_processor_id()); 19 return cpumask_of_cpu(smp_processor_id());
18#endif
19} 20}
20 21
21#if defined CONFIG_ES7000_CLUSTERED_APIC 22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
22#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 23#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
23#define INT_DELIVERY_MODE (dest_LowestPrio) 24#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
24#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 25#define NO_BALANCE_IRQ_CLUSTER (1)
25#define NO_BALANCE_IRQ (1) 26
26#undef WAKE_SECONDARY_VIA_INIT
27#define WAKE_SECONDARY_VIA_MIP
28#else
29#define APIC_DFR_VALUE (APIC_DFR_FLAT) 27#define APIC_DFR_VALUE (APIC_DFR_FLAT)
30#define INT_DELIVERY_MODE (dest_Fixed) 28#define INT_DELIVERY_MODE (dest_Fixed)
31#define INT_DEST_MODE (0) /* phys delivery to target procs */ 29#define INT_DEST_MODE (0) /* phys delivery to target procs */
32#define NO_BALANCE_IRQ (0) 30#define NO_BALANCE_IRQ (0)
33#undef APIC_DEST_LOGICAL 31#undef APIC_DEST_LOGICAL
34#define APIC_DEST_LOGICAL 0x0 32#define APIC_DEST_LOGICAL 0x0
35#define WAKE_SECONDARY_VIA_INIT
36#endif
37 33
38static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 34static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
39{ 35{
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
60 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 56 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
61 * document number 292116). So here it goes... 57 * document number 292116). So here it goes...
62 */ 58 */
59static inline void init_apic_ldr_cluster(void)
60{
61 unsigned long val;
62 int cpu = smp_processor_id();
63
64 apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
65 val = calculate_ldr(cpu);
66 apic_write(APIC_LDR, val);
67}
68
63static inline void init_apic_ldr(void) 69static inline void init_apic_ldr(void)
64{ 70{
65 unsigned long val; 71 unsigned long val;
@@ -70,10 +76,6 @@ static inline void init_apic_ldr(void)
70 apic_write(APIC_LDR, val); 76 apic_write(APIC_LDR, val);
71} 77}
72 78
73#ifndef CONFIG_X86_GENERICARCH
74extern void enable_apic_mode(void);
75#endif
76
77extern int apic_version [MAX_APICS]; 79extern int apic_version [MAX_APICS];
78static inline void setup_apic_routing(void) 80static inline void setup_apic_routing(void)
79{ 81{
@@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
144 return (1); 146 return (1);
145} 147}
146 148
147static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 149static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
148{ 150{
149 int num_bits_set; 151 int num_bits_set;
150 int cpus_found = 0; 152 int cpus_found = 0;
@@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
154 num_bits_set = cpus_weight(cpumask); 156 num_bits_set = cpus_weight(cpumask);
155 /* Return id to all */ 157 /* Return id to all */
156 if (num_bits_set == NR_CPUS) 158 if (num_bits_set == NR_CPUS)
157#if defined CONFIG_ES7000_CLUSTERED_APIC
158 return 0xFF; 159 return 0xFF;
159#else
160 return cpu_to_logical_apicid(0);
161#endif
162 /* 160 /*
163 * The cpus in the mask must all be on the apic cluster. If are not 161 * The cpus in the mask must all be on the apic cluster. If are not
164 * on the same apicid cluster return default value of TARGET_CPUS. 162 * on the same apicid cluster return default value of TARGET_CPUS.
@@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
171 if (apicid_cluster(apicid) != 169 if (apicid_cluster(apicid) !=
172 apicid_cluster(new_apicid)){ 170 apicid_cluster(new_apicid)){
173 printk ("%s: Not a valid mask!\n", __func__); 171 printk ("%s: Not a valid mask!\n", __func__);
174#if defined CONFIG_ES7000_CLUSTERED_APIC
175 return 0xFF; 172 return 0xFF;
176#else 173 }
174 apicid = new_apicid;
175 cpus_found++;
176 }
177 cpu++;
178 }
179 return apicid;
180}
181
182static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
183{
184 int num_bits_set;
185 int cpus_found = 0;
186 int cpu;
187 int apicid;
188
189 num_bits_set = cpus_weight(cpumask);
190 /* Return id to all */
191 if (num_bits_set == NR_CPUS)
192 return cpu_to_logical_apicid(0);
193 /*
194 * The cpus in the mask must all be on the apic cluster. If are not
195 * on the same apicid cluster return default value of TARGET_CPUS.
196 */
197 cpu = first_cpu(cpumask);
198 apicid = cpu_to_logical_apicid(cpu);
199 while (cpus_found < num_bits_set) {
200 if (cpu_isset(cpu, cpumask)) {
201 int new_apicid = cpu_to_logical_apicid(cpu);
202 if (apicid_cluster(apicid) !=
203 apicid_cluster(new_apicid)){
204 printk ("%s: Not a valid mask!\n", __func__);
177 return cpu_to_logical_apicid(0); 205 return cpu_to_logical_apicid(0);
178#endif
179 } 206 }
180 apicid = new_apicid; 207 apicid = new_apicid;
181 cpus_found++; 208 cpus_found++;
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
1#ifndef __ASM_ES7000_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define __ASM_ES7000_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW 0x467
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH 0x469
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#ifdef CONFIG_ES7000_CLUSTERED_APIC
10#define WAKE_SECONDARY_VIA_MIP
11#else
12#define WAKE_SECONDARY_VIA_INIT
13#endif
14
15#ifdef WAKE_SECONDARY_VIA_MIP
16extern int es7000_start_cpu(int cpu, unsigned long eip);
17static inline int
18wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
19{
20 int boot_error = 0;
21 boot_error = es7000_start_cpu(phys_apicid, start_eip);
22 return boot_error;
23}
24#endif
25
26#define TRAMPOLINE_LOW phys_to_virt(0x467)
27#define TRAMPOLINE_HIGH phys_to_virt(0x469)
28
29#define boot_cpu_apicid boot_cpu_physical_apicid
30 6
31static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
32{ 8{
33#ifdef WAKE_SECONDARY_VIA_INIT 9#ifndef CONFIG_ES7000_CLUSTERED_APIC
34 while (!atomic_read(deassert)) 10 while (!atomic_read(deassert))
35 cpu_relax(); 11 cpu_relax();
36#endif 12#endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
50{ 26{
51} 27}
52 28
53#define inquire_remote_apic(apicid) do { \ 29extern void __inquire_remote_apic(int apicid);
54 if (apic_verbosity >= APIC_DEBUG) \ 30
55 __inquire_remote_apic(apicid); \ 31static inline void inquire_remote_apic(int apicid)
56 } while (0) 32{
33 if (apic_verbosity >= APIC_DEBUG)
34 __inquire_remote_apic(apicid);
35}
57 36
58#endif /* __ASM_MACH_WAKECPU_H */ 37#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..b55b4a7fbefd 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -1,6 +1,33 @@
1#ifndef _ASM_X86_FTRACE_H 1#ifndef _ASM_X86_FTRACE_H
2#define _ASM_X86_FTRACE_H 2#define _ASM_X86_FTRACE_H
3 3
4#ifdef __ASSEMBLY__
5
6 .macro MCOUNT_SAVE_FRAME
7 /* taken from glibc */
8 subq $0x38, %rsp
9 movq %rax, (%rsp)
10 movq %rcx, 8(%rsp)
11 movq %rdx, 16(%rsp)
12 movq %rsi, 24(%rsp)
13 movq %rdi, 32(%rsp)
14 movq %r8, 40(%rsp)
15 movq %r9, 48(%rsp)
16 .endm
17
18 .macro MCOUNT_RESTORE_FRAME
19 movq 48(%rsp), %r9
20 movq 40(%rsp), %r8
21 movq 32(%rsp), %rdi
22 movq 24(%rsp), %rsi
23 movq 16(%rsp), %rdx
24 movq 8(%rsp), %rcx
25 movq (%rsp), %rax
26 addq $0x38, %rsp
27 .endm
28
29#endif
30
4#ifdef CONFIG_FUNCTION_TRACER 31#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_ADDR ((long)(mcount)) 32#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 33#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
@@ -17,8 +44,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 44 */
18 return addr - 1; 45 return addr - 1;
19} 46}
20#endif
21 47
48#ifdef CONFIG_DYNAMIC_FTRACE
49
50struct dyn_arch_ftrace {
51 /* No extra data needed for x86 */
52};
53
54#endif /* CONFIG_DYNAMIC_FTRACE */
55#endif /* __ASSEMBLY__ */
22#endif /* CONFIG_FUNCTION_TRACER */ 56#endif /* CONFIG_FUNCTION_TRACER */
23 57
58#ifdef CONFIG_FUNCTION_GRAPH_TRACER
59
60#ifndef __ASSEMBLY__
61
62/*
63 * Stack of return addresses for functions
64 * of a thread.
65 * Used in struct thread_info
66 */
67struct ftrace_ret_stack {
68 unsigned long ret;
69 unsigned long func;
70 unsigned long long calltime;
71};
72
73/*
74 * Primary handler of a function return.
75 * It relays on ftrace_return_to_handler.
76 * Defined in entry_32/64.S
77 */
78extern void return_to_handler(void);
79
80#endif /* __ASSEMBLY__ */
81#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
82
24#endif /* _ASM_X86_FTRACE_H */ 83#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 74252264433d..6cfdafa409d8 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -29,6 +29,39 @@ extern int fix_aperture;
29#define AMD64_GARTCACHECTL 0x9c 29#define AMD64_GARTCACHECTL 0x9c
30#define AMD64_GARTEN (1<<0) 30#define AMD64_GARTEN (1<<0)
31 31
32#ifdef CONFIG_GART_IOMMU
33extern int gart_iommu_aperture;
34extern int gart_iommu_aperture_allowed;
35extern int gart_iommu_aperture_disabled;
36
37extern void early_gart_iommu_check(void);
38extern void gart_iommu_init(void);
39extern void gart_iommu_shutdown(void);
40extern void __init gart_parse_options(char *);
41extern void gart_iommu_hole_init(void);
42
43#else
44#define gart_iommu_aperture 0
45#define gart_iommu_aperture_allowed 0
46#define gart_iommu_aperture_disabled 1
47
48static inline void early_gart_iommu_check(void)
49{
50}
51static inline void gart_iommu_init(void)
52{
53}
54static inline void gart_iommu_shutdown(void)
55{
56}
57static inline void gart_parse_options(char *options)
58{
59}
60static inline void gart_iommu_hole_init(void)
61{
62}
63#endif
64
32extern int agp_amd64_init(void); 65extern int agp_amd64_init(void);
33 66
34static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) 67static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..0ac17d33a8c7 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_GENAPIC_32_H 2#define _ASM_X86_GENAPIC_32_H
3 3
4#include <asm/mpspec.h> 4#include <asm/mpspec.h>
5#include <asm/atomic.h>
5 6
6/* 7/*
7 * Generic APIC driver interface. 8 * Generic APIC driver interface.
@@ -65,6 +66,14 @@ struct genapic {
65 void (*send_IPI_allbutself)(int vector); 66 void (*send_IPI_allbutself)(int vector);
66 void (*send_IPI_all)(int vector); 67 void (*send_IPI_all)(int vector);
67#endif 68#endif
69 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
70 int trampoline_phys_low;
71 int trampoline_phys_high;
72 void (*wait_for_init_deassert)(atomic_t *deassert);
73 void (*smp_callin_clear_local_apic)(void);
74 void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
75 void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
76 void (*inquire_remote_apic)(int apicid);
68}; 77};
69 78
70#define APICFUNC(x) .x = x, 79#define APICFUNC(x) .x = x,
@@ -105,16 +114,24 @@ struct genapic {
105 APICFUNC(get_apic_id) \ 114 APICFUNC(get_apic_id) \
106 .apic_id_mask = APIC_ID_MASK, \ 115 .apic_id_mask = APIC_ID_MASK, \
107 APICFUNC(cpu_mask_to_apicid) \ 116 APICFUNC(cpu_mask_to_apicid) \
108 APICFUNC(vector_allocation_domain) \ 117 APICFUNC(vector_allocation_domain) \
109 APICFUNC(acpi_madt_oem_check) \ 118 APICFUNC(acpi_madt_oem_check) \
110 IPIFUNC(send_IPI_mask) \ 119 IPIFUNC(send_IPI_mask) \
111 IPIFUNC(send_IPI_allbutself) \ 120 IPIFUNC(send_IPI_allbutself) \
112 IPIFUNC(send_IPI_all) \ 121 IPIFUNC(send_IPI_all) \
113 APICFUNC(enable_apic_mode) \ 122 APICFUNC(enable_apic_mode) \
114 APICFUNC(phys_pkg_id) \ 123 APICFUNC(phys_pkg_id) \
124 .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
125 .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
126 APICFUNC(wait_for_init_deassert) \
127 APICFUNC(smp_callin_clear_local_apic) \
128 APICFUNC(store_NMI_vector) \
129 APICFUNC(restore_NMI_vector) \
130 APICFUNC(inquire_remote_apic) \
115} 131}
116 132
117extern struct genapic *genapic; 133extern struct genapic *genapic;
134extern void es7000_update_genapic_to_cluster(void);
118 135
119enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 136enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
120#define get_uv_system_type() UV_NONE 137#define get_uv_system_type() UV_NONE
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..2cae011668b7 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -32,6 +32,8 @@ struct genapic {
32 unsigned int (*get_apic_id)(unsigned long x); 32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id); 33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask; 34 unsigned long apic_id_mask;
35 /* wakeup_secondary_cpu */
36 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
35}; 37};
36 38
37extern struct genapic *genapic; 39extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
new file mode 100644
index 000000000000..369f5c5d09a1
--- /dev/null
+++ b/arch/x86/include/asm/hypervisor.h
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2008, VMware, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
12 * NON INFRINGEMENT. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20#ifndef ASM_X86__HYPERVISOR_H
21#define ASM_X86__HYPERVISOR_H
22
23extern unsigned long get_hypervisor_tsc_freq(void);
24extern void init_hypervisor(struct cpuinfo_x86 *c);
25
26#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 97989c0e534c..50ca486fd88c 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,24 +129,6 @@ typedef struct compat_siginfo {
129 } _sifields; 129 } _sifields;
130} compat_siginfo_t; 130} compat_siginfo_t;
131 131
132struct sigframe32 {
133 u32 pretcode;
134 int sig;
135 struct sigcontext_ia32 sc;
136 struct _fpstate_ia32 fpstate;
137 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
138};
139
140struct rt_sigframe32 {
141 u32 pretcode;
142 int sig;
143 u32 pinfo;
144 u32 puc;
145 compat_siginfo_t info;
146 struct ucontext_ia32 uc;
147 struct _fpstate_ia32 fpstate;
148};
149
150struct ustat32 { 132struct ustat32 {
151 __u32 f_tfree; 133 __u32 f_tfree;
152 compat_ino_t f_tinode; 134 compat_ino_t f_tinode;
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index 44c89c3a23e9..38d87379e270 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -8,8 +8,13 @@ struct notifier_block;
8void idle_notifier_register(struct notifier_block *n); 8void idle_notifier_register(struct notifier_block *n);
9void idle_notifier_unregister(struct notifier_block *n); 9void idle_notifier_unregister(struct notifier_block *n);
10 10
11#ifdef CONFIG_X86_64
11void enter_idle(void); 12void enter_idle(void);
12void exit_idle(void); 13void exit_idle(void);
14#else /* !CONFIG_X86_64 */
15static inline void enter_idle(void) { }
16static inline void exit_idle(void) { }
17#endif /* CONFIG_X86_64 */
13 18
14void c1e_remove_cpu(int cpu); 19void c1e_remove_cpu(int cpu);
15 20
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ac2abc88cd95..05cfed4485fa 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -4,6 +4,7 @@
4#define ARCH_HAS_IOREMAP_WC 4#define ARCH_HAS_IOREMAP_WC
5 5
6#include <linux/compiler.h> 6#include <linux/compiler.h>
7#include <asm-generic/int-ll64.h>
7 8
8#define build_mmio_read(name, size, type, reg, barrier) \ 9#define build_mmio_read(name, size, type, reg, barrier) \
9static inline type name(const volatile void __iomem *addr) \ 10static inline type name(const volatile void __iomem *addr) \
@@ -45,21 +46,39 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
45#define mmiowb() barrier() 46#define mmiowb() barrier()
46 47
47#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
49
48build_mmio_read(readq, "q", unsigned long, "=r", :"memory") 50build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
49build_mmio_read(__readq, "q", unsigned long, "=r", )
50build_mmio_write(writeq, "q", unsigned long, "r", :"memory") 51build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
51build_mmio_write(__writeq, "q", unsigned long, "r", )
52 52
53#define readq_relaxed(a) __readq(a) 53#else
54#define __raw_readq __readq 54
55#define __raw_writeq writeq 55static inline __u64 readq(const volatile void __iomem *addr)
56{
57 const volatile u32 __iomem *p = addr;
58 u32 low, high;
59
60 low = readl(p);
61 high = readl(p + 1);
62
63 return low + ((u64)high << 32);
64}
65
66static inline void writeq(__u64 val, volatile void __iomem *addr)
67{
68 writel(val, addr);
69 writel(val >> 32, addr+4);
70}
56 71
57/* Let people know we have them */
58#define readq readq
59#define writeq writeq
60#endif 72#endif
61 73
62extern int iommu_bio_merge; 74#define readq_relaxed(a) readq(a)
75
76#define __raw_readq(a) readq(a)
77#define __raw_writeq(val, addr) writeq(val, addr)
78
79/* Let people know that we have them */
80#define readq readq
81#define writeq writeq
63 82
64#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
65# include "io_32.h" 84# include "io_32.h"
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
index fea325a1122f..563c16270ba6 100644
--- a/arch/x86/include/asm/io_64.h
+++ b/arch/x86/include/asm/io_64.h
@@ -232,8 +232,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c);
232 232
233#define flush_write_buffers() 233#define flush_write_buffers()
234 234
235#define BIO_VMERGE_BOUNDARY iommu_bio_merge
236
237/* 235/*
238 * Convert a virtual cached pointer to an uncached pointer 236 * Convert a virtual cached pointer to an uncached pointer
239 */ 237 */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7dd..e475e009ae5d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -156,11 +156,21 @@ extern int sis_apic_bug;
156/* 1 if "noapic" boot option passed */ 156/* 1 if "noapic" boot option passed */
157extern int skip_ioapic_setup; 157extern int skip_ioapic_setup;
158 158
159/* 1 if "noapic" boot option passed */
160extern int noioapicquirk;
161
162/* -1 if "noapic" boot option passed */
163extern int noioapicreroute;
164
159/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ 165/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
160extern int timer_through_8259; 166extern int timer_through_8259;
161 167
162static inline void disable_ioapic_setup(void) 168static inline void disable_ioapic_setup(void)
163{ 169{
170#ifdef CONFIG_PCI
171 noioapicquirk = 1;
172 noioapicreroute = -1;
173#endif
164 skip_ioapic_setup = 1; 174 skip_ioapic_setup = 1;
165} 175}
166 176
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 0b500c5b6446..295b13193f4d 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -12,37 +12,4 @@ extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
12/* 10 seconds */ 12/* 10 seconds */
13#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) 13#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
14 14
15#ifdef CONFIG_GART_IOMMU
16extern int gart_iommu_aperture;
17extern int gart_iommu_aperture_allowed;
18extern int gart_iommu_aperture_disabled;
19
20extern void early_gart_iommu_check(void);
21extern void gart_iommu_init(void);
22extern void gart_iommu_shutdown(void);
23extern void __init gart_parse_options(char *);
24extern void gart_iommu_hole_init(void);
25
26#else
27#define gart_iommu_aperture 0
28#define gart_iommu_aperture_allowed 0
29#define gart_iommu_aperture_disabled 1
30
31static inline void early_gart_iommu_check(void)
32{
33}
34static inline void gart_iommu_init(void)
35{
36}
37static inline void gart_iommu_shutdown(void)
38{
39}
40static inline void gart_parse_options(char *options)
41{
42}
43static inline void gart_iommu_hole_init(void)
44{
45}
46#endif
47
48#endif /* _ASM_X86_IOMMU_H */ 15#endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index a1f22771a15a..c61d8b2ab8b9 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -5,21 +5,8 @@
5# define PA_CONTROL_PAGE 0 5# define PA_CONTROL_PAGE 0
6# define VA_CONTROL_PAGE 1 6# define VA_CONTROL_PAGE 1
7# define PA_PGD 2 7# define PA_PGD 2
8# define VA_PGD 3 8# define PA_SWAP_PAGE 3
9# define PA_PTE_0 4 9# define PAGES_NR 4
10# define VA_PTE_0 5
11# define PA_PTE_1 6
12# define VA_PTE_1 7
13# define PA_SWAP_PAGE 8
14# ifdef CONFIG_X86_PAE
15# define PA_PMD_0 9
16# define VA_PMD_0 10
17# define PA_PMD_1 11
18# define VA_PMD_1 12
19# define PAGES_NR 13
20# else
21# define PAGES_NR 9
22# endif
23#else 10#else
24# define PA_CONTROL_PAGE 0 11# define PA_CONTROL_PAGE 0
25# define VA_CONTROL_PAGE 1 12# define VA_CONTROL_PAGE 1
@@ -170,6 +157,20 @@ relocate_kernel(unsigned long indirection_page,
170 unsigned long start_address) ATTRIB_NORET; 157 unsigned long start_address) ATTRIB_NORET;
171#endif 158#endif
172 159
160#ifdef CONFIG_X86_32
161#define ARCH_HAS_KIMAGE_ARCH
162
163struct kimage_arch {
164 pgd_t *pgd;
165#ifdef CONFIG_X86_PAE
166 pmd_t *pmd0;
167 pmd_t *pmd1;
168#endif
169 pte_t *pte0;
170 pte_t *pte1;
171};
172#endif
173
173#endif /* __ASSEMBLY__ */ 174#endif /* __ASSEMBLY__ */
174 175
175#endif /* _ASM_X86_KEXEC_H */ 176#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..6cb3a467e067 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) 33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self) 34#define send_IPI_self (genapic->send_IPI_self)
35#define wakeup_secondary_cpu (genapic->wakeup_cpu)
35extern void setup_apic_routing(void); 36extern void setup_apic_routing(void);
36#else 37#else
37#define INT_DELIVERY_MODE dest_LowestPrio 38#define INT_DELIVERY_MODE dest_LowestPrio
38#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ 39#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
39#define TARGET_CPUS (target_cpus()) 40#define TARGET_CPUS (target_cpus())
41#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
40/* 42/*
41 * Set up the logical destination ID. 43 * Set up the logical destination ID.
42 * 44 *
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
1#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H 1#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
2#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H 2#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW (0x467)
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH (0x469)
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#define WAKE_SECONDARY_VIA_INIT
10
11#define TRAMPOLINE_LOW phys_to_virt(0x467)
12#define TRAMPOLINE_HIGH phys_to_virt(0x469)
13
14#define boot_cpu_apicid boot_cpu_physical_apicid
15 6
16static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
17{ 8{
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
33{ 24{
34} 25}
35 26
36#define inquire_remote_apic(apicid) do { \ 27extern void __inquire_remote_apic(int apicid);
37 if (apic_verbosity >= APIC_DEBUG) \ 28
38 __inquire_remote_apic(apicid); \ 29static inline void inquire_remote_apic(int apicid)
39 } while (0) 30{
31 if (apic_verbosity >= APIC_DEBUG)
32 __inquire_remote_apic(apicid);
33}
40 34
41#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ 35#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
13 CMOS_WRITE(0xa, 0xf); 13 CMOS_WRITE(0xa, 0xf);
14 local_flush_tlb(); 14 local_flush_tlb();
15 pr_debug("1.\n"); 15 pr_debug("1.\n");
16 *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; 16 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
17 start_eip >> 4;
17 pr_debug("2.\n"); 18 pr_debug("2.\n");
18 *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; 19 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
20 start_eip & 0xf;
19 pr_debug("3.\n"); 21 pr_debug("3.\n");
20} 22}
21 23
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
32 */ 34 */
33 CMOS_WRITE(0, 0xf); 35 CMOS_WRITE(0, 0xf);
34 36
35 *((volatile long *) phys_to_virt(0x467)) = 0; 37 *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
36} 38}
37 39
38static inline void __init smpboot_setup_io_apic(void) 40static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..e430f47df667 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -27,6 +27,7 @@
27#define vector_allocation_domain (genapic->vector_allocation_domain) 27#define vector_allocation_domain (genapic->vector_allocation_domain)
28#define enable_apic_mode (genapic->enable_apic_mode) 28#define enable_apic_mode (genapic->enable_apic_mode)
29#define phys_pkg_id (genapic->phys_pkg_id) 29#define phys_pkg_id (genapic->phys_pkg_id)
30#define wakeup_secondary_cpu (genapic->wakeup_cpu)
30 31
31extern void generic_bigsmp_probe(void); 32extern void generic_bigsmp_probe(void);
32 33
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
2#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
3
4#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
5#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
6#define wait_for_init_deassert (genapic->wait_for_init_deassert)
7#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
8#define store_NMI_vector (genapic->store_NMI_vector)
9#define restore_NMI_vector (genapic->restore_NMI_vector)
10#define inquire_remote_apic (genapic->inquire_remote_apic)
11
12#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 8e10015781fb..7e98ce1d2c0e 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,9 +4,8 @@
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{ 5{
6#ifdef CONFIG_SMP 6#ifdef CONFIG_SMP
7 unsigned cpu = smp_processor_id(); 7 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
8 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) 8 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
9 per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
10#endif 9#endif
11} 10}
12 11
@@ -20,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
20 /* stop flush ipis for the previous mm */ 19 /* stop flush ipis for the previous mm */
21 cpu_clear(cpu, prev->cpu_vm_mask); 20 cpu_clear(cpu, prev->cpu_vm_mask);
22#ifdef CONFIG_SMP 21#ifdef CONFIG_SMP
23 per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; 22 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
24 per_cpu(cpu_tlbstate, cpu).active_mm = next; 23 x86_write_percpu(cpu_tlbstate.active_mm, next);
25#endif 24#endif
26 cpu_set(cpu, next->cpu_vm_mask); 25 cpu_set(cpu, next->cpu_vm_mask);
27 26
@@ -36,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
36 } 35 }
37#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
38 else { 37 else {
39 per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; 38 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
40 BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); 39 BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
41 40
42 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { 41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
43 /* We were in lazy tlb mode and leave_mm disabled 42 /* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e38859d577a1..cb58643947b9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -85,7 +85,9 @@
85/* AMD64 MSRs. Not complete. See the architecture manual for a more 85/* AMD64 MSRs. Not complete. See the architecture manual for a more
86 complete list. */ 86 complete list. */
87 87
88#define MSR_AMD64_PATCH_LEVEL 0x0000008b
88#define MSR_AMD64_NB_CFG 0xc001001f 89#define MSR_AMD64_NB_CFG 0xc001001f
90#define MSR_AMD64_PATCH_LOADER 0xc0010020
89#define MSR_AMD64_IBSFETCHCTL 0xc0011030 91#define MSR_AMD64_IBSFETCHCTL 0xc0011030
90#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 92#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
91#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 93#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index c2a812ebde89..638bf6241807 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
22} 22}
23 23
24/* 24/*
25 * i386 calling convention returns 64-bit value in edx:eax, while 25 * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
26 * x86_64 returns at rax. Also, the "A" constraint does not really 26 * constraint has different meanings. For i386, "A" means exactly
27 * mean rdx:rax in x86_64, so we need specialized behaviour for each 27 * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
28 * architecture 28 * it means rax *or* rdx.
29 */ 29 */
30#ifdef CONFIG_X86_64 30#ifdef CONFIG_X86_64
31#define DECLARE_ARGS(val, low, high) unsigned low, high 31#define DECLARE_ARGS(val, low, high) unsigned low, high
@@ -85,7 +85,8 @@ static inline void native_write_msr(unsigned int msr,
85 asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); 85 asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
86} 86}
87 87
88static inline int native_write_msr_safe(unsigned int msr, 88/* Can be uninlined because referenced by paravirt */
89notrace static inline int native_write_msr_safe(unsigned int msr,
89 unsigned low, unsigned high) 90 unsigned low, unsigned high)
90{ 91{
91 int err; 92 int err;
@@ -181,10 +182,10 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
181} 182}
182 183
183#define rdtscl(low) \ 184#define rdtscl(low) \
184 ((low) = (u32)native_read_tsc()) 185 ((low) = (u32)__native_read_tsc())
185 186
186#define rdtscll(val) \ 187#define rdtscll(val) \
187 ((val) = native_read_tsc()) 188 ((val) = __native_read_tsc())
188 189
189#define rdpmc(counter, low, high) \ 190#define rdpmc(counter, low, high) \
190do { \ 191do { \
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
3 3
4/* This file copes with machines that wakeup secondary CPUs by NMIs */ 4/* This file copes with machines that wakeup secondary CPUs by NMIs */
5 5
6#define WAKE_SECONDARY_VIA_NMI 6#define TRAMPOLINE_PHYS_LOW (0x8)
7 7#define TRAMPOLINE_PHYS_HIGH (0xa)
8#define TRAMPOLINE_LOW phys_to_virt(0x8)
9#define TRAMPOLINE_HIGH phys_to_virt(0xa)
10
11#define boot_cpu_apicid boot_cpu_logical_apicid
12 8
13/* We don't do anything here because we use NMI's to boot instead */ 9/* We don't do anything here because we use NMI's to boot instead */
14static inline void wait_for_init_deassert(atomic_t *deassert) 10static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
27static inline void store_NMI_vector(unsigned short *high, unsigned short *low) 23static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
28{ 24{
29 printk("Storing NMI vector\n"); 25 printk("Storing NMI vector\n");
30 *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); 26 *high =
31 *low = *((volatile unsigned short *) TRAMPOLINE_LOW); 27 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
28 *low =
29 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
32} 30}
33 31
34static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) 32static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
35{ 33{
36 printk("Restoring NMI vector\n"); 34 printk("Restoring NMI vector\n");
37 *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; 35 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
38 *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; 36 *high;
37 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
38 *low;
39} 39}
40 40
41#define inquire_remote_apic(apicid) {} 41static inline void inquire_remote_apic(int apicid)
42{
43}
42 44
43#endif /* __ASM_NUMAQ_WAKECPU_H */ 45#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38edf193..647781298e7e 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -19,6 +19,8 @@ struct pci_sysdata {
19}; 19};
20 20
21extern int pci_routeirq; 21extern int pci_routeirq;
22extern int noioapicquirk;
23extern int noioapicreroute;
22 24
23/* scan a bus after allocating a pci_sysdata for it */ 25/* scan a bus after allocating a pci_sysdata for it */
24extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, 26extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index b17edfd23628..e0d199fe1d83 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -56,23 +56,55 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
56#define pte_none(x) (!(x).pte_low) 56#define pte_none(x) (!(x).pte_low)
57 57
58/* 58/*
59 * Bits 0, 6 and 7 are taken, split up the 29 bits of offset 59 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
60 * into this range: 60 * split up the 29 bits of offset into this range:
61 */ 61 */
62#define PTE_FILE_MAX_BITS 29 62#define PTE_FILE_MAX_BITS 29
63#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
64#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
65#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
66#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
67#else
68#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1)
69#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1)
70#endif
71#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
72#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
63 73
64#define pte_to_pgoff(pte) \ 74#define pte_to_pgoff(pte) \
65 ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) 75 ((((pte).pte_low >> PTE_FILE_SHIFT1) \
76 & ((1U << PTE_FILE_BITS1) - 1)) \
77 + ((((pte).pte_low >> PTE_FILE_SHIFT2) \
78 & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \
79 + (((pte).pte_low >> PTE_FILE_SHIFT3) \
80 << (PTE_FILE_BITS1 + PTE_FILE_BITS2)))
66 81
67#define pgoff_to_pte(off) \ 82#define pgoff_to_pte(off) \
68 ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ 83 ((pte_t) { .pte_low = \
69 (((off) >> 5) << 8) + _PAGE_FILE }) 84 (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
85 + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \
86 << PTE_FILE_SHIFT2) \
87 + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
88 << PTE_FILE_SHIFT3) \
89 + _PAGE_FILE })
70 90
71/* Encode and de-code a swap entry */ 91/* Encode and de-code a swap entry */
72#define __swp_type(x) (((x).val >> 1) & 0x1f) 92#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
73#define __swp_offset(x) ((x).val >> 8) 93#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
74#define __swp_entry(type, offset) \ 94#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
75 ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) 95#else
96#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
97#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
98#endif
99
100#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
101
102#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
103 & ((1U << SWP_TYPE_BITS) - 1))
104#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
105#define __swp_entry(type, offset) ((swp_entry_t) { \
106 ((type) << (_PAGE_BIT_PRESENT + 1)) \
107 | ((offset) << SWP_OFFSET_SHIFT) })
76#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) 108#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
77#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) 109#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
78 110
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 52597aeadfff..447da43cddb3 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -166,6 +166,7 @@ static inline int pte_none(pte_t pte)
166#define PTE_FILE_MAX_BITS 32 166#define PTE_FILE_MAX_BITS 32
167 167
168/* Encode and de-code a swap entry */ 168/* Encode and de-code a swap entry */
169#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
169#define __swp_type(x) (((x).val) & 0x1f) 170#define __swp_type(x) (((x).val) & 0x1f)
170#define __swp_offset(x) ((x).val >> 5) 171#define __swp_offset(x) ((x).val >> 5)
171#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) 172#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c012f3b11671..83e69f4a37f0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -10,7 +10,6 @@
10#define _PAGE_BIT_PCD 4 /* page cache disabled */ 10#define _PAGE_BIT_PCD 4 /* page cache disabled */
11#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ 11#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
12#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ 12#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
13#define _PAGE_BIT_FILE 6
14#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ 13#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
15#define _PAGE_BIT_PAT 7 /* on 4KB pages */ 14#define _PAGE_BIT_PAT 7 /* on 4KB pages */
16#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ 15#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
@@ -22,6 +21,12 @@
22#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 21#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
23#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ 22#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
24 23
24/* If _PAGE_BIT_PRESENT is clear, we use these: */
25/* - if the user mapped it with PROT_NONE; pte_present gives true */
26#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
27/* - set: nonlinear file mapping, saved PTE; unset:swap */
28#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
29
25#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) 30#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
26#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) 31#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
27#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) 32#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
@@ -46,11 +51,8 @@
46#define _PAGE_NX (_AT(pteval_t, 0)) 51#define _PAGE_NX (_AT(pteval_t, 0))
47#endif 52#endif
48 53
49/* If _PAGE_PRESENT is clear, we use these: */ 54#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
50#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, 55#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
51 * saved PTE; unset:swap */
52#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
53 pte_present gives true */
54 56
55#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ 57#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
56 _PAGE_ACCESSED | _PAGE_DIRTY) 58 _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -158,8 +160,19 @@
158#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ 160#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
159#endif 161#endif
160 162
163/*
164 * Macro to mark a page protection value as UC-
165 */
166#define pgprot_noncached(prot) \
167 ((boot_cpu_data.x86 > 3) \
168 ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \
169 : (prot))
170
161#ifndef __ASSEMBLY__ 171#ifndef __ASSEMBLY__
162 172
173#define pgprot_writecombine pgprot_writecombine
174extern pgprot_t pgprot_writecombine(pgprot_t prot);
175
163/* 176/*
164 * ZERO_PAGE is a global shared page that is always zero: used 177 * ZERO_PAGE is a global shared page that is always zero: used
165 * for zero-mapped memory areas etc.. 178 * for zero-mapped memory areas etc..
@@ -329,6 +342,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
329#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) 342#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
330 343
331#ifndef __ASSEMBLY__ 344#ifndef __ASSEMBLY__
345/* Indicate that x86 has its own track and untrack pfn vma functions */
346#define __HAVE_PFNMAP_TRACKING
347
332#define __HAVE_PHYS_MEM_ACCESS_PROT 348#define __HAVE_PHYS_MEM_ACCESS_PROT
333struct file; 349struct file;
334pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 350pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index f9d5889b336b..72b020deb46b 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -101,15 +101,6 @@ extern unsigned long pg0[];
101#endif 101#endif
102 102
103/* 103/*
104 * Macro to mark a page protection value as "uncacheable".
105 * On processors which do not support it, this is a no-op.
106 */
107#define pgprot_noncached(prot) \
108 ((boot_cpu_data.x86 > 3) \
109 ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
110 : (prot))
111
112/*
113 * Conversion functions: convert a page and protection to a page entry, 104 * Conversion functions: convert a page and protection to a page entry,
114 * and a page entry and page directory to the page they refer to. 105 * and a page entry and page directory to the page they refer to.
115 */ 106 */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 545a0e042bb2..ba09289accaa 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -146,7 +146,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
146#define PGDIR_MASK (~(PGDIR_SIZE - 1)) 146#define PGDIR_MASK (~(PGDIR_SIZE - 1))
147 147
148 148
149#define MAXMEM _AC(0x00003fffffffffff, UL) 149#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
150#define VMALLOC_START _AC(0xffffc20000000000, UL) 150#define VMALLOC_START _AC(0xffffc20000000000, UL)
151#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) 151#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
152#define VMEMMAP_START _AC(0xffffe20000000000, UL) 152#define VMEMMAP_START _AC(0xffffe20000000000, UL)
@@ -177,12 +177,6 @@ static inline int pmd_bad(pmd_t pmd)
177#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ 177#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
178 178
179/* 179/*
180 * Macro to mark a page protection value as "uncacheable".
181 */
182#define pgprot_noncached(prot) \
183 (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
184
185/*
186 * Conversion functions: convert a page and protection to a page entry, 180 * Conversion functions: convert a page and protection to a page entry,
187 * and a page entry and page directory to the page they refer to. 181 * and a page entry and page directory to the page they refer to.
188 */ 182 */
@@ -250,10 +244,22 @@ static inline int pud_large(pud_t pte)
250extern int direct_gbpages; 244extern int direct_gbpages;
251 245
252/* Encode and de-code a swap entry */ 246/* Encode and de-code a swap entry */
253#define __swp_type(x) (((x).val >> 1) & 0x3f) 247#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
254#define __swp_offset(x) ((x).val >> 8) 248#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
255#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ 249#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
256 ((offset) << 8) }) 250#else
251#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
252#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
253#endif
254
255#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
256
257#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
258 & ((1U << SWP_TYPE_BITS) - 1))
259#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
260#define __swp_entry(type, offset) ((swp_entry_t) { \
261 ((type) << (_PAGE_BIT_PRESENT + 1)) \
262 | ((offset) << SWP_OFFSET_SHIFT) })
257#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) 263#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
258#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) 264#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
259 265
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h
index fe681147a4f7..a8894647dd9a 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/asm/prctl.h
@@ -6,5 +6,8 @@
6#define ARCH_GET_FS 0x1003 6#define ARCH_GET_FS 0x1003
7#define ARCH_GET_GS 0x1004 7#define ARCH_GET_GS 0x1004
8 8
9#ifdef CONFIG_X86_64
10extern long sys_arch_prctl(int, unsigned long);
11#endif /* CONFIG_X86_64 */
9 12
10#endif /* _ASM_X86_PRCTL_H */ 13#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e383269..091cd8855f2e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,7 @@ struct cpuinfo_x86 {
110 /* Index into per_cpu list: */ 110 /* Index into per_cpu list: */
111 u16 cpu_index; 111 u16 cpu_index;
112#endif 112#endif
113 unsigned int x86_hyper_vendor;
113} __attribute__((__aligned__(SMP_CACHE_BYTES))); 114} __attribute__((__aligned__(SMP_CACHE_BYTES)));
114 115
115#define X86_VENDOR_INTEL 0 116#define X86_VENDOR_INTEL 0
@@ -123,6 +124,9 @@ struct cpuinfo_x86 {
123 124
124#define X86_VENDOR_UNKNOWN 0xff 125#define X86_VENDOR_UNKNOWN 0xff
125 126
127#define X86_HYPER_VENDOR_NONE 0
128#define X86_HYPER_VENDOR_VMWARE 1
129
126/* 130/*
127 * capabilities of CPUs 131 * capabilities of CPUs
128 */ 132 */
@@ -752,6 +756,19 @@ extern void switch_to_new_gdt(void);
752extern void cpu_init(void); 756extern void cpu_init(void);
753extern void init_gdt(int cpu); 757extern void init_gdt(int cpu);
754 758
759static inline unsigned long get_debugctlmsr(void)
760{
761 unsigned long debugctlmsr = 0;
762
763#ifndef CONFIG_X86_DEBUGCTLMSR
764 if (boot_cpu_data.x86 < 6)
765 return 0;
766#endif
767 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
768
769 return debugctlmsr;
770}
771
755static inline void update_debugctlmsr(unsigned long debugctlmsr) 772static inline void update_debugctlmsr(unsigned long debugctlmsr)
756{ 773{
757#ifndef CONFIG_X86_DEBUGCTLMSR 774#ifndef CONFIG_X86_DEBUGCTLMSR
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index eefb0594b058..6d34d954c228 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,6 @@
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7 7
8#ifdef __KERNEL__ 8#ifdef __KERNEL__
9#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */
10#include <asm/segment.h> 9#include <asm/segment.h>
11#endif 10#endif
12 11
@@ -128,34 +127,6 @@ struct pt_regs {
128#endif /* !__i386__ */ 127#endif /* !__i386__ */
129 128
130 129
131#ifdef CONFIG_X86_PTRACE_BTS
132/* a branch trace record entry
133 *
134 * In order to unify the interface between various processor versions,
135 * we use the below data structure for all processors.
136 */
137enum bts_qualifier {
138 BTS_INVALID = 0,
139 BTS_BRANCH,
140 BTS_TASK_ARRIVES,
141 BTS_TASK_DEPARTS
142};
143
144struct bts_struct {
145 __u64 qualifier;
146 union {
147 /* BTS_BRANCH */
148 struct {
149 __u64 from_ip;
150 __u64 to_ip;
151 } lbr;
152 /* BTS_TASK_ARRIVES or
153 BTS_TASK_DEPARTS */
154 __u64 jiffies;
155 } variant;
156};
157#endif /* CONFIG_X86_PTRACE_BTS */
158
159#ifdef __KERNEL__ 130#ifdef __KERNEL__
160 131
161#include <linux/init.h> 132#include <linux/init.h>
@@ -163,13 +134,6 @@ struct bts_struct {
163struct cpuinfo_x86; 134struct cpuinfo_x86;
164struct task_struct; 135struct task_struct;
165 136
166#ifdef CONFIG_X86_PTRACE_BTS
167extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
168extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
169#else
170#define ptrace_bts_init_intel(config) do {} while (0)
171#endif /* CONFIG_X86_PTRACE_BTS */
172
173extern unsigned long profile_pc(struct pt_regs *regs); 137extern unsigned long profile_pc(struct pt_regs *regs);
174 138
175extern unsigned long 139extern unsigned long
@@ -271,6 +235,13 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
271extern int do_set_thread_area(struct task_struct *p, int idx, 235extern int do_set_thread_area(struct task_struct *p, int idx,
272 struct user_desc __user *info, int can_allocate); 236 struct user_desc __user *info, int can_allocate);
273 237
238extern void x86_ptrace_untrace(struct task_struct *);
239extern void x86_ptrace_fork(struct task_struct *child,
240 unsigned long clone_flags);
241
242#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
243#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
244
274#endif /* __KERNEL__ */ 245#endif /* __KERNEL__ */
275 246
276#endif /* !__ASSEMBLY__ */ 247#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index df7710354f85..562d4fd31ba8 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_REBOOT_H 1#ifndef _ASM_X86_REBOOT_H
2#define _ASM_X86_REBOOT_H 2#define _ASM_X86_REBOOT_H
3 3
4#include <linux/kdebug.h>
5
4struct pt_regs; 6struct pt_regs;
5 7
6struct machine_ops { 8struct machine_ops {
@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs);
18void native_machine_shutdown(void); 20void native_machine_shutdown(void);
19void machine_real_restart(const unsigned char *code, int length); 21void machine_real_restart(const unsigned char *code, int length);
20 22
23typedef void (*nmi_shootdown_cb)(int, struct die_args*);
24void nmi_shootdown_cpus(nmi_shootdown_cb callback);
25
21#endif /* _ASM_X86_REBOOT_H */ 26#endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..4fcd53fd5f43 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -8,6 +8,10 @@
8/* Interrupt control for vSMPowered x86_64 systems */ 8/* Interrupt control for vSMPowered x86_64 systems */
9void vsmp_init(void); 9void vsmp_init(void);
10 10
11
12void setup_bios_corruption_check(void);
13
14
11#ifdef CONFIG_X86_VISWS 15#ifdef CONFIG_X86_VISWS
12extern void visws_early_detect(void); 16extern void visws_early_detect(void);
13extern int is_visws_box(void); 17extern int is_visws_box(void);
@@ -16,6 +20,8 @@ static inline void visws_early_detect(void) { }
16static inline int is_visws_box(void) { return 0; } 20static inline int is_visws_box(void) { return 0; }
17#endif 21#endif
18 22
23extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
24extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
19/* 25/*
20 * Any setup quirks to be performed? 26 * Any setup quirks to be performed?
21 */ 27 */
@@ -39,6 +45,7 @@ struct x86_quirks {
39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, 45 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
40 unsigned short oemsize); 46 unsigned short oemsize);
41 int (*setup_ioapic_ids)(void); 47 int (*setup_ioapic_ids)(void);
48 int (*update_genapic)(void);
42}; 49};
43 50
44extern struct x86_quirks *x86_quirks; 51extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
new file mode 100644
index 000000000000..4e0fe26d27d3
--- /dev/null
+++ b/arch/x86/include/asm/sigframe.h
@@ -0,0 +1,70 @@
1#ifndef _ASM_X86_SIGFRAME_H
2#define _ASM_X86_SIGFRAME_H
3
4#include <asm/sigcontext.h>
5#include <asm/siginfo.h>
6#include <asm/ucontext.h>
7
8#ifdef CONFIG_X86_32
9#define sigframe_ia32 sigframe
10#define rt_sigframe_ia32 rt_sigframe
11#define sigcontext_ia32 sigcontext
12#define _fpstate_ia32 _fpstate
13#define ucontext_ia32 ucontext
14#else /* !CONFIG_X86_32 */
15
16#ifdef CONFIG_IA32_EMULATION
17#include <asm/ia32.h>
18#endif /* CONFIG_IA32_EMULATION */
19
20#endif /* CONFIG_X86_32 */
21
22#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
23struct sigframe_ia32 {
24 u32 pretcode;
25 int sig;
26 struct sigcontext_ia32 sc;
27 /*
28 * fpstate is unused. fpstate is moved/allocated after
29 * retcode[] below. This movement allows to have the FP state and the
30 * future state extensions (xsave) stay together.
31 * And at the same time retaining the unused fpstate, prevents changing
32 * the offset of extramask[] in the sigframe and thus prevent any
33 * legacy application accessing/modifying it.
34 */
35 struct _fpstate_ia32 fpstate_unused;
36#ifdef CONFIG_IA32_EMULATION
37 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
38#else /* !CONFIG_IA32_EMULATION */
39 unsigned long extramask[_NSIG_WORDS-1];
40#endif /* CONFIG_IA32_EMULATION */
41 char retcode[8];
42 /* fp state follows here */
43};
44
45struct rt_sigframe_ia32 {
46 u32 pretcode;
47 int sig;
48 u32 pinfo;
49 u32 puc;
50#ifdef CONFIG_IA32_EMULATION
51 compat_siginfo_t info;
52#else /* !CONFIG_IA32_EMULATION */
53 struct siginfo info;
54#endif /* CONFIG_IA32_EMULATION */
55 struct ucontext_ia32 uc;
56 char retcode[8];
57 /* fp state follows here */
58};
59#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */
60
61#ifdef CONFIG_X86_64
62struct rt_sigframe {
63 char __user *pretcode;
64 struct ucontext uc;
65 struct siginfo info;
66 /* fp state follows here */
67};
68#endif /* CONFIG_X86_64 */
69
70#endif /* _ASM_X86_SIGFRAME_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 96ac44f275da..7761a5d554bb 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -121,6 +121,10 @@ typedef unsigned long sigset_t;
121 121
122#ifndef __ASSEMBLY__ 122#ifndef __ASSEMBLY__
123 123
124# ifdef __KERNEL__
125extern void do_notify_resume(struct pt_regs *, void *, __u32);
126# endif /* __KERNEL__ */
127
124#ifdef __i386__ 128#ifdef __i386__
125# ifdef __KERNEL__ 129# ifdef __KERNEL__
126struct old_sigaction { 130struct old_sigaction {
@@ -141,8 +145,6 @@ struct k_sigaction {
141 struct sigaction sa; 145 struct sigaction sa;
142}; 146};
143 147
144extern void do_notify_resume(struct pt_regs *, void *, __u32);
145
146# else /* __KERNEL__ */ 148# else /* __KERNEL__ */
147/* Here we must cater to libcs that poke about in kernel headers. */ 149/* Here we must cater to libcs that poke about in kernel headers. */
148 150
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index be44f7dab395..e3cc3c063ec5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
27#else /* CONFIG_X86_32 */ 27#else /* CONFIG_X86_32 */
28# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ 28# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
29# define MAX_PHYSADDR_BITS 44 29# define MAX_PHYSADDR_BITS 44
30# define MAX_PHYSMEM_BITS 44 30# define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */
31#endif 31#endif
32 32
33#endif /* CONFIG_SPARSEMEM */ 33#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 3a5252c4b8d6..9c6797c3e56c 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -19,6 +19,13 @@
19/* kernel/ioport.c */ 19/* kernel/ioport.c */
20asmlinkage long sys_ioperm(unsigned long, unsigned long, int); 20asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21 21
22/* kernel/ldt.c */
23asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
24
25/* kernel/tls.c */
26asmlinkage int sys_set_thread_area(struct user_desc __user *);
27asmlinkage int sys_get_thread_area(struct user_desc __user *);
28
22/* X86_32 only */ 29/* X86_32 only */
23#ifdef CONFIG_X86_32 30#ifdef CONFIG_X86_32
24/* kernel/process_32.c */ 31/* kernel/process_32.c */
@@ -38,9 +45,6 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs);
38/* kernel/ioport.c */ 45/* kernel/ioport.c */
39asmlinkage long sys_iopl(unsigned long); 46asmlinkage long sys_iopl(unsigned long);
40 47
41/* kernel/ldt.c */
42asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
43
44/* kernel/sys_i386_32.c */ 48/* kernel/sys_i386_32.c */
45asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, 49asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
46 unsigned long, unsigned long, unsigned long); 50 unsigned long, unsigned long, unsigned long);
@@ -54,10 +58,6 @@ asmlinkage int sys_uname(struct old_utsname __user *);
54struct oldold_utsname; 58struct oldold_utsname;
55asmlinkage int sys_olduname(struct oldold_utsname __user *); 59asmlinkage int sys_olduname(struct oldold_utsname __user *);
56 60
57/* kernel/tls.c */
58asmlinkage int sys_set_thread_area(struct user_desc __user *);
59asmlinkage int sys_get_thread_area(struct user_desc __user *);
60
61/* kernel/vm86_32.c */ 61/* kernel/vm86_32.c */
62asmlinkage int sys_vm86old(struct pt_regs); 62asmlinkage int sys_vm86old(struct pt_regs);
63asmlinkage int sys_vm86(struct pt_regs); 63asmlinkage int sys_vm86(struct pt_regs);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2ed3f0f44ff7..8e626ea33a1a 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -17,12 +17,12 @@
17# define AT_VECTOR_SIZE_ARCH 1 17# define AT_VECTOR_SIZE_ARCH 1
18#endif 18#endif
19 19
20#ifdef CONFIG_X86_32
21
22struct task_struct; /* one of the stranger aspects of C forward declarations */ 20struct task_struct; /* one of the stranger aspects of C forward declarations */
23struct task_struct *__switch_to(struct task_struct *prev, 21struct task_struct *__switch_to(struct task_struct *prev,
24 struct task_struct *next); 22 struct task_struct *next);
25 23
24#ifdef CONFIG_X86_32
25
26/* 26/*
27 * Saving eflags is important. It switches not only IOPL between tasks, 27 * Saving eflags is important. It switches not only IOPL between tasks,
28 * it also protects other tasks from NT leaking through sysenter etc. 28 * it also protects other tasks from NT leaking through sysenter etc.
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
314 314
315void default_idle(void); 315void default_idle(void);
316 316
317void stop_this_cpu(void *dummy);
318
317/* 319/*
318 * Force strict CPU ordering. 320 * Force strict CPU ordering.
319 * And yes, this is required on UP too when we're talking 321 * And yes, this is required on UP too when we're talking
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 810bf266d134..efdf93820aed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,11 +20,13 @@
20struct task_struct; 20struct task_struct;
21struct exec_domain; 21struct exec_domain;
22#include <asm/processor.h> 22#include <asm/processor.h>
23#include <asm/ftrace.h>
24#include <asm/atomic.h>
23 25
24struct thread_info { 26struct thread_info {
25 struct task_struct *task; /* main task structure */ 27 struct task_struct *task; /* main task structure */
26 struct exec_domain *exec_domain; /* execution domain */ 28 struct exec_domain *exec_domain; /* execution domain */
27 unsigned long flags; /* low level flags */ 29 __u32 flags; /* low level flags */
28 __u32 status; /* thread synchronous flags */ 30 __u32 status; /* thread synchronous flags */
29 __u32 cpu; /* current CPU */ 31 __u32 cpu; /* current CPU */
30 int preempt_count; /* 0 => preemptable, 32 int preempt_count; /* 0 => preemptable,
@@ -92,7 +94,6 @@ struct thread_info {
92#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
93#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 95#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
94#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ 96#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
95#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
96 97
97#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 98#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
98#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 99#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -115,7 +116,6 @@ struct thread_info {
115#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
116#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 117#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
117#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) 118#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
118#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
119 119
120/* work to do in syscall_trace_enter() */ 120/* work to do in syscall_trace_enter() */
121#define _TIF_WORK_SYSCALL_ENTRY \ 121#define _TIF_WORK_SYSCALL_ENTRY \
@@ -141,8 +141,7 @@ struct thread_info {
141 141
142/* flags to check in __switch_to() */ 142/* flags to check in __switch_to() */
143#define _TIF_WORK_CTXSW \ 143#define _TIF_WORK_CTXSW \
144 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ 144 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
145 _TIF_NOTSC)
146 145
147#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW 146#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
148#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 147#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index fa0d79facdbc..780ba0ab94f9 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -3,6 +3,7 @@
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5 5
6#ifdef CONFIG_X86_TRAMPOLINE
6/* 7/*
7 * Trampoline 80x86 program as an array. 8 * Trampoline 80x86 program as an array.
8 */ 9 */
@@ -13,8 +14,14 @@ extern unsigned char *trampoline_base;
13extern unsigned long init_rsp; 14extern unsigned long init_rsp;
14extern unsigned long initial_code; 15extern unsigned long initial_code;
15 16
17#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
16#define TRAMPOLINE_BASE 0x6000 18#define TRAMPOLINE_BASE 0x6000
19
17extern unsigned long setup_trampoline(void); 20extern unsigned long setup_trampoline(void);
21extern void __init reserve_trampoline_memory(void);
22#else
23static inline void reserve_trampoline_memory(void) {};
24#endif /* CONFIG_X86_TRAMPOLINE */
18 25
19#endif /* __ASSEMBLY__ */ 26#endif /* __ASSEMBLY__ */
20 27
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 45dee286e45c..2ee0a3bceedf 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -46,6 +46,10 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
46dotraplinkage void do_invalid_TSS(struct pt_regs *, long); 46dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
47dotraplinkage void do_segment_not_present(struct pt_regs *, long); 47dotraplinkage void do_segment_not_present(struct pt_regs *, long);
48dotraplinkage void do_stack_segment(struct pt_regs *, long); 48dotraplinkage void do_stack_segment(struct pt_regs *, long);
49#ifdef CONFIG_X86_64
50dotraplinkage void do_double_fault(struct pt_regs *, long);
51asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
52#endif
49dotraplinkage void do_general_protection(struct pt_regs *, long); 53dotraplinkage void do_general_protection(struct pt_regs *, long);
50dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 54dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
51dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); 55dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
@@ -72,10 +76,13 @@ static inline int get_si_code(unsigned long condition)
72extern int panic_on_unrecovered_nmi; 76extern int panic_on_unrecovered_nmi;
73extern int kstack_depth_to_print; 77extern int kstack_depth_to_print;
74 78
75#ifdef CONFIG_X86_32
76void math_error(void __user *); 79void math_error(void __user *);
77unsigned long patch_espfix_desc(unsigned long, unsigned long);
78asmlinkage void math_emulate(long); 80asmlinkage void math_emulate(long);
81#ifdef CONFIG_X86_32
82unsigned long patch_espfix_desc(unsigned long, unsigned long);
83#else
84asmlinkage void smp_thermal_interrupt(void);
85asmlinkage void mce_threshold_interrupt(void);
79#endif 86#endif
80 87
81#endif /* _ASM_X86_TRAPS_H */ 88#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..580c3ee6c58c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -350,14 +350,14 @@ do { \
350 350
351#define __put_user_nocheck(x, ptr, size) \ 351#define __put_user_nocheck(x, ptr, size) \
352({ \ 352({ \
353 long __pu_err; \ 353 int __pu_err; \
354 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ 354 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
355 __pu_err; \ 355 __pu_err; \
356}) 356})
357 357
358#define __get_user_nocheck(x, ptr, size) \ 358#define __get_user_nocheck(x, ptr, size) \
359({ \ 359({ \
360 long __gu_err; \ 360 int __gu_err; \
361 unsigned long __gu_val; \ 361 unsigned long __gu_val; \
362 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ 362 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
363 (x) = (__force __typeof__(*(ptr)))__gu_val; \ 363 (x) = (__force __typeof__(*(ptr)))__gu_val; \
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index d931d3b7e6f7..7ed17ff502b9 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -32,13 +32,18 @@
32enum uv_bios_cmd { 32enum uv_bios_cmd {
33 UV_BIOS_COMMON, 33 UV_BIOS_COMMON,
34 UV_BIOS_GET_SN_INFO, 34 UV_BIOS_GET_SN_INFO,
35 UV_BIOS_FREQ_BASE 35 UV_BIOS_FREQ_BASE,
36 UV_BIOS_WATCHLIST_ALLOC,
37 UV_BIOS_WATCHLIST_FREE,
38 UV_BIOS_MEMPROTECT,
39 UV_BIOS_GET_PARTITION_ADDR
36}; 40};
37 41
38/* 42/*
39 * Status values returned from a BIOS call. 43 * Status values returned from a BIOS call.
40 */ 44 */
41enum { 45enum {
46 BIOS_STATUS_MORE_PASSES = 1,
42 BIOS_STATUS_SUCCESS = 0, 47 BIOS_STATUS_SUCCESS = 0,
43 BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, 48 BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
44 BIOS_STATUS_EINVAL = -EINVAL, 49 BIOS_STATUS_EINVAL = -EINVAL,
@@ -71,6 +76,21 @@ union partition_info_u {
71 }; 76 };
72}; 77};
73 78
79union uv_watchlist_u {
80 u64 val;
81 struct {
82 u64 blade : 16,
83 size : 32,
84 filler : 16;
85 };
86};
87
88enum uv_memprotect {
89 UV_MEMPROT_RESTRICT_ACCESS,
90 UV_MEMPROT_ALLOW_AMO,
91 UV_MEMPROT_ALLOW_RW
92};
93
74/* 94/*
75 * bios calls have 6 parameters 95 * bios calls have 6 parameters
76 */ 96 */
@@ -80,14 +100,20 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
80 100
81extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); 101extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
82extern s64 uv_bios_freq_base(u64, u64 *); 102extern s64 uv_bios_freq_base(u64, u64 *);
103extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int,
104 unsigned long *);
105extern int uv_bios_mq_watchlist_free(int, int);
106extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
107extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
83 108
84extern void uv_bios_init(void); 109extern void uv_bios_init(void);
85 110
111extern unsigned long sn_rtc_cycles_per_second;
86extern int uv_type; 112extern int uv_type;
87extern long sn_partition_id; 113extern long sn_partition_id;
88extern long uv_coherency_id; 114extern long sn_coherency_id;
89extern long uv_region_size; 115extern long sn_region_size;
90#define partition_coherence_id() (uv_coherency_id) 116#define partition_coherence_id() (sn_coherency_id)
91 117
92extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ 118extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
93 119
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 7a5782610b2b..777327ef05c1 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -113,25 +113,37 @@
113 */ 113 */
114#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) 114#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2)
115 115
116struct uv_scir_s {
117 struct timer_list timer;
118 unsigned long offset;
119 unsigned long last;
120 unsigned long idle_on;
121 unsigned long idle_off;
122 unsigned char state;
123 unsigned char enabled;
124};
125
116/* 126/*
117 * The following defines attributes of the HUB chip. These attributes are 127 * The following defines attributes of the HUB chip. These attributes are
118 * frequently referenced and are kept in the per-cpu data areas of each cpu. 128 * frequently referenced and are kept in the per-cpu data areas of each cpu.
119 * They are kept together in a struct to minimize cache misses. 129 * They are kept together in a struct to minimize cache misses.
120 */ 130 */
121struct uv_hub_info_s { 131struct uv_hub_info_s {
122 unsigned long global_mmr_base; 132 unsigned long global_mmr_base;
123 unsigned long gpa_mask; 133 unsigned long gpa_mask;
124 unsigned long gnode_upper; 134 unsigned long gnode_upper;
125 unsigned long lowmem_remap_top; 135 unsigned long lowmem_remap_top;
126 unsigned long lowmem_remap_base; 136 unsigned long lowmem_remap_base;
127 unsigned short pnode; 137 unsigned short pnode;
128 unsigned short pnode_mask; 138 unsigned short pnode_mask;
129 unsigned short coherency_domain_number; 139 unsigned short coherency_domain_number;
130 unsigned short numa_blade_id; 140 unsigned short numa_blade_id;
131 unsigned char blade_processor_id; 141 unsigned char blade_processor_id;
132 unsigned char m_val; 142 unsigned char m_val;
133 unsigned char n_val; 143 unsigned char n_val;
144 struct uv_scir_s scir;
134}; 145};
146
135DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 147DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
136#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) 148#define uv_hub_info (&__get_cpu_var(__uv_hub_info))
137#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) 149#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu))
@@ -163,6 +175,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
163 175
164#define UV_APIC_PNODE_SHIFT 6 176#define UV_APIC_PNODE_SHIFT 6
165 177
178/* Local Bus from cpu's perspective */
179#define LOCAL_BUS_BASE 0x1c00000
180#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
181
182/*
183 * System Controller Interface Reg
184 *
185 * Note there are NO leds on a UV system. This register is only
186 * used by the system controller to monitor system-wide operation.
187 * There are 64 regs per node. With Nahelem cpus (2 cores per node,
188 * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
189 * a node.
190 *
191 * The window is located at top of ACPI MMR space
192 */
193#define SCIR_WINDOW_COUNT 64
194#define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \
195 LOCAL_BUS_SIZE - \
196 SCIR_WINDOW_COUNT)
197
198#define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */
199#define SCIR_CPU_ACTIVITY 0x02 /* not idle */
200#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */
201
166/* 202/*
167 * Macros for converting between kernel virtual addresses, socket local physical 203 * Macros for converting between kernel virtual addresses, socket local physical
168 * addresses, and UV global physical addresses. 204 * addresses, and UV global physical addresses.
@@ -174,7 +210,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
174static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) 210static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
175{ 211{
176 if (paddr < uv_hub_info->lowmem_remap_top) 212 if (paddr < uv_hub_info->lowmem_remap_top)
177 paddr += uv_hub_info->lowmem_remap_base; 213 paddr |= uv_hub_info->lowmem_remap_base;
178 return paddr | uv_hub_info->gnode_upper; 214 return paddr | uv_hub_info->gnode_upper;
179} 215}
180 216
@@ -182,19 +218,7 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
182/* socket virtual --> UV global physical address */ 218/* socket virtual --> UV global physical address */
183static inline unsigned long uv_gpa(void *v) 219static inline unsigned long uv_gpa(void *v)
184{ 220{
185 return __pa(v) | uv_hub_info->gnode_upper; 221 return uv_soc_phys_ram_to_gpa(__pa(v));
186}
187
188/* socket virtual --> UV global physical address */
189static inline void *uv_vgpa(void *v)
190{
191 return (void *)uv_gpa(v);
192}
193
194/* UV global physical address --> socket virtual */
195static inline void *uv_va(unsigned long gpa)
196{
197 return __va(gpa & uv_hub_info->gpa_mask);
198} 222}
199 223
200/* pnode, offset --> socket virtual */ 224/* pnode, offset --> socket virtual */
@@ -277,6 +301,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
277 *uv_local_mmr_address(offset) = val; 301 *uv_local_mmr_address(offset) = val;
278} 302}
279 303
304static inline unsigned char uv_read_local_mmr8(unsigned long offset)
305{
306 return *((unsigned char *)uv_local_mmr_address(offset));
307}
308
309static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
310{
311 *((unsigned char *)uv_local_mmr_address(offset)) = val;
312}
313
280/* 314/*
281 * Structures and definitions for converting between cpu, node, pnode, and blade 315 * Structures and definitions for converting between cpu, node, pnode, and blade
282 * numbers. 316 * numbers.
@@ -351,5 +385,20 @@ static inline int uv_num_possible_blades(void)
351 return uv_possible_blades; 385 return uv_possible_blades;
352} 386}
353 387
354#endif /* _ASM_X86_UV_UV_HUB_H */ 388/* Update SCIR state */
389static inline void uv_set_scir_bits(unsigned char value)
390{
391 if (uv_hub_info->scir.state != value) {
392 uv_hub_info->scir.state = value;
393 uv_write_local_mmr8(uv_hub_info->scir.offset, value);
394 }
395}
396static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
397{
398 if (uv_cpu_hub_info(cpu)->scir.state != value) {
399 uv_cpu_hub_info(cpu)->scir.state = value;
400 uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
401 }
402}
355 403
404#endif /* _ASM_X86_UV_UV_HUB_H */
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
index b7c0dea119fe..61e08c0a2907 100644
--- a/arch/x86/include/asm/vmi.h
+++ b/arch/x86/include/asm/vmi.h
@@ -223,9 +223,15 @@ struct pci_header {
223} __attribute__((packed)); 223} __attribute__((packed));
224 224
225/* Function prototypes for bootstrapping */ 225/* Function prototypes for bootstrapping */
226#ifdef CONFIG_VMI
226extern void vmi_init(void); 227extern void vmi_init(void);
228extern void vmi_activate(void);
227extern void vmi_bringup(void); 229extern void vmi_bringup(void);
228extern void vmi_apply_boot_page_allocations(void); 230#else
231static inline void vmi_init(void) {}
232static inline void vmi_activate(void) {}
233static inline void vmi_bringup(void) {}
234#endif
229 235
230/* State needed to start an application processor in an SMP system. */ 236/* State needed to start an application processor in an SMP system. */
231struct vmi_ap_state { 237struct vmi_ap_state {
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
new file mode 100644
index 000000000000..c11b7e100d83
--- /dev/null
+++ b/arch/x86/include/asm/vmware.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright (C) 2008, VMware, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
12 * NON INFRINGEMENT. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20#ifndef ASM_X86__VMWARE_H
21#define ASM_X86__VMWARE_H
22
23extern unsigned long vmware_get_tsc_khz(void);
24extern int vmware_platform(void);
25extern void vmware_set_feature_bits(struct cpuinfo_x86 *c);
26
27#endif
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3f6000d95fe2..5e79ca694326 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -33,8 +33,14 @@
33#ifndef _ASM_X86_XEN_HYPERCALL_H 33#ifndef _ASM_X86_XEN_HYPERCALL_H
34#define _ASM_X86_XEN_HYPERCALL_H 34#define _ASM_X86_XEN_HYPERCALL_H
35 35
36#include <linux/kernel.h>
37#include <linux/spinlock.h>
36#include <linux/errno.h> 38#include <linux/errno.h>
37#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/types.h>
41
42#include <asm/page.h>
43#include <asm/pgtable.h>
38 44
39#include <xen/interface/xen.h> 45#include <xen/interface/xen.h>
40#include <xen/interface/sched.h> 46#include <xen/interface/sched.h>
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index a38d25ac87d2..81fbd735aec4 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -33,39 +33,10 @@
33#ifndef _ASM_X86_XEN_HYPERVISOR_H 33#ifndef _ASM_X86_XEN_HYPERVISOR_H
34#define _ASM_X86_XEN_HYPERVISOR_H 34#define _ASM_X86_XEN_HYPERVISOR_H
35 35
36#include <linux/types.h>
37#include <linux/kernel.h>
38
39#include <xen/interface/xen.h>
40#include <xen/interface/version.h>
41
42#include <asm/ptrace.h>
43#include <asm/page.h>
44#include <asm/desc.h>
45#if defined(__i386__)
46# ifdef CONFIG_X86_PAE
47# include <asm-generic/pgtable-nopud.h>
48# else
49# include <asm-generic/pgtable-nopmd.h>
50# endif
51#endif
52#include <asm/xen/hypercall.h>
53
54/* arch/i386/kernel/setup.c */ 36/* arch/i386/kernel/setup.c */
55extern struct shared_info *HYPERVISOR_shared_info; 37extern struct shared_info *HYPERVISOR_shared_info;
56extern struct start_info *xen_start_info; 38extern struct start_info *xen_start_info;
57 39
58/* arch/i386/mach-xen/evtchn.c */
59/* Force a proper event-channel callback from Xen. */
60extern void force_evtchn_callback(void);
61
62/* Turn jiffies into Xen system time. */
63u64 jiffies_to_st(unsigned long jiffies);
64
65
66#define MULTI_UVMFLAGS_INDEX 3
67#define MULTI_UVMDOMID_INDEX 4
68
69enum xen_domain_type { 40enum xen_domain_type {
70 XEN_NATIVE, 41 XEN_NATIVE,
71 XEN_PV_DOMAIN, 42 XEN_PV_DOMAIN,
@@ -74,9 +45,15 @@ enum xen_domain_type {
74 45
75extern enum xen_domain_type xen_domain_type; 46extern enum xen_domain_type xen_domain_type;
76 47
48#ifdef CONFIG_XEN
77#define xen_domain() (xen_domain_type != XEN_NATIVE) 49#define xen_domain() (xen_domain_type != XEN_NATIVE)
78#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) 50#else
51#define xen_domain() (0)
52#endif
53
54#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN)
55#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN)
56
79#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) 57#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
80#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN)
81 58
82#endif /* _ASM_X86_XEN_HYPERVISOR_H */ 59#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index bc628998a1b9..7ef617ef1df3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -1,11 +1,16 @@
1#ifndef _ASM_X86_XEN_PAGE_H 1#ifndef _ASM_X86_XEN_PAGE_H
2#define _ASM_X86_XEN_PAGE_H 2#define _ASM_X86_XEN_PAGE_H
3 3
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/spinlock.h>
4#include <linux/pfn.h> 7#include <linux/pfn.h>
5 8
6#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <asm/page.h>
7#include <asm/pgtable.h> 11#include <asm/pgtable.h>
8 12
13#include <xen/interface/xen.h>
9#include <xen/features.h> 14#include <xen/features.h>
10 15
11/* Xen machine address */ 16/* Xen machine address */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3d4346a73a8f..88dd768eab6d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,7 @@ CFLAGS_tsc.o := $(nostackp)
26 26
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 27obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
29obj-y += time_$(BITS).o ioport.o ldt.o 29obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
31obj-$(CONFIG_X86_VISWS) += visws_quirks.o 31obj-$(CONFIG_X86_VISWS) += visws_quirks.o
32obj-$(CONFIG_X86_32) += probe_roms_32.o 32obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -66,6 +66,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
66obj-$(CONFIG_X86_IO_APIC) += io_apic.o 66obj-$(CONFIG_X86_IO_APIC) += io_apic.o
67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
69obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
69obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
70obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
71obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
@@ -106,6 +107,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
106microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 107microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
107obj-$(CONFIG_MICROCODE) += microcode.o 108obj-$(CONFIG_MICROCODE) += microcode.o
108 109
110obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
111
109### 112###
110# 64 bit specific files 113# 64 bit specific files
111ifeq ($(CONFIG_X86_64),y) 114ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4c51a2f8fd31..65d0b72777ea 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void)
1360 disable_acpi(); 1360 disable_acpi();
1361 } 1361 }
1362 } 1362 }
1363
1364 /*
1365 * ACPI supports both logical (e.g. Hyper-Threading) and physical
1366 * processors, where MPS only supports physical.
1367 */
1368 if (acpi_lapic && acpi_ioapic)
1369 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
1370 "information\n");
1371 else if (acpi_lapic)
1372 printk(KERN_INFO "Using ACPI for processor (LAPIC) "
1373 "configuration information\n");
1363#endif 1374#endif
1364 return; 1375 return;
1365} 1376}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a7b6dec6fc3f..2e2da717b350 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -24,6 +24,7 @@
24#include <linux/iommu-helper.h> 24#include <linux/iommu-helper.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/iommu.h> 26#include <asm/iommu.h>
27#include <asm/gart.h>
27#include <asm/amd_iommu_types.h> 28#include <asm/amd_iommu_types.h>
28#include <asm/amd_iommu.h> 29#include <asm/amd_iommu.h>
29 30
@@ -235,8 +236,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
235 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; 236 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
236 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); 237 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
237 238
238 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) 239 if (unlikely(i == EXIT_LOOP_COUNT))
239 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); 240 panic("AMD IOMMU: Completion wait loop failed\n");
241
240out: 242out:
241 spin_unlock_irqrestore(&iommu->lock, flags); 243 spin_unlock_irqrestore(&iommu->lock, flags);
242 244
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 30ae2701b3df..c625800c55ca 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -28,6 +28,7 @@
28#include <asm/amd_iommu_types.h> 28#include <asm/amd_iommu_types.h>
29#include <asm/amd_iommu.h> 29#include <asm/amd_iommu.h>
30#include <asm/iommu.h> 30#include <asm/iommu.h>
31#include <asm/gart.h>
31 32
32/* 33/*
33 * definitions for the ACPI scanning code 34 * definitions for the ACPI scanning code
@@ -427,6 +428,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
427 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 428 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
428 &entry, sizeof(entry)); 429 &entry, sizeof(entry));
429 430
431 /* set head and tail to zero manually */
432 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
433 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
434
430 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 435 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
431 436
432 return cmd_buf; 437 return cmd_buf;
@@ -1074,7 +1079,8 @@ int __init amd_iommu_init(void)
1074 goto free; 1079 goto free;
1075 1080
1076 /* IOMMU rlookup table - find the IOMMU for a specific device */ 1081 /* IOMMU rlookup table - find the IOMMU for a specific device */
1077 amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, 1082 amd_iommu_rlookup_table = (void *)__get_free_pages(
1083 GFP_KERNEL | __GFP_ZERO,
1078 get_order(rlookup_table_size)); 1084 get_order(rlookup_table_size));
1079 if (amd_iommu_rlookup_table == NULL) 1085 if (amd_iommu_rlookup_table == NULL)
1080 goto free; 1086 goto free;
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9a32b37ee2ee..676debfc1702 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,8 +1,9 @@
1/* 1/*
2 * Firmware replacement code. 2 * Firmware replacement code.
3 * 3 *
4 * Work around broken BIOSes that don't set an aperture or only set the 4 * Work around broken BIOSes that don't set an aperture, only set the
5 * aperture in the AGP bridge. 5 * aperture in the AGP bridge, or set too small aperture.
6 *
6 * If all fails map the aperture over some low memory. This is cheaper than 7 * If all fails map the aperture over some low memory. This is cheaper than
7 * doing bounce buffering. The memory is lost. This is done at early boot 8 * doing bounce buffering. The memory is lost. This is done at early boot
8 * because only the bootmem allocator can allocate 32+MB. 9 * because only the bootmem allocator can allocate 32+MB.
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 4f859acb1563..6c83ac10e6d3 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/dmi.h> 31#include <linux/dmi.h>
32#include <linux/dmar.h> 32#include <linux/dmar.h>
33#include <linux/ftrace.h>
33 34
34#include <asm/perf_counter.h> 35#include <asm/perf_counter.h>
35#include <asm/atomic.h> 36#include <asm/atomic.h>
@@ -442,6 +443,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
442 v = apic_read(APIC_LVTT); 443 v = apic_read(APIC_LVTT);
443 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 444 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
444 apic_write(APIC_LVTT, v); 445 apic_write(APIC_LVTT, v);
446 apic_write(APIC_TMICT, 0xffffffff);
445 break; 447 break;
446 case CLOCK_EVT_MODE_RESUME: 448 case CLOCK_EVT_MODE_RESUME:
447 /* Nothing to do here */ 449 /* Nothing to do here */
@@ -560,13 +562,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
560 } else { 562 } else {
561 res = (((u64)deltapm) * mult) >> 22; 563 res = (((u64)deltapm) * mult) >> 22;
562 do_div(res, 1000000); 564 do_div(res, 1000000);
563 printk(KERN_WARNING "APIC calibration not consistent " 565 pr_warning("APIC calibration not consistent "
564 "with PM Timer: %ldms instead of 100ms\n", 566 "with PM Timer: %ldms instead of 100ms\n",
565 (long)res); 567 (long)res);
566 /* Correct the lapic counter value */ 568 /* Correct the lapic counter value */
567 res = (((u64)(*delta)) * pm_100ms); 569 res = (((u64)(*delta)) * pm_100ms);
568 do_div(res, deltapm); 570 do_div(res, deltapm);
569 printk(KERN_INFO "APIC delta adjusted to PM-Timer: " 571 pr_info("APIC delta adjusted to PM-Timer: "
570 "%lu (%ld)\n", (unsigned long)res, *delta); 572 "%lu (%ld)\n", (unsigned long)res, *delta);
571 *delta = (long)res; 573 *delta = (long)res;
572 } 574 }
@@ -646,8 +648,7 @@ static int __init calibrate_APIC_clock(void)
646 */ 648 */
647 if (calibration_result < (1000000 / HZ)) { 649 if (calibration_result < (1000000 / HZ)) {
648 local_irq_enable(); 650 local_irq_enable();
649 printk(KERN_WARNING 651 pr_warning("APIC frequency too slow, disabling apic timer\n");
650 "APIC frequency too slow, disabling apic timer\n");
651 return -1; 652 return -1;
652 } 653 }
653 654
@@ -673,13 +674,9 @@ static int __init calibrate_APIC_clock(void)
673 while (lapic_cal_loops <= LAPIC_CAL_LOOPS) 674 while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
674 cpu_relax(); 675 cpu_relax();
675 676
676 local_irq_disable();
677
678 /* Stop the lapic timer */ 677 /* Stop the lapic timer */
679 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); 678 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
680 679
681 local_irq_enable();
682
683 /* Jiffies delta */ 680 /* Jiffies delta */
684 deltaj = lapic_cal_j2 - lapic_cal_j1; 681 deltaj = lapic_cal_j2 - lapic_cal_j1;
685 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); 682 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
@@ -693,8 +690,7 @@ static int __init calibrate_APIC_clock(void)
693 local_irq_enable(); 690 local_irq_enable();
694 691
695 if (levt->features & CLOCK_EVT_FEAT_DUMMY) { 692 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
696 printk(KERN_WARNING 693 pr_warning("APIC timer disabled due to verification failure.\n");
697 "APIC timer disabled due to verification failure.\n");
698 return -1; 694 return -1;
699 } 695 }
700 696
@@ -715,7 +711,7 @@ void __init setup_boot_APIC_clock(void)
715 * broadcast mechanism is used. On UP systems simply ignore it. 711 * broadcast mechanism is used. On UP systems simply ignore it.
716 */ 712 */
717 if (disable_apic_timer) { 713 if (disable_apic_timer) {
718 printk(KERN_INFO "Disabling APIC timer\n"); 714 pr_info("Disabling APIC timer\n");
719 /* No broadcast on UP ! */ 715 /* No broadcast on UP ! */
720 if (num_possible_cpus() > 1) { 716 if (num_possible_cpus() > 1) {
721 lapic_clockevent.mult = 1; 717 lapic_clockevent.mult = 1;
@@ -742,7 +738,7 @@ void __init setup_boot_APIC_clock(void)
742 if (nmi_watchdog != NMI_IO_APIC) 738 if (nmi_watchdog != NMI_IO_APIC)
743 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 739 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
744 else 740 else
745 printk(KERN_WARNING "APIC timer registered as dummy," 741 pr_warning("APIC timer registered as dummy,"
746 " due to nmi_watchdog=%d!\n", nmi_watchdog); 742 " due to nmi_watchdog=%d!\n", nmi_watchdog);
747 743
748 /* Setup the lapic or request the broadcast */ 744 /* Setup the lapic or request the broadcast */
@@ -774,8 +770,7 @@ static void local_apic_timer_interrupt(void)
774 * spurious. 770 * spurious.
775 */ 771 */
776 if (!evt->event_handler) { 772 if (!evt->event_handler) {
777 printk(KERN_WARNING 773 pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
778 "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
779 /* Switch it off */ 774 /* Switch it off */
780 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); 775 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
781 return; 776 return;
@@ -797,7 +792,7 @@ static void local_apic_timer_interrupt(void)
797 * [ if a single-CPU system runs an SMP kernel then we call the local 792 * [ if a single-CPU system runs an SMP kernel then we call the local
798 * interrupt as well. Thus we cannot inline the local irq ... ] 793 * interrupt as well. Thus we cannot inline the local irq ... ]
799 */ 794 */
800void smp_apic_timer_interrupt(struct pt_regs *regs) 795void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
801{ 796{
802 struct pt_regs *old_regs = set_irq_regs(regs); 797 struct pt_regs *old_regs = set_irq_regs(regs);
803 798
@@ -811,9 +806,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
811 * Besides, if we don't timer interrupts ignore the global 806 * Besides, if we don't timer interrupts ignore the global
812 * interrupt lock, which is the WrongThing (tm) to do. 807 * interrupt lock, which is the WrongThing (tm) to do.
813 */ 808 */
814#ifdef CONFIG_X86_64
815 exit_idle(); 809 exit_idle();
816#endif
817 irq_enter(); 810 irq_enter();
818 local_apic_timer_interrupt(); 811 local_apic_timer_interrupt();
819 irq_exit(); 812 irq_exit();
@@ -1090,7 +1083,7 @@ static void __cpuinit lapic_setup_esr(void)
1090 unsigned int oldvalue, value, maxlvt; 1083 unsigned int oldvalue, value, maxlvt;
1091 1084
1092 if (!lapic_is_integrated()) { 1085 if (!lapic_is_integrated()) {
1093 printk(KERN_INFO "No ESR for 82489DX.\n"); 1086 pr_info("No ESR for 82489DX.\n");
1094 return; 1087 return;
1095 } 1088 }
1096 1089
@@ -1101,7 +1094,7 @@ static void __cpuinit lapic_setup_esr(void)
1101 * ESR disabled - we can't do anything useful with the 1094 * ESR disabled - we can't do anything useful with the
1102 * errors anyway - mbligh 1095 * errors anyway - mbligh
1103 */ 1096 */
1104 printk(KERN_INFO "Leaving ESR disabled.\n"); 1097 pr_info("Leaving ESR disabled.\n");
1105 return; 1098 return;
1106 } 1099 }
1107 1100
@@ -1296,7 +1289,7 @@ void check_x2apic(void)
1296 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1289 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1297 1290
1298 if (msr & X2APIC_ENABLE) { 1291 if (msr & X2APIC_ENABLE) {
1299 printk("x2apic enabled by BIOS, switching to x2apic ops\n"); 1292 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
1300 x2apic_preenabled = x2apic = 1; 1293 x2apic_preenabled = x2apic = 1;
1301 apic_ops = &x2apic_ops; 1294 apic_ops = &x2apic_ops;
1302 } 1295 }
@@ -1308,7 +1301,7 @@ void enable_x2apic(void)
1308 1301
1309 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1302 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1310 if (!(msr & X2APIC_ENABLE)) { 1303 if (!(msr & X2APIC_ENABLE)) {
1311 printk("Enabling x2apic\n"); 1304 pr_info("Enabling x2apic\n");
1312 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); 1305 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1313 } 1306 }
1314} 1307}
@@ -1323,9 +1316,8 @@ void __init enable_IR_x2apic(void)
1323 return; 1316 return;
1324 1317
1325 if (!x2apic_preenabled && disable_x2apic) { 1318 if (!x2apic_preenabled && disable_x2apic) {
1326 printk(KERN_INFO 1319 pr_info("Skipped enabling x2apic and Interrupt-remapping "
1327 "Skipped enabling x2apic and Interrupt-remapping " 1320 "because of nox2apic\n");
1328 "because of nox2apic\n");
1329 return; 1321 return;
1330 } 1322 }
1331 1323
@@ -1333,22 +1325,19 @@ void __init enable_IR_x2apic(void)
1333 panic("Bios already enabled x2apic, can't enforce nox2apic"); 1325 panic("Bios already enabled x2apic, can't enforce nox2apic");
1334 1326
1335 if (!x2apic_preenabled && skip_ioapic_setup) { 1327 if (!x2apic_preenabled && skip_ioapic_setup) {
1336 printk(KERN_INFO 1328 pr_info("Skipped enabling x2apic and Interrupt-remapping "
1337 "Skipped enabling x2apic and Interrupt-remapping " 1329 "because of skipping io-apic setup\n");
1338 "because of skipping io-apic setup\n");
1339 return; 1330 return;
1340 } 1331 }
1341 1332
1342 ret = dmar_table_init(); 1333 ret = dmar_table_init();
1343 if (ret) { 1334 if (ret) {
1344 printk(KERN_INFO 1335 pr_info("dmar_table_init() failed with %d:\n", ret);
1345 "dmar_table_init() failed with %d:\n", ret);
1346 1336
1347 if (x2apic_preenabled) 1337 if (x2apic_preenabled)
1348 panic("x2apic enabled by bios. But IR enabling failed"); 1338 panic("x2apic enabled by bios. But IR enabling failed");
1349 else 1339 else
1350 printk(KERN_INFO 1340 pr_info("Not enabling x2apic,Intr-remapping\n");
1351 "Not enabling x2apic,Intr-remapping\n");
1352 return; 1341 return;
1353 } 1342 }
1354 1343
@@ -1357,7 +1346,7 @@ void __init enable_IR_x2apic(void)
1357 1346
1358 ret = save_mask_IO_APIC_setup(); 1347 ret = save_mask_IO_APIC_setup();
1359 if (ret) { 1348 if (ret) {
1360 printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); 1349 pr_info("Saving IO-APIC state failed: %d\n", ret);
1361 goto end; 1350 goto end;
1362 } 1351 }
1363 1352
@@ -1392,14 +1381,11 @@ end:
1392 1381
1393 if (!ret) { 1382 if (!ret) {
1394 if (!x2apic_preenabled) 1383 if (!x2apic_preenabled)
1395 printk(KERN_INFO 1384 pr_info("Enabled x2apic and interrupt-remapping\n");
1396 "Enabled x2apic and interrupt-remapping\n");
1397 else 1385 else
1398 printk(KERN_INFO 1386 pr_info("Enabled Interrupt-remapping\n");
1399 "Enabled Interrupt-remapping\n");
1400 } else 1387 } else
1401 printk(KERN_ERR 1388 pr_err("Failed to enable Interrupt-remapping and x2apic\n");
1402 "Failed to enable Interrupt-remapping and x2apic\n");
1403#else 1389#else
1404 if (!cpu_has_x2apic) 1390 if (!cpu_has_x2apic)
1405 return; 1391 return;
@@ -1408,8 +1394,8 @@ end:
1408 panic("x2apic enabled prior OS handover," 1394 panic("x2apic enabled prior OS handover,"
1409 " enable CONFIG_INTR_REMAP"); 1395 " enable CONFIG_INTR_REMAP");
1410 1396
1411 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " 1397 pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1412 " and x2apic\n"); 1398 " and x2apic\n");
1413#endif 1399#endif
1414 1400
1415 return; 1401 return;
@@ -1426,7 +1412,7 @@ end:
1426static int __init detect_init_APIC(void) 1412static int __init detect_init_APIC(void)
1427{ 1413{
1428 if (!cpu_has_apic) { 1414 if (!cpu_has_apic) {
1429 printk(KERN_INFO "No local APIC present\n"); 1415 pr_info("No local APIC present\n");
1430 return -1; 1416 return -1;
1431 } 1417 }
1432 1418
@@ -1467,8 +1453,8 @@ static int __init detect_init_APIC(void)
1467 * "lapic" specified. 1453 * "lapic" specified.
1468 */ 1454 */
1469 if (!force_enable_local_apic) { 1455 if (!force_enable_local_apic) {
1470 printk(KERN_INFO "Local APIC disabled by BIOS -- " 1456 pr_info("Local APIC disabled by BIOS -- "
1471 "you can enable it with \"lapic\"\n"); 1457 "you can enable it with \"lapic\"\n");
1472 return -1; 1458 return -1;
1473 } 1459 }
1474 /* 1460 /*
@@ -1478,8 +1464,7 @@ static int __init detect_init_APIC(void)
1478 */ 1464 */
1479 rdmsr(MSR_IA32_APICBASE, l, h); 1465 rdmsr(MSR_IA32_APICBASE, l, h);
1480 if (!(l & MSR_IA32_APICBASE_ENABLE)) { 1466 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1481 printk(KERN_INFO 1467 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1482 "Local APIC disabled by BIOS -- reenabling.\n");
1483 l &= ~MSR_IA32_APICBASE_BASE; 1468 l &= ~MSR_IA32_APICBASE_BASE;
1484 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; 1469 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1485 wrmsr(MSR_IA32_APICBASE, l, h); 1470 wrmsr(MSR_IA32_APICBASE, l, h);
@@ -1492,7 +1477,7 @@ static int __init detect_init_APIC(void)
1492 */ 1477 */
1493 features = cpuid_edx(1); 1478 features = cpuid_edx(1);
1494 if (!(features & (1 << X86_FEATURE_APIC))) { 1479 if (!(features & (1 << X86_FEATURE_APIC))) {
1495 printk(KERN_WARNING "Could not enable APIC!\n"); 1480 pr_warning("Could not enable APIC!\n");
1496 return -1; 1481 return -1;
1497 } 1482 }
1498 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1483 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -1503,14 +1488,14 @@ static int __init detect_init_APIC(void)
1503 if (l & MSR_IA32_APICBASE_ENABLE) 1488 if (l & MSR_IA32_APICBASE_ENABLE)
1504 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; 1489 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1505 1490
1506 printk(KERN_INFO "Found and enabled local APIC!\n"); 1491 pr_info("Found and enabled local APIC!\n");
1507 1492
1508 apic_pm_activate(); 1493 apic_pm_activate();
1509 1494
1510 return 0; 1495 return 0;
1511 1496
1512no_apic: 1497no_apic:
1513 printk(KERN_INFO "No local APIC present or hardware disabled\n"); 1498 pr_info("No local APIC present or hardware disabled\n");
1514 return -1; 1499 return -1;
1515} 1500}
1516#endif 1501#endif
@@ -1586,12 +1571,12 @@ int __init APIC_init_uniprocessor(void)
1586{ 1571{
1587#ifdef CONFIG_X86_64 1572#ifdef CONFIG_X86_64
1588 if (disable_apic) { 1573 if (disable_apic) {
1589 printk(KERN_INFO "Apic disabled\n"); 1574 pr_info("Apic disabled\n");
1590 return -1; 1575 return -1;
1591 } 1576 }
1592 if (!cpu_has_apic) { 1577 if (!cpu_has_apic) {
1593 disable_apic = 1; 1578 disable_apic = 1;
1594 printk(KERN_INFO "Apic disabled by BIOS\n"); 1579 pr_info("Apic disabled by BIOS\n");
1595 return -1; 1580 return -1;
1596 } 1581 }
1597#else 1582#else
@@ -1603,8 +1588,8 @@ int __init APIC_init_uniprocessor(void)
1603 */ 1588 */
1604 if (!cpu_has_apic && 1589 if (!cpu_has_apic &&
1605 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1590 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1606 printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", 1591 pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
1607 boot_cpu_physical_apicid); 1592 boot_cpu_physical_apicid);
1608 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1593 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1609 return -1; 1594 return -1;
1610 } 1595 }
@@ -1680,9 +1665,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1680{ 1665{
1681 u32 v; 1666 u32 v;
1682 1667
1683#ifdef CONFIG_X86_64
1684 exit_idle(); 1668 exit_idle();
1685#endif
1686 irq_enter(); 1669 irq_enter();
1687 /* 1670 /*
1688 * Check if this really is a spurious interrupt and ACK it 1671 * Check if this really is a spurious interrupt and ACK it
@@ -1696,8 +1679,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1696 inc_irq_stat(irq_spurious_count); 1679 inc_irq_stat(irq_spurious_count);
1697 1680
1698 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1681 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1699 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " 1682 pr_info("spurious APIC interrupt on CPU#%d, "
1700 "should never happen.\n", smp_processor_id()); 1683 "should never happen.\n", smp_processor_id());
1701 irq_exit(); 1684 irq_exit();
1702} 1685}
1703 1686
@@ -1708,9 +1691,7 @@ void smp_error_interrupt(struct pt_regs *regs)
1708{ 1691{
1709 u32 v, v1; 1692 u32 v, v1;
1710 1693
1711#ifdef CONFIG_X86_64
1712 exit_idle(); 1694 exit_idle();
1713#endif
1714 irq_enter(); 1695 irq_enter();
1715 /* First tickle the hardware, only then report what went on. -- REW */ 1696 /* First tickle the hardware, only then report what went on. -- REW */
1716 v = apic_read(APIC_ESR); 1697 v = apic_read(APIC_ESR);
@@ -1719,17 +1700,18 @@ void smp_error_interrupt(struct pt_regs *regs)
1719 ack_APIC_irq(); 1700 ack_APIC_irq();
1720 atomic_inc(&irq_err_count); 1701 atomic_inc(&irq_err_count);
1721 1702
1722 /* Here is what the APIC error bits mean: 1703 /*
1723 0: Send CS error 1704 * Here is what the APIC error bits mean:
1724 1: Receive CS error 1705 * 0: Send CS error
1725 2: Send accept error 1706 * 1: Receive CS error
1726 3: Receive accept error 1707 * 2: Send accept error
1727 4: Reserved 1708 * 3: Receive accept error
1728 5: Send illegal vector 1709 * 4: Reserved
1729 6: Received illegal vector 1710 * 5: Send illegal vector
1730 7: Illegal register address 1711 * 6: Received illegal vector
1731 */ 1712 * 7: Illegal register address
1732 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", 1713 */
1714 pr_debug("APIC error on CPU%d: %02x(%02x)\n",
1733 smp_processor_id(), v , v1); 1715 smp_processor_id(), v , v1);
1734 irq_exit(); 1716 irq_exit();
1735} 1717}
@@ -1833,15 +1815,15 @@ void __cpuinit generic_processor_info(int apicid, int version)
1833 * Validate version 1815 * Validate version
1834 */ 1816 */
1835 if (version == 0x0) { 1817 if (version == 0x0) {
1836 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " 1818 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1837 "fixing up to 0x10. (tell your hw vendor)\n", 1819 "fixing up to 0x10. (tell your hw vendor)\n",
1838 version); 1820 version);
1839 version = 0x10; 1821 version = 0x10;
1840 } 1822 }
1841 apic_version[apicid] = version; 1823 apic_version[apicid] = version;
1842 1824
1843 if (num_processors >= NR_CPUS) { 1825 if (num_processors >= NR_CPUS) {
1844 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1826 pr_warning("WARNING: NR_CPUS limit of %i reached."
1845 " Processor ignored.\n", NR_CPUS); 1827 " Processor ignored.\n", NR_CPUS);
1846 return; 1828 return;
1847 } 1829 }
@@ -2204,7 +2186,7 @@ static int __init apic_set_verbosity(char *arg)
2204 else if (strcmp("verbose", arg) == 0) 2186 else if (strcmp("verbose", arg) == 0)
2205 apic_verbosity = APIC_VERBOSE; 2187 apic_verbosity = APIC_VERBOSE;
2206 else { 2188 else {
2207 printk(KERN_WARNING "APIC Verbosity level %s not recognised" 2189 pr_warning("APIC Verbosity level %s not recognised"
2208 " use apic=verbose or apic=debug\n", arg); 2190 " use apic=verbose or apic=debug\n", arg);
2209 return -EINVAL; 2191 return -EINVAL;
2210 } 2192 }
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5145a6e72bbb..3a26525a3f31 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -391,11 +391,7 @@ static int power_off;
391#else 391#else
392static int power_off = 1; 392static int power_off = 1;
393#endif 393#endif
394#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
395static int realmode_power_off = 1;
396#else
397static int realmode_power_off; 394static int realmode_power_off;
398#endif
399#ifdef CONFIG_APM_ALLOW_INTS 395#ifdef CONFIG_APM_ALLOW_INTS
400static int allow_ints = 1; 396static int allow_ints = 1;
401#else 397#else
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6649d09ad88f..ee4df08feee6 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -11,7 +11,7 @@
11#include <linux/suspend.h> 11#include <linux/suspend.h>
12#include <linux/kbuild.h> 12#include <linux/kbuild.h>
13#include <asm/ucontext.h> 13#include <asm/ucontext.h>
14#include "sigframe.h" 14#include <asm/sigframe.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/processor.h> 17#include <asm/processor.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 7fcf63d22f8b..1d41d3f1edbc 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,6 +20,8 @@
20 20
21#include <xen/interface/xen.h> 21#include <xen/interface/xen.h>
22 22
23#include <asm/sigframe.h>
24
23#define __NO_STUBS 1 25#define __NO_STUBS 1
24#undef __SYSCALL 26#undef __SYSCALL
25#undef _ASM_X86_UNISTD_64_H 27#undef _ASM_X86_UNISTD_64_H
@@ -87,7 +89,7 @@ int main(void)
87 BLANK(); 89 BLANK();
88#undef ENTRY 90#undef ENTRY
89 DEFINE(IA32_RT_SIGFRAME_sigcontext, 91 DEFINE(IA32_RT_SIGFRAME_sigcontext,
90 offsetof (struct rt_sigframe32, uc.uc_mcontext)); 92 offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
91 BLANK(); 93 BLANK();
92#endif 94#endif
93 DEFINE(pbe_address, offsetof(struct pbe, address)); 95 DEFINE(pbe_address, offsetof(struct pbe, address));
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f0dfe6f17e7e..2a0a2a3cac26 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
69 69
70long sn_partition_id; 70long sn_partition_id;
71EXPORT_SYMBOL_GPL(sn_partition_id); 71EXPORT_SYMBOL_GPL(sn_partition_id);
72long uv_coherency_id; 72long sn_coherency_id;
73EXPORT_SYMBOL_GPL(uv_coherency_id); 73EXPORT_SYMBOL_GPL(sn_coherency_id);
74long uv_region_size; 74long sn_region_size;
75EXPORT_SYMBOL_GPL(uv_region_size); 75EXPORT_SYMBOL_GPL(sn_region_size);
76int uv_type; 76int uv_type;
77 77
78 78
@@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
100 return ret; 100 return ret;
101} 101}
102 102
103int
104uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size,
105 unsigned long *intr_mmr_offset)
106{
107 union uv_watchlist_u size_blade;
108 u64 watchlist;
109 s64 ret;
110
111 size_blade.size = mq_size;
112 size_blade.blade = blade;
113
114 /*
115 * bios returns watchlist number or negative error number.
116 */
117 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
118 size_blade.val, (u64)intr_mmr_offset,
119 (u64)&watchlist, 0);
120 if (ret < BIOS_STATUS_SUCCESS)
121 return ret;
122
123 return watchlist;
124}
125EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
126
127int
128uv_bios_mq_watchlist_free(int blade, int watchlist_num)
129{
130 return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
131 blade, watchlist_num, 0, 0, 0);
132}
133EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
134
135s64
136uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
137{
138 return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
139 perms, 0, 0);
140}
141EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
142
143s64
144uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
145{
146 s64 ret;
147
148 ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
149 (u64)addr, buf, (u64)len, 0);
150 return ret;
151}
152EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
103 153
104s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) 154s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
105{ 155{
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
new file mode 100644
index 000000000000..2ac0ab71412a
--- /dev/null
+++ b/arch/x86/kernel/check.c
@@ -0,0 +1,161 @@
1#include <linux/module.h>
2#include <linux/sched.h>
3#include <linux/kthread.h>
4#include <linux/workqueue.h>
5#include <asm/e820.h>
6#include <asm/proto.h>
7
8/*
9 * Some BIOSes seem to corrupt the low 64k of memory during events
10 * like suspend/resume and unplugging an HDMI cable. Reserve all
11 * remaining free memory in that area and fill it with a distinct
12 * pattern.
13 */
14#define MAX_SCAN_AREAS 8
15
16static int __read_mostly memory_corruption_check = -1;
17
18static unsigned __read_mostly corruption_check_size = 64*1024;
19static unsigned __read_mostly corruption_check_period = 60; /* seconds */
20
21static struct e820entry scan_areas[MAX_SCAN_AREAS];
22static int num_scan_areas;
23
24
25static __init int set_corruption_check(char *arg)
26{
27 char *end;
28
29 memory_corruption_check = simple_strtol(arg, &end, 10);
30
31 return (*end == 0) ? 0 : -EINVAL;
32}
33early_param("memory_corruption_check", set_corruption_check);
34
35static __init int set_corruption_check_period(char *arg)
36{
37 char *end;
38
39 corruption_check_period = simple_strtoul(arg, &end, 10);
40
41 return (*end == 0) ? 0 : -EINVAL;
42}
43early_param("memory_corruption_check_period", set_corruption_check_period);
44
45static __init int set_corruption_check_size(char *arg)
46{
47 char *end;
48 unsigned size;
49
50 size = memparse(arg, &end);
51
52 if (*end == '\0')
53 corruption_check_size = size;
54
55 return (size == corruption_check_size) ? 0 : -EINVAL;
56}
57early_param("memory_corruption_check_size", set_corruption_check_size);
58
59
60void __init setup_bios_corruption_check(void)
61{
62 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
63
64 if (memory_corruption_check == -1) {
65 memory_corruption_check =
66#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
67 1
68#else
69 0
70#endif
71 ;
72 }
73
74 if (corruption_check_size == 0)
75 memory_corruption_check = 0;
76
77 if (!memory_corruption_check)
78 return;
79
80 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
81
82 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
83 u64 size;
84 addr = find_e820_area_size(addr, &size, PAGE_SIZE);
85
86 if (addr == 0)
87 break;
88
89 if ((addr + size) > corruption_check_size)
90 size = corruption_check_size - addr;
91
92 if (size == 0)
93 break;
94
95 e820_update_range(addr, size, E820_RAM, E820_RESERVED);
96 scan_areas[num_scan_areas].addr = addr;
97 scan_areas[num_scan_areas].size = size;
98 num_scan_areas++;
99
100 /* Assume we've already mapped this early memory */
101 memset(__va(addr), 0, size);
102
103 addr += size;
104 }
105
106 printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
107 num_scan_areas);
108 update_e820();
109}
110
111
112void check_for_bios_corruption(void)
113{
114 int i;
115 int corruption = 0;
116
117 if (!memory_corruption_check)
118 return;
119
120 for (i = 0; i < num_scan_areas; i++) {
121 unsigned long *addr = __va(scan_areas[i].addr);
122 unsigned long size = scan_areas[i].size;
123
124 for (; size; addr++, size -= sizeof(unsigned long)) {
125 if (!*addr)
126 continue;
127 printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
128 addr, __pa(addr), *addr);
129 corruption = 1;
130 *addr = 0;
131 }
132 }
133
134 WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n");
135}
136
137static void check_corruption(struct work_struct *dummy);
138static DECLARE_DELAYED_WORK(bios_check_work, check_corruption);
139
140static void check_corruption(struct work_struct *dummy)
141{
142 check_for_bios_corruption();
143 schedule_delayed_work(&bios_check_work,
144 round_jiffies_relative(corruption_check_period*HZ));
145}
146
147static int start_periodic_check_for_corruption(void)
148{
149 if (!memory_corruption_check || corruption_check_period == 0)
150 return 0;
151
152 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
153 corruption_check_period);
154
155 /* First time we run the checks right away */
156 schedule_delayed_work(&bios_check_work, 0);
157 return 0;
158}
159
160module_init(start_periodic_check_for_corruption);
161
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 89e53361fe24..c3813306e0b4 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -2,8 +2,14 @@
2# Makefile for x86-compatible CPU details, features and quirks 2# Makefile for x86-compatible CPU details, features and quirks
3# 3#
4 4
5# Don't trace early stages of a secondary CPU boot
6ifdef CONFIG_FUNCTION_TRACER
7CFLAGS_REMOVE_common.o = -pg
8endif
9
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 10obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o capflags.o powerflags.o common.o 11obj-y += proc.o capflags.o powerflags.o common.o
12obj-y += vmware.o hypervisor.o
7 13
8obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
9obj-$(CONFIG_X86_64) += bugs_64.o 15obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index ef8f831af823..2cf23634b6d9 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) 120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask; 121 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); 122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123 /*
124 * Reinit the apicid, now that we have extended initial_apicid.
125 */
126 c->apicid = phys_pkg_id(c->initial_apicid, 0);
123#else 127#else
124 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; 128 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
125 c->phys_proc_id = phys_pkg_id(core_plus_mask_width); 129 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
130 /*
131 * Reinit the apicid, now that we have extended initial_apicid.
132 */
133 c->apicid = phys_pkg_id(0);
126#endif 134#endif
127 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 135 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
128 136
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8f1e31db2ad5..7c878f6aa919 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
283{ 283{
284 early_init_amd_mc(c); 284 early_init_amd_mc(c);
285 285
286 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ 286 /*
287 if (c->x86_power & (1<<8)) 287 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
288 * with P/T states and does not stop in deep C-states
289 */
290 if (c->x86_power & (1 << 8)) {
288 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 291 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
292 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
293 }
289 294
290#ifdef CONFIG_X86_64 295#ifdef CONFIG_X86_64
291 set_cpu_cap(c, X86_FEATURE_SYSCALL32); 296 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ad331b4d6238..376b9f9d8d23 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,7 @@
37#include <asm/proto.h> 37#include <asm/proto.h>
38#include <asm/sections.h> 38#include <asm/sections.h>
39#include <asm/setup.h> 39#include <asm/setup.h>
40#include <asm/hypervisor.h>
40 41
41#include "cpu.h" 42#include "cpu.h"
42 43
@@ -704,6 +705,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
704 detect_ht(c); 705 detect_ht(c);
705#endif 706#endif
706 707
708 init_hypervisor(c);
707 /* 709 /*
708 * On SMP, boot_cpu_data holds the common feature set between 710 * On SMP, boot_cpu_data holds the common feature set between
709 * all CPUs; so make sure that we indicate which features are 711 * all CPUs; so make sure that we indicate which features are
@@ -864,7 +866,7 @@ EXPORT_SYMBOL(_cpu_pda);
864 866
865struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 867struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
866 868
867char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 869static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
868 870
869void __cpuinit pda_init(int cpu) 871void __cpuinit pda_init(int cpu)
870{ 872{
@@ -905,8 +907,8 @@ void __cpuinit pda_init(int cpu)
905 } 907 }
906} 908}
907 909
908char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 910static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
909 DEBUG_STKSZ] __page_aligned_bss; 911 DEBUG_STKSZ] __page_aligned_bss;
910 912
911extern asmlinkage void ignore_sysret(void); 913extern asmlinkage void ignore_sysret(void);
912 914
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h>
36 37
37#include <linux/acpi.h> 38#include <linux/acpi.h>
38#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
391 unsigned int next_perf_state = 0; /* Index into perf table */ 392 unsigned int next_perf_state = 0; /* Index into perf table */
392 unsigned int i; 393 unsigned int i;
393 int result = 0; 394 int result = 0;
395 struct power_trace it;
394 396
395 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 397 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
396 398
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 } 429 }
428 } 430 }
429 431
432 trace_power_mark(&it, POWER_PSTATE, next_perf_state);
433
430 switch (data->cpu_feature) { 434 switch (data->cpu_feature) {
431 case SYSTEM_INTEL_MSR_CAPABLE: 435 case SYSTEM_INTEL_MSR_CAPABLE:
432 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 436 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
new file mode 100644
index 000000000000..fb5b86af0b01
--- /dev/null
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -0,0 +1,58 @@
1/*
2 * Common hypervisor code
3 *
4 * Copyright (C) 2008, VMware, Inc.
5 * Author : Alok N Kataria <akataria@vmware.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
15 * NON INFRINGEMENT. See the GNU General Public License for more
16 * details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23
24#include <asm/processor.h>
25#include <asm/vmware.h>
26#include <asm/hypervisor.h>
27
28static inline void __cpuinit
29detect_hypervisor_vendor(struct cpuinfo_x86 *c)
30{
31 if (vmware_platform()) {
32 c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
33 } else {
34 c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
35 }
36}
37
38unsigned long get_hypervisor_tsc_freq(void)
39{
40 if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
41 return vmware_get_tsc_khz();
42 return 0;
43}
44
45static inline void __cpuinit
46hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
47{
48 if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
49 vmware_set_feature_bits(c);
50 return;
51 }
52}
53
54void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
55{
56 detect_hypervisor_vendor(c);
57 hypervisor_set_feature_bits(c);
58}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..8ea6929e974c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,7 +11,6 @@
11#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/msr.h> 12#include <asm/msr.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/ptrace.h>
15#include <asm/ds.h> 14#include <asm/ds.h>
16#include <asm/bugs.h> 15#include <asm/bugs.h>
17 16
@@ -41,6 +40,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
41 if (c->x86 == 15 && c->x86_cache_alignment == 64) 40 if (c->x86 == 15 && c->x86_cache_alignment == 64)
42 c->x86_cache_alignment = 128; 41 c->x86_cache_alignment = 128;
43#endif 42#endif
43
44 /*
45 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
46 * with P/T states and does not stop in deep C-states
47 */
48 if (c->x86_power & (1 << 8)) {
49 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
50 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
51 }
52
44} 53}
45 54
46#ifdef CONFIG_X86_32 55#ifdef CONFIG_X86_32
@@ -242,6 +251,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
242 251
243 intel_workarounds(c); 252 intel_workarounds(c);
244 253
254 /*
255 * Detect the extended topology information if available. This
256 * will reinitialise the initial_apicid which will be used
257 * in init_intel_cacheinfo()
258 */
259 detect_extended_topology(c);
260
245 l2 = init_intel_cacheinfo(c); 261 l2 = init_intel_cacheinfo(c);
246 if (c->cpuid_level > 9) { 262 if (c->cpuid_level > 9) {
247 unsigned eax = cpuid_eax(10); 263 unsigned eax = cpuid_eax(10);
@@ -307,13 +323,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
307 set_cpu_cap(c, X86_FEATURE_P4); 323 set_cpu_cap(c, X86_FEATURE_P4);
308 if (c->x86 == 6) 324 if (c->x86 == 6)
309 set_cpu_cap(c, X86_FEATURE_P3); 325 set_cpu_cap(c, X86_FEATURE_P3);
310
311 if (cpu_has_bts)
312 ptrace_bts_init_intel(c);
313
314#endif 326#endif
315 327
316 detect_extended_topology(c);
317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 328 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /* 329 /*
319 * let's use the legacy cpuid vector 0x1 and 0x4 for topology 330 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..68b5d8681cbb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -644,20 +644,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
644 return show_shared_cpu_map_func(leaf, 1, buf); 644 return show_shared_cpu_map_func(leaf, 1, buf);
645} 645}
646 646
647static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { 647static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
648 switch(this_leaf->eax.split.type) { 648{
649 case CACHE_TYPE_DATA: 649 switch (this_leaf->eax.split.type) {
650 case CACHE_TYPE_DATA:
650 return sprintf(buf, "Data\n"); 651 return sprintf(buf, "Data\n");
651 break; 652 case CACHE_TYPE_INST:
652 case CACHE_TYPE_INST:
653 return sprintf(buf, "Instruction\n"); 653 return sprintf(buf, "Instruction\n");
654 break; 654 case CACHE_TYPE_UNIFIED:
655 case CACHE_TYPE_UNIFIED:
656 return sprintf(buf, "Unified\n"); 655 return sprintf(buf, "Unified\n");
657 break; 656 default:
658 default:
659 return sprintf(buf, "Unknown\n"); 657 return sprintf(buf, "Unknown\n");
660 break;
661 } 658 }
662} 659}
663 660
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 4b031a4ac856..1c838032fd37 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
510 */ 510 */
511void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 511void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
512{ 512{
513 static cpumask_t mce_cpus = CPU_MASK_NONE;
514
515 mce_cpu_quirks(c); 513 mce_cpu_quirks(c);
516 514
517 if (mce_dont_init || 515 if (mce_dont_init ||
518 cpu_test_and_set(smp_processor_id(), mce_cpus) ||
519 !mce_available(c)) 516 !mce_available(c))
520 return; 517 return;
521 518
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..748c8f9e7a05 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void)
237 } 237 }
238 } 238 }
239out: 239out:
240 add_pda(irq_threshold_count, 1); 240 inc_irq_stat(irq_threshold_count);
241 irq_exit(); 241 irq_exit();
242} 242}
243 243
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index c17eaf5dd6dd..4b48f251fd39 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void)
26 if (therm_throt_process(msr_val & 1)) 26 if (therm_throt_process(msr_val & 1))
27 mce_log_therm_throt_event(smp_processor_id(), msr_val); 27 mce_log_therm_throt_event(smp_processor_id(), msr_val);
28 28
29 add_pda(irq_thermal_count, 1); 29 inc_irq_stat(irq_thermal_count);
30 irq_exit(); 30 irq_exit();
31} 31}
32 32
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea1..1159e269e596 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
803} 803}
804 804
805static struct res_range __initdata range[RANGE_NUM]; 805static struct res_range __initdata range[RANGE_NUM];
806static int __initdata nr_range;
806 807
807#ifdef CONFIG_MTRR_SANITIZER 808#ifdef CONFIG_MTRR_SANITIZER
808 809
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result {
1206#define PSHIFT (PAGE_SHIFT - 10) 1207#define PSHIFT (PAGE_SHIFT - 10)
1207 1208
1208static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; 1209static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1209static struct res_range __initdata range_new[RANGE_NUM];
1210static unsigned long __initdata min_loss_pfn[RANGE_NUM]; 1210static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1211 1211
1212static int __init mtrr_cleanup(unsigned address_bits) 1212static void __init print_out_mtrr_range_state(void)
1213{ 1213{
1214 unsigned long extra_remove_base, extra_remove_size;
1215 unsigned long base, size, def, dummy;
1216 mtrr_type type;
1217 int nr_range, nr_range_new;
1218 u64 chunk_size, gran_size;
1219 unsigned long range_sums, range_sums_new;
1220 int index_good;
1221 int num_reg_good;
1222 int i; 1214 int i;
1215 char start_factor = 'K', size_factor = 'K';
1216 unsigned long start_base, size_base;
1217 mtrr_type type;
1223 1218
1224 /* extra one for all 0 */ 1219 for (i = 0; i < num_var_ranges; i++) {
1225 int num[MTRR_NUM_TYPES + 1];
1226 1220
1227 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) 1221 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
1228 return 0; 1222 if (!size_base)
1229 rdmsr(MTRRdefType_MSR, def, dummy); 1223 continue;
1230 def &= 0xff;
1231 if (def != MTRR_TYPE_UNCACHABLE)
1232 return 0;
1233 1224
1234 /* get it and store it aside */ 1225 size_base = to_size_factor(size_base, &size_factor),
1235 memset(range_state, 0, sizeof(range_state)); 1226 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
1236 for (i = 0; i < num_var_ranges; i++) { 1227 start_base = to_size_factor(start_base, &start_factor),
1237 mtrr_if->get(i, &base, &size, &type); 1228 type = range_state[i].type;
1238 range_state[i].base_pfn = base; 1229
1239 range_state[i].size_pfn = size; 1230 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
1240 range_state[i].type = type; 1231 i, start_base, start_factor,
1232 size_base, size_factor,
1233 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
1234 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1235 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1236 );
1241 } 1237 }
1238}
1239
1240static int __init mtrr_need_cleanup(void)
1241{
1242 int i;
1243 mtrr_type type;
1244 unsigned long size;
1245 /* extra one for all 0 */
1246 int num[MTRR_NUM_TYPES + 1];
1242 1247
1243 /* check entries number */ 1248 /* check entries number */
1244 memset(num, 0, sizeof(num)); 1249 memset(num, 0, sizeof(num));
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits)
1263 num_var_ranges - num[MTRR_NUM_TYPES]) 1268 num_var_ranges - num[MTRR_NUM_TYPES])
1264 return 0; 1269 return 0;
1265 1270
1266 /* print original var MTRRs at first, for debugging: */ 1271 return 1;
1267 printk(KERN_DEBUG "original variable MTRRs\n"); 1272}
1268 for (i = 0; i < num_var_ranges; i++) {
1269 char start_factor = 'K', size_factor = 'K';
1270 unsigned long start_base, size_base;
1271 1273
1272 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); 1274static unsigned long __initdata range_sums;
1273 if (!size_base) 1275static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
1274 continue; 1276 unsigned long extra_remove_base,
1277 unsigned long extra_remove_size,
1278 int i)
1279{
1280 int num_reg;
1281 static struct res_range range_new[RANGE_NUM];
1282 static int nr_range_new;
1283 unsigned long range_sums_new;
1284
1285 /* convert ranges to var ranges state */
1286 num_reg = x86_setup_var_mtrrs(range, nr_range,
1287 chunk_size, gran_size);
1288
1289 /* we got new setting in range_state, check it */
1290 memset(range_new, 0, sizeof(range_new));
1291 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1292 extra_remove_base, extra_remove_size);
1293 range_sums_new = sum_ranges(range_new, nr_range_new);
1294
1295 result[i].chunk_sizek = chunk_size >> 10;
1296 result[i].gran_sizek = gran_size >> 10;
1297 result[i].num_reg = num_reg;
1298 if (range_sums < range_sums_new) {
1299 result[i].lose_cover_sizek =
1300 (range_sums_new - range_sums) << PSHIFT;
1301 result[i].bad = 1;
1302 } else
1303 result[i].lose_cover_sizek =
1304 (range_sums - range_sums_new) << PSHIFT;
1275 1305
1276 size_base = to_size_factor(size_base, &size_factor), 1306 /* double check it */
1277 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); 1307 if (!result[i].bad && !result[i].lose_cover_sizek) {
1278 start_base = to_size_factor(start_base, &start_factor), 1308 if (nr_range_new != nr_range ||
1279 type = range_state[i].type; 1309 memcmp(range, range_new, sizeof(range)))
1310 result[i].bad = 1;
1311 }
1280 1312
1281 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", 1313 if (!result[i].bad && (range_sums - range_sums_new <
1282 i, start_base, start_factor, 1314 min_loss_pfn[num_reg])) {
1283 size_base, size_factor, 1315 min_loss_pfn[num_reg] =
1284 (type == MTRR_TYPE_UNCACHABLE) ? "UC" : 1316 range_sums - range_sums_new;
1285 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1286 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1287 );
1288 } 1317 }
1318}
1319
1320static void __init mtrr_print_out_one_result(int i)
1321{
1322 char gran_factor, chunk_factor, lose_factor;
1323 unsigned long gran_base, chunk_base, lose_base;
1324
1325 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1326 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1327 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1328 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1329 result[i].bad ? "*BAD*" : " ",
1330 gran_base, gran_factor, chunk_base, chunk_factor);
1331 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1332 result[i].num_reg, result[i].bad ? "-" : "",
1333 lose_base, lose_factor);
1334}
1335
1336static int __init mtrr_search_optimal_index(void)
1337{
1338 int i;
1339 int num_reg_good;
1340 int index_good;
1341
1342 if (nr_mtrr_spare_reg >= num_var_ranges)
1343 nr_mtrr_spare_reg = num_var_ranges - 1;
1344 num_reg_good = -1;
1345 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1346 if (!min_loss_pfn[i])
1347 num_reg_good = i;
1348 }
1349
1350 index_good = -1;
1351 if (num_reg_good != -1) {
1352 for (i = 0; i < NUM_RESULT; i++) {
1353 if (!result[i].bad &&
1354 result[i].num_reg == num_reg_good &&
1355 !result[i].lose_cover_sizek) {
1356 index_good = i;
1357 break;
1358 }
1359 }
1360 }
1361
1362 return index_good;
1363}
1364
1365
1366static int __init mtrr_cleanup(unsigned address_bits)
1367{
1368 unsigned long extra_remove_base, extra_remove_size;
1369 unsigned long base, size, def, dummy;
1370 mtrr_type type;
1371 u64 chunk_size, gran_size;
1372 int index_good;
1373 int i;
1374
1375 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1376 return 0;
1377 rdmsr(MTRRdefType_MSR, def, dummy);
1378 def &= 0xff;
1379 if (def != MTRR_TYPE_UNCACHABLE)
1380 return 0;
1381
1382 /* get it and store it aside */
1383 memset(range_state, 0, sizeof(range_state));
1384 for (i = 0; i < num_var_ranges; i++) {
1385 mtrr_if->get(i, &base, &size, &type);
1386 range_state[i].base_pfn = base;
1387 range_state[i].size_pfn = size;
1388 range_state[i].type = type;
1389 }
1390
1391 /* check if we need handle it and can handle it */
1392 if (!mtrr_need_cleanup())
1393 return 0;
1394
1395 /* print original var MTRRs at first, for debugging: */
1396 printk(KERN_DEBUG "original variable MTRRs\n");
1397 print_out_mtrr_range_state();
1289 1398
1290 memset(range, 0, sizeof(range)); 1399 memset(range, 0, sizeof(range));
1291 extra_remove_size = 0; 1400 extra_remove_size = 0;
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits)
1309 range_sums >> (20 - PAGE_SHIFT)); 1418 range_sums >> (20 - PAGE_SHIFT));
1310 1419
1311 if (mtrr_chunk_size && mtrr_gran_size) { 1420 if (mtrr_chunk_size && mtrr_gran_size) {
1312 int num_reg; 1421 i = 0;
1313 char gran_factor, chunk_factor, lose_factor; 1422 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
1314 unsigned long gran_base, chunk_base, lose_base; 1423 extra_remove_base, extra_remove_size, i);
1315
1316 debug_print++;
1317 /* convert ranges to var ranges state */
1318 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
1319 mtrr_gran_size);
1320 1424
1321 /* we got new setting in range_state, check it */ 1425 mtrr_print_out_one_result(i);
1322 memset(range_new, 0, sizeof(range_new));
1323 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1324 extra_remove_base,
1325 extra_remove_size);
1326 range_sums_new = sum_ranges(range_new, nr_range_new);
1327 1426
1328 i = 0;
1329 result[i].chunk_sizek = mtrr_chunk_size >> 10;
1330 result[i].gran_sizek = mtrr_gran_size >> 10;
1331 result[i].num_reg = num_reg;
1332 if (range_sums < range_sums_new) {
1333 result[i].lose_cover_sizek =
1334 (range_sums_new - range_sums) << PSHIFT;
1335 result[i].bad = 1;
1336 } else
1337 result[i].lose_cover_sizek =
1338 (range_sums - range_sums_new) << PSHIFT;
1339
1340 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1341 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1342 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1343 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1344 result[i].bad?"*BAD*":" ",
1345 gran_base, gran_factor, chunk_base, chunk_factor);
1346 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1347 result[i].num_reg, result[i].bad?"-":"",
1348 lose_base, lose_factor);
1349 if (!result[i].bad) { 1427 if (!result[i].bad) {
1350 set_var_mtrr_all(address_bits); 1428 set_var_mtrr_all(address_bits);
1351 return 1; 1429 return 1;
1352 } 1430 }
1353 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " 1431 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1354 "will find optimal one\n"); 1432 "will find optimal one\n");
1355 debug_print--;
1356 memset(result, 0, sizeof(result[0]));
1357 } 1433 }
1358 1434
1359 i = 0; 1435 i = 0;
1360 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); 1436 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1361 memset(result, 0, sizeof(result)); 1437 memset(result, 0, sizeof(result));
1362 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { 1438 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
1363 char gran_factor;
1364 unsigned long gran_base;
1365
1366 if (debug_print)
1367 gran_base = to_size_factor(gran_size >> 10, &gran_factor);
1368 1439
1369 for (chunk_size = gran_size; chunk_size < (1ULL<<32); 1440 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
1370 chunk_size <<= 1) { 1441 chunk_size <<= 1) {
1371 int num_reg;
1372 1442
1373 if (debug_print) {
1374 char chunk_factor;
1375 unsigned long chunk_base;
1376
1377 chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
1378 printk(KERN_INFO "\n");
1379 printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
1380 gran_base, gran_factor, chunk_base, chunk_factor);
1381 }
1382 if (i >= NUM_RESULT) 1443 if (i >= NUM_RESULT)
1383 continue; 1444 continue;
1384 1445
1385 /* convert ranges to var ranges state */ 1446 mtrr_calc_range_state(chunk_size, gran_size,
1386 num_reg = x86_setup_var_mtrrs(range, nr_range, 1447 extra_remove_base, extra_remove_size, i);
1387 chunk_size, gran_size); 1448 if (debug_print) {
1388 1449 mtrr_print_out_one_result(i);
1389 /* we got new setting in range_state, check it */ 1450 printk(KERN_INFO "\n");
1390 memset(range_new, 0, sizeof(range_new));
1391 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1392 extra_remove_base, extra_remove_size);
1393 range_sums_new = sum_ranges(range_new, nr_range_new);
1394
1395 result[i].chunk_sizek = chunk_size >> 10;
1396 result[i].gran_sizek = gran_size >> 10;
1397 result[i].num_reg = num_reg;
1398 if (range_sums < range_sums_new) {
1399 result[i].lose_cover_sizek =
1400 (range_sums_new - range_sums) << PSHIFT;
1401 result[i].bad = 1;
1402 } else
1403 result[i].lose_cover_sizek =
1404 (range_sums - range_sums_new) << PSHIFT;
1405
1406 /* double check it */
1407 if (!result[i].bad && !result[i].lose_cover_sizek) {
1408 if (nr_range_new != nr_range ||
1409 memcmp(range, range_new, sizeof(range)))
1410 result[i].bad = 1;
1411 } 1451 }
1412 1452
1413 if (!result[i].bad && (range_sums - range_sums_new <
1414 min_loss_pfn[num_reg])) {
1415 min_loss_pfn[num_reg] =
1416 range_sums - range_sums_new;
1417 }
1418 i++; 1453 i++;
1419 } 1454 }
1420 } 1455 }
1421 1456
1422 /* print out all */
1423 for (i = 0; i < NUM_RESULT; i++) {
1424 char gran_factor, chunk_factor, lose_factor;
1425 unsigned long gran_base, chunk_base, lose_base;
1426
1427 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1428 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1429 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1430 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1431 result[i].bad?"*BAD*":" ",
1432 gran_base, gran_factor, chunk_base, chunk_factor);
1433 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1434 result[i].num_reg, result[i].bad?"-":"",
1435 lose_base, lose_factor);
1436 }
1437
1438 /* try to find the optimal index */ 1457 /* try to find the optimal index */
1439 if (nr_mtrr_spare_reg >= num_var_ranges) 1458 index_good = mtrr_search_optimal_index();
1440 nr_mtrr_spare_reg = num_var_ranges - 1;
1441 num_reg_good = -1;
1442 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1443 if (!min_loss_pfn[i])
1444 num_reg_good = i;
1445 }
1446
1447 index_good = -1;
1448 if (num_reg_good != -1) {
1449 for (i = 0; i < NUM_RESULT; i++) {
1450 if (!result[i].bad &&
1451 result[i].num_reg == num_reg_good &&
1452 !result[i].lose_cover_sizek) {
1453 index_good = i;
1454 break;
1455 }
1456 }
1457 }
1458 1459
1459 if (index_good != -1) { 1460 if (index_good != -1) {
1460 char gran_factor, chunk_factor, lose_factor;
1461 unsigned long gran_base, chunk_base, lose_base;
1462
1463 printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); 1461 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1464 i = index_good; 1462 i = index_good;
1465 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), 1463 mtrr_print_out_one_result(i);
1466 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), 1464
1467 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1468 printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
1469 gran_base, gran_factor, chunk_base, chunk_factor);
1470 printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
1471 result[i].num_reg, lose_base, lose_factor);
1472 /* convert ranges to var ranges state */ 1465 /* convert ranges to var ranges state */
1473 chunk_size = result[i].chunk_sizek; 1466 chunk_size = result[i].chunk_sizek;
1474 chunk_size <<= 10; 1467 chunk_size <<= 10;
1475 gran_size = result[i].gran_sizek; 1468 gran_size = result[i].gran_sizek;
1476 gran_size <<= 10; 1469 gran_size <<= 10;
1477 debug_print++;
1478 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); 1470 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1479 debug_print--;
1480 set_var_mtrr_all(address_bits); 1471 set_var_mtrr_all(address_bits);
1472 printk(KERN_DEBUG "New variable MTRRs\n");
1473 print_out_mtrr_range_state();
1481 return 1; 1474 return 1;
1475 } else {
1476 /* print out all */
1477 for (i = 0; i < NUM_RESULT; i++)
1478 mtrr_print_out_one_result(i);
1482 } 1479 }
1483 1480
1484 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); 1481 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1562{ 1559{
1563 unsigned long i, base, size, highest_pfn = 0, def, dummy; 1560 unsigned long i, base, size, highest_pfn = 0, def, dummy;
1564 mtrr_type type; 1561 mtrr_type type;
1565 int nr_range;
1566 u64 total_trim_size; 1562 u64 total_trim_size;
1567 1563
1568 /* extra one for all 0 */ 1564 /* extra one for all 0 */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
new file mode 100644
index 000000000000..284c399e3234
--- /dev/null
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -0,0 +1,112 @@
1/*
2 * VMware Detection code.
3 *
4 * Copyright (C) 2008, VMware, Inc.
5 * Author : Alok N Kataria <akataria@vmware.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
15 * NON INFRINGEMENT. See the GNU General Public License for more
16 * details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23
24#include <linux/dmi.h>
25#include <asm/div64.h>
26#include <asm/vmware.h>
27
28#define CPUID_VMWARE_INFO_LEAF 0x40000000
29#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
30#define VMWARE_HYPERVISOR_PORT 0x5658
31
32#define VMWARE_PORT_CMD_GETVERSION 10
33#define VMWARE_PORT_CMD_GETHZ 45
34
35#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
36 __asm__("inl (%%dx)" : \
37 "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
38 "0"(VMWARE_HYPERVISOR_MAGIC), \
39 "1"(VMWARE_PORT_CMD_##cmd), \
40 "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \
41 "memory");
42
43static inline int __vmware_platform(void)
44{
45 uint32_t eax, ebx, ecx, edx;
46 VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
47 return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
48}
49
50static unsigned long __vmware_get_tsc_khz(void)
51{
52 uint64_t tsc_hz;
53 uint32_t eax, ebx, ecx, edx;
54
55 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
56
57 if (ebx == UINT_MAX)
58 return 0;
59 tsc_hz = eax | (((uint64_t)ebx) << 32);
60 do_div(tsc_hz, 1000);
61 BUG_ON(tsc_hz >> 32);
62 return tsc_hz;
63}
64
65/*
66 * While checking the dmi string infomation, just checking the product
67 * serial key should be enough, as this will always have a VMware
68 * specific string when running under VMware hypervisor.
69 */
70int vmware_platform(void)
71{
72 if (cpu_has_hypervisor) {
73 unsigned int eax, ebx, ecx, edx;
74 char hyper_vendor_id[13];
75
76 cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx);
77 memcpy(hyper_vendor_id + 0, &ebx, 4);
78 memcpy(hyper_vendor_id + 4, &ecx, 4);
79 memcpy(hyper_vendor_id + 8, &edx, 4);
80 hyper_vendor_id[12] = '\0';
81 if (!strcmp(hyper_vendor_id, "VMwareVMware"))
82 return 1;
83 } else if (dmi_available && dmi_name_in_serial("VMware") &&
84 __vmware_platform())
85 return 1;
86
87 return 0;
88}
89
90unsigned long vmware_get_tsc_khz(void)
91{
92 BUG_ON(!vmware_platform());
93 return __vmware_get_tsc_khz();
94}
95
96/*
97 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
98 * Still, due to timing difference when running on virtual cpus, the TSC can
99 * be marked as unstable in some cases. For example, the TSC sync check at
100 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
101 * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
102 * is not suitable as a watchdog when running on a hypervisor because the
103 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
104 * long time. To skip these checks at runtime we set these capability bits,
105 * so that the kernel could just trust the hypervisor with providing a
106 * reliable virtual TSC that is suitable for timekeeping.
107 */
108void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c)
109{
110 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
111 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
112}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 268553817909..d84a852e4cd7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,34 +29,17 @@
29 29
30#include <mach_ipi.h> 30#include <mach_ipi.h>
31 31
32/* This keeps a track of which one is crashing cpu. */
33static int crashing_cpu;
34 32
35#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
36static atomic_t waiting_for_crash_ipi;
37 34
38static int crash_nmi_callback(struct notifier_block *self, 35static void kdump_nmi_callback(int cpu, struct die_args *args)
39 unsigned long val, void *data)
40{ 36{
41 struct pt_regs *regs; 37 struct pt_regs *regs;
42#ifdef CONFIG_X86_32 38#ifdef CONFIG_X86_32
43 struct pt_regs fixed_regs; 39 struct pt_regs fixed_regs;
44#endif 40#endif
45 int cpu;
46 41
47 if (val != DIE_NMI_IPI) 42 regs = args->regs;
48 return NOTIFY_OK;
49
50 regs = ((struct die_args *)data)->regs;
51 cpu = raw_smp_processor_id();
52
53 /* Don't do anything if this handler is invoked on crashing cpu.
54 * Otherwise, system will completely hang. Crashing cpu can get
55 * an NMI if system was initially booted with nmi_watchdog parameter.
56 */
57 if (cpu == crashing_cpu)
58 return NOTIFY_STOP;
59 local_irq_disable();
60 43
61#ifdef CONFIG_X86_32 44#ifdef CONFIG_X86_32
62 if (!user_mode_vm(regs)) { 45 if (!user_mode_vm(regs)) {
@@ -65,54 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self,
65 } 48 }
66#endif 49#endif
67 crash_save_cpu(regs, cpu); 50 crash_save_cpu(regs, cpu);
68 disable_local_APIC();
69 atomic_dec(&waiting_for_crash_ipi);
70 /* Assume hlt works */
71 halt();
72 for (;;)
73 cpu_relax();
74
75 return 1;
76}
77 51
78static void smp_send_nmi_allbutself(void) 52 disable_local_APIC();
79{
80 cpumask_t mask = cpu_online_map;
81 cpu_clear(safe_smp_processor_id(), mask);
82 if (!cpus_empty(mask))
83 send_IPI_mask(mask, NMI_VECTOR);
84} 53}
85 54
86static struct notifier_block crash_nmi_nb = { 55static void kdump_nmi_shootdown_cpus(void)
87 .notifier_call = crash_nmi_callback,
88};
89
90static void nmi_shootdown_cpus(void)
91{ 56{
92 unsigned long msecs; 57 nmi_shootdown_cpus(kdump_nmi_callback);
93
94 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
95 /* Would it be better to replace the trap vector here? */
96 if (register_die_notifier(&crash_nmi_nb))
97 return; /* return what? */
98 /* Ensure the new callback function is set before sending
99 * out the NMI
100 */
101 wmb();
102 58
103 smp_send_nmi_allbutself();
104
105 msecs = 1000; /* Wait at most a second for the other cpus to stop */
106 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
107 mdelay(1);
108 msecs--;
109 }
110
111 /* Leave the nmi callback set */
112 disable_local_APIC(); 59 disable_local_APIC();
113} 60}
61
114#else 62#else
115static void nmi_shootdown_cpus(void) 63static void kdump_nmi_shootdown_cpus(void)
116{ 64{
117 /* There are no cpus to shootdown */ 65 /* There are no cpus to shootdown */
118} 66}
@@ -131,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
131 /* The kernel is broken so disable interrupts */ 79 /* The kernel is broken so disable interrupts */
132 local_irq_disable(); 80 local_irq_disable();
133 81
134 /* Make a note of crashing cpu. Will be used in NMI callback.*/ 82 kdump_nmi_shootdown_cpus();
135 crashing_cpu = safe_smp_processor_id();
136 nmi_shootdown_cpus();
137 lapic_shutdown(); 83 lapic_shutdown();
138#if defined(CONFIG_X86_IO_APIC) 84#if defined(CONFIG_X86_IO_APIC)
139 disable_IO_APIC(); 85 disable_IO_APIC();
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index a2d1176c38ee..da91701a2348 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,14 +6,13 @@
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - DS and BTS hardware configuration
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It does not do:
15 * - get_task_struct on all parameter tasks 14 * - security checking (is the caller allowed to trace the task)
16 * - current is allowed to trace parameter tasks 15 * - buffer allocation (memory accounting)
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -28,22 +27,69 @@
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/sched.h> 28#include <linux/sched.h>
30#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/kernel.h>
31 31
32 32
33/* 33/*
34 * The configuration for a particular DS hardware implementation. 34 * The configuration for a particular DS hardware implementation.
35 */ 35 */
36struct ds_configuration { 36struct ds_configuration {
37 /* the size of the DS structure in bytes */ 37 /* the name of the configuration */
38 unsigned char sizeof_ds; 38 const char *name;
39 /* the size of one pointer-typed field in the DS structure in bytes; 39 /* the size of one pointer-typed field in the DS structure and
40 this covers the first 8 fields related to buffer management. */ 40 in the BTS and PEBS buffers in bytes;
41 this covers the first 8 DS fields related to buffer management. */
41 unsigned char sizeof_field; 42 unsigned char sizeof_field;
42 /* the size of a BTS/PEBS record in bytes */ 43 /* the size of a BTS/PEBS record in bytes */
43 unsigned char sizeof_rec[2]; 44 unsigned char sizeof_rec[2];
45 /* a series of bit-masks to control various features indexed
46 * by enum ds_feature */
47 unsigned long ctl[dsf_ctl_max];
44}; 48};
45static struct ds_configuration ds_cfg; 49static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
46 50
51#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
52
53#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
54#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
55#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
56
57#define BTS_CONTROL \
58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
59 ds_cfg.ctl[dsf_bts_overflow])
60
61
62/*
63 * A BTS or PEBS tracer.
64 *
65 * This holds the configuration of the tracer and serves as a handle
66 * to identify tracers.
67 */
68struct ds_tracer {
69 /* the DS context (partially) owned by this tracer */
70 struct ds_context *context;
71 /* the buffer provided on ds_request() and its size in bytes */
72 void *buffer;
73 size_t size;
74};
75
76struct bts_tracer {
77 /* the common DS part */
78 struct ds_tracer ds;
79 /* the trace including the DS configuration */
80 struct bts_trace trace;
81 /* buffer overflow notification function */
82 bts_ovfl_callback_t ovfl;
83};
84
85struct pebs_tracer {
86 /* the common DS part */
87 struct ds_tracer ds;
88 /* the trace including the DS configuration */
89 struct pebs_trace trace;
90 /* buffer overflow notification function */
91 pebs_ovfl_callback_t ovfl;
92};
47 93
48/* 94/*
49 * Debug Store (DS) save area configuration (see Intel64 and IA32 95 * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
109 155
110 156
111/* 157/*
112 * Locking is done only for allocating BTS or PEBS resources and for 158 * Locking is done only for allocating BTS or PEBS resources.
113 * guarding context and buffer memory allocation.
114 *
115 * Most functions require the current task to own the ds context part
116 * they are going to access. All the locking is done when validating
117 * access to the context.
118 */ 159 */
119static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); 160static DEFINE_SPINLOCK(ds_lock);
120
121/*
122 * Validate that the current task is allowed to access the BTS/PEBS
123 * buffer of the parameter task.
124 *
125 * Returns 0, if access is granted; -Eerrno, otherwise.
126 */
127static inline int ds_validate_access(struct ds_context *context,
128 enum ds_qualifier qual)
129{
130 if (!context)
131 return -EPERM;
132
133 if (context->owner[qual] == current)
134 return 0;
135
136 return -EPERM;
137}
138 161
139 162
140/* 163/*
@@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context,
150 * >0 number of per-thread tracers 173 * >0 number of per-thread tracers
151 * <0 number of per-cpu tracers 174 * <0 number of per-cpu tracers
152 * 175 *
153 * The below functions to get and put tracers and to check the
154 * allocation type require the ds_lock to be held by the caller.
155 *
156 * Tracers essentially gives the number of ds contexts for a certain 176 * Tracers essentially gives the number of ds contexts for a certain
157 * type of allocation. 177 * type of allocation.
158 */ 178 */
159static long tracers; 179static atomic_t tracers = ATOMIC_INIT(0);
160 180
161static inline void get_tracer(struct task_struct *task) 181static inline void get_tracer(struct task_struct *task)
162{ 182{
163 tracers += (task ? 1 : -1); 183 if (task)
184 atomic_inc(&tracers);
185 else
186 atomic_dec(&tracers);
164} 187}
165 188
166static inline void put_tracer(struct task_struct *task) 189static inline void put_tracer(struct task_struct *task)
167{ 190{
168 tracers -= (task ? 1 : -1); 191 if (task)
192 atomic_dec(&tracers);
193 else
194 atomic_inc(&tracers);
169} 195}
170 196
171static inline int check_tracer(struct task_struct *task) 197static inline int check_tracer(struct task_struct *task)
172{ 198{
173 return (task ? (tracers >= 0) : (tracers <= 0)); 199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
174} 202}
175 203
176 204
@@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task)
183 * 211 *
184 * Contexts are use-counted. They are allocated on first access and 212 * Contexts are use-counted. They are allocated on first access and
185 * deallocated when the last user puts the context. 213 * deallocated when the last user puts the context.
186 *
187 * We distinguish between an allocating and a non-allocating get of a
188 * context:
189 * - the allocating get is used for requesting BTS/PEBS resources. It
190 * requires the caller to hold the global ds_lock.
191 * - the non-allocating get is used for all other cases. A
192 * non-existing context indicates an error. It acquires and releases
193 * the ds_lock itself for obtaining the context.
194 *
195 * A context and its DS configuration are allocated and deallocated
196 * together. A context always has a DS configuration of the
197 * appropriate size.
198 */
199static DEFINE_PER_CPU(struct ds_context *, system_context);
200
201#define this_system_context per_cpu(system_context, smp_processor_id())
202
203/*
204 * Returns the pointer to the parameter task's context or to the
205 * system-wide context, if task is NULL.
206 *
207 * Increases the use count of the returned context, if not NULL.
208 */ 214 */
209static inline struct ds_context *ds_get_context(struct task_struct *task) 215struct ds_context {
210{ 216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
211 struct ds_context *context; 217 unsigned char ds[MAX_SIZEOF_DS];
212 unsigned long irq; 218 /* the owner of the BTS and PEBS configuration, respectively */
219 struct bts_tracer *bts_master;
220 struct pebs_tracer *pebs_master;
221 /* use count */
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
213 230
214 spin_lock_irqsave(&ds_lock, irq); 231static DEFINE_PER_CPU(struct ds_context *, system_context_array);
215 232
216 context = (task ? task->thread.ds_ctx : this_system_context); 233#define system_context per_cpu(system_context_array, smp_processor_id())
217 if (context)
218 context->count++;
219 234
220 spin_unlock_irqrestore(&ds_lock, irq);
221
222 return context;
223}
224 235
225/* 236static inline struct ds_context *ds_get_context(struct task_struct *task)
226 * Same as ds_get_context, but allocates the context and it's DS
227 * structure, if necessary; returns NULL; if out of memory.
228 */
229static inline struct ds_context *ds_alloc_context(struct task_struct *task)
230{ 237{
231 struct ds_context **p_context = 238 struct ds_context **p_context =
232 (task ? &task->thread.ds_ctx : &this_system_context); 239 (task ? &task->thread.ds_ctx : &system_context);
233 struct ds_context *context = *p_context; 240 struct ds_context *context = NULL;
241 struct ds_context *new_context = NULL;
234 unsigned long irq; 242 unsigned long irq;
235 243
236 if (!context) { 244 /* Chances are small that we already have a context. */
237 context = kzalloc(sizeof(*context), GFP_KERNEL); 245 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
238 if (!context) 246 if (!new_context)
239 return NULL; 247 return NULL;
240
241 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
242 if (!context->ds) {
243 kfree(context);
244 return NULL;
245 }
246 248
247 spin_lock_irqsave(&ds_lock, irq); 249 spin_lock_irqsave(&ds_lock, irq);
248 250
249 if (*p_context) { 251 context = *p_context;
250 kfree(context->ds); 252 if (!context) {
251 kfree(context); 253 context = new_context;
252 254
253 context = *p_context; 255 context->this = p_context;
254 } else { 256 context->task = task;
255 *p_context = context; 257 context->count = 0;
256 258
257 context->this = p_context; 259 if (task)
258 context->task = task; 260 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
259 261
260 if (task) 262 if (!task || (task == current))
261 set_tsk_thread_flag(task, TIF_DS_AREA_MSR); 263 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
262 264
263 if (!task || (task == current)) 265 *p_context = context;
264 wrmsrl(MSR_IA32_DS_AREA,
265 (unsigned long)context->ds);
266 }
267 spin_unlock_irqrestore(&ds_lock, irq);
268 } 266 }
269 267
270 context->count++; 268 context->count++;
271 269
270 spin_unlock_irqrestore(&ds_lock, irq);
271
272 if (context != new_context)
273 kfree(new_context);
274
272 return context; 275 return context;
273} 276}
274 277
275/*
276 * Decreases the use count of the parameter context, if not NULL.
277 * Deallocates the context, if the use count reaches zero.
278 */
279static inline void ds_put_context(struct ds_context *context) 278static inline void ds_put_context(struct ds_context *context)
280{ 279{
281 unsigned long irq; 280 unsigned long irq;
@@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context)
285 284
286 spin_lock_irqsave(&ds_lock, irq); 285 spin_lock_irqsave(&ds_lock, irq);
287 286
288 if (--context->count) 287 if (--context->count) {
289 goto out; 288 spin_unlock_irqrestore(&ds_lock, irq);
289 return;
290 }
290 291
291 *(context->this) = NULL; 292 *(context->this) = NULL;
292 293
@@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context)
296 if (!context->task || (context->task == current)) 297 if (!context->task || (context->task == current))
297 wrmsrl(MSR_IA32_DS_AREA, 0); 298 wrmsrl(MSR_IA32_DS_AREA, 0);
298 299
299 put_tracer(context->task); 300 spin_unlock_irqrestore(&ds_lock, irq);
300 301
301 /* free any leftover buffers from tracers that did not
302 * deallocate them properly. */
303 kfree(context->buffer[ds_bts]);
304 kfree(context->buffer[ds_pebs]);
305 kfree(context->ds);
306 kfree(context); 302 kfree(context);
307 out:
308 spin_unlock_irqrestore(&ds_lock, irq);
309} 303}
310 304
311 305
312/* 306/*
313 * Handle a buffer overflow 307 * Call the tracer's callback on a buffer overflow.
314 * 308 *
315 * task: the task whose buffers are overflowing;
316 * NULL for a buffer overflow on the current cpu
317 * context: the ds context 309 * context: the ds context
318 * qual: the buffer type 310 * qual: the buffer type
319 */ 311 */
320static void ds_overflow(struct task_struct *task, struct ds_context *context, 312static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321 enum ds_qualifier qual)
322{ 313{
323 if (!context) 314 switch (qual) {
324 return; 315 case ds_bts:
325 316 if (context->bts_master &&
326 if (context->callback[qual]) 317 context->bts_master->ovfl)
327 (*context->callback[qual])(task); 318 context->bts_master->ovfl(context->bts_master);
328 319 break;
329 /* todo: do some more overflow handling */ 320 case ds_pebs:
321 if (context->pebs_master &&
322 context->pebs_master->ovfl)
323 context->pebs_master->ovfl(context->pebs_master);
324 break;
325 }
330} 326}
331 327
332 328
333/* 329/*
334 * Allocate a non-pageable buffer of the parameter size. 330 * Write raw data into the BTS or PEBS buffer.
335 * Checks the memory and the locked memory rlimit.
336 * 331 *
337 * Returns the buffer, if successful; 332 * The remainder of any partially written record is zeroed out.
338 * NULL, if out of memory or rlimit exceeded.
339 * 333 *
340 * size: the requested buffer size in bytes 334 * context: the DS context
341 * pages (out): if not NULL, contains the number of pages reserved 335 * qual: the buffer type
336 * record: the data to write
337 * size: the size of the data
342 */ 338 */
343static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) 339static int ds_write(struct ds_context *context, enum ds_qualifier qual,
340 const void *record, size_t size)
344{ 341{
345 unsigned long rlim, vm, pgsz; 342 int bytes_written = 0;
346 void *buffer;
347 343
348 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; 344 if (!record)
345 return -EINVAL;
349 346
350 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; 347 while (size) {
351 vm = current->mm->total_vm + pgsz; 348 unsigned long base, index, end, write_end, int_th;
352 if (rlim < vm) 349 unsigned long write_size, adj_write_size;
353 return NULL;
354 350
355 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 351 /*
356 vm = current->mm->locked_vm + pgsz; 352 * write as much as possible without producing an
357 if (rlim < vm) 353 * overflow interrupt.
358 return NULL; 354 *
355 * interrupt_threshold must either be
356 * - bigger than absolute_maximum or
357 * - point to a record between buffer_base and absolute_maximum
358 *
359 * index points to a valid record.
360 */
361 base = ds_get(context->ds, qual, ds_buffer_base);
362 index = ds_get(context->ds, qual, ds_index);
363 end = ds_get(context->ds, qual, ds_absolute_maximum);
364 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
359 365
360 buffer = kzalloc(size, GFP_KERNEL); 366 write_end = min(end, int_th);
361 if (!buffer)
362 return NULL;
363 367
364 current->mm->total_vm += pgsz; 368 /* if we are already beyond the interrupt threshold,
365 current->mm->locked_vm += pgsz; 369 * we fill the entire buffer */
370 if (write_end <= index)
371 write_end = end;
366 372
367 if (pages) 373 if (write_end <= index)
368 *pages = pgsz; 374 break;
375
376 write_size = min((unsigned long) size, write_end - index);
377 memcpy((void *)index, record, write_size);
369 378
370 return buffer; 379 record = (const char *)record + write_size;
380 size -= write_size;
381 bytes_written += write_size;
382
383 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
384 adj_write_size *= ds_cfg.sizeof_rec[qual];
385
386 /* zero out trailing bytes */
387 memset((char *)index + write_size, 0,
388 adj_write_size - write_size);
389 index += adj_write_size;
390
391 if (index >= end)
392 index = base;
393 ds_set(context->ds, qual, ds_index, index);
394
395 if (index >= int_th)
396 ds_overflow(context, qual);
397 }
398
399 return bytes_written;
371} 400}
372 401
373static int ds_request(struct task_struct *task, void *base, size_t size, 402
374 ds_ovfl_callback_t ovfl, enum ds_qualifier qual) 403/*
404 * Branch Trace Store (BTS) uses the following format. Different
405 * architectures vary in the size of those fields.
406 * - source linear address
407 * - destination linear address
408 * - flags
409 *
410 * Later architectures use 64bit pointers throughout, whereas earlier
411 * architectures use 32bit pointers in 32bit mode.
412 *
413 * We compute the base address for the first 8 fields based on:
414 * - the field size stored in the DS configuration
415 * - the relative field position
416 *
417 * In order to store additional information in the BTS buffer, we use
418 * a special source address to indicate that the record requires
419 * special interpretation.
420 *
421 * Netburst indicated via a bit in the flags field whether the branch
422 * was predicted; this is ignored.
423 *
424 * We use two levels of abstraction:
425 * - the raw data level defined here
426 * - an arch-independent level defined in ds.h
427 */
428
429enum bts_field {
430 bts_from,
431 bts_to,
432 bts_flags,
433
434 bts_qual = bts_from,
435 bts_jiffies = bts_to,
436 bts_pid = bts_flags,
437
438 bts_qual_mask = (bts_qual_max - 1),
439 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
440};
441
442static inline unsigned long bts_get(const char *base, enum bts_field field)
375{ 443{
376 struct ds_context *context; 444 base += (ds_cfg.sizeof_field * field);
377 unsigned long buffer, adj; 445 return *(unsigned long *)base;
378 const unsigned long alignment = (1 << 3); 446}
379 unsigned long irq; 447
380 int error = 0; 448static inline void bts_set(char *base, enum bts_field field, unsigned long val)
449{
450 base += (ds_cfg.sizeof_field * field);;
451 (*(unsigned long *)base) = val;
452}
381 453
382 if (!ds_cfg.sizeof_ds)
383 return -EOPNOTSUPP;
384 454
385 /* we require some space to do alignment adjustments below */ 455/*
386 if (size < (alignment + ds_cfg.sizeof_rec[qual])) 456 * The raw BTS data is architecture dependent.
457 *
458 * For higher-level users, we give an arch-independent view.
459 * - ds.h defines struct bts_struct
460 * - bts_read translates one raw bts record into a bts_struct
461 * - bts_write translates one bts_struct into the raw format and
462 * writes it into the top of the parameter tracer's buffer.
463 *
464 * return: bytes read/written on success; -Eerrno, otherwise
465 */
466static int bts_read(struct bts_tracer *tracer, const void *at,
467 struct bts_struct *out)
468{
469 if (!tracer)
387 return -EINVAL; 470 return -EINVAL;
388 471
389 /* buffer overflow notification is not yet implemented */ 472 if (at < tracer->trace.ds.begin)
390 if (ovfl) 473 return -EINVAL;
391 return -EOPNOTSUPP;
392 474
475 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
476 return -EINVAL;
393 477
394 context = ds_alloc_context(task); 478 memset(out, 0, sizeof(*out));
395 if (!context) 479 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
396 return -ENOMEM; 480 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
481 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
482 out->variant.timestamp.pid = bts_get(at, bts_pid);
483 } else {
484 out->qualifier = bts_branch;
485 out->variant.lbr.from = bts_get(at, bts_from);
486 out->variant.lbr.to = bts_get(at, bts_to);
487
488 if (!out->variant.lbr.from && !out->variant.lbr.to)
489 out->qualifier = bts_invalid;
490 }
397 491
398 spin_lock_irqsave(&ds_lock, irq); 492 return ds_cfg.sizeof_rec[ds_bts];
493}
399 494
400 error = -EPERM; 495static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
401 if (!check_tracer(task)) 496{
402 goto out_unlock; 497 unsigned char raw[MAX_SIZEOF_BTS];
403 498
404 get_tracer(task); 499 if (!tracer)
500 return -EINVAL;
405 501
406 error = -EALREADY; 502 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
407 if (context->owner[qual] == current) 503 return -EOVERFLOW;
408 goto out_put_tracer;
409 error = -EPERM;
410 if (context->owner[qual] != NULL)
411 goto out_put_tracer;
412 context->owner[qual] = current;
413 504
414 spin_unlock_irqrestore(&ds_lock, irq); 505 switch (in->qualifier) {
506 case bts_invalid:
507 bts_set(raw, bts_from, 0);
508 bts_set(raw, bts_to, 0);
509 bts_set(raw, bts_flags, 0);
510 break;
511 case bts_branch:
512 bts_set(raw, bts_from, in->variant.lbr.from);
513 bts_set(raw, bts_to, in->variant.lbr.to);
514 bts_set(raw, bts_flags, 0);
515 break;
516 case bts_task_arrives:
517 case bts_task_departs:
518 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
519 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
520 bts_set(raw, bts_pid, in->variant.timestamp.pid);
521 break;
522 default:
523 return -EINVAL;
524 }
415 525
526 return ds_write(tracer->ds.context, ds_bts, raw,
527 ds_cfg.sizeof_rec[ds_bts]);
528}
416 529
417 error = -ENOMEM;
418 if (!base) {
419 base = ds_allocate_buffer(size, &context->pages[qual]);
420 if (!base)
421 goto out_release;
422 530
423 context->buffer[qual] = base; 531static void ds_write_config(struct ds_context *context,
424 } 532 struct ds_trace *cfg, enum ds_qualifier qual)
425 error = 0; 533{
534 unsigned char *ds = context->ds;
535
536 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
537 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
538 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
539 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
540}
541
542static void ds_read_config(struct ds_context *context,
543 struct ds_trace *cfg, enum ds_qualifier qual)
544{
545 unsigned char *ds = context->ds;
426 546
427 context->callback[qual] = ovfl; 547 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
548 cfg->top = (void *)ds_get(ds, qual, ds_index);
549 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
550 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
551}
552
553static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
554 void *base, size_t size, size_t ith,
555 unsigned int flags) {
556 unsigned long buffer, adj;
428 557
429 /* adjust the buffer address and size to meet alignment 558 /* adjust the buffer address and size to meet alignment
430 * constraints: 559 * constraints:
@@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
436 */ 565 */
437 buffer = (unsigned long)base; 566 buffer = (unsigned long)base;
438 567
439 adj = ALIGN(buffer, alignment) - buffer; 568 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
440 buffer += adj; 569 buffer += adj;
441 size -= adj; 570 size -= adj;
442 571
443 size /= ds_cfg.sizeof_rec[qual]; 572 trace->n = size / ds_cfg.sizeof_rec[qual];
444 size *= ds_cfg.sizeof_rec[qual]; 573 trace->size = ds_cfg.sizeof_rec[qual];
445
446 ds_set(context->ds, qual, ds_buffer_base, buffer);
447 ds_set(context->ds, qual, ds_index, buffer);
448 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
449 574
450 if (ovfl) { 575 size = (trace->n * trace->size);
451 /* todo: select a suitable interrupt threshold */
452 } else
453 ds_set(context->ds, qual,
454 ds_interrupt_threshold, buffer + size + 1);
455 576
456 /* we keep the context until ds_release */ 577 trace->begin = (void *)buffer;
457 return error; 578 trace->top = trace->begin;
458 579 trace->end = (void *)(buffer + size);
459 out_release: 580 /* The value for 'no threshold' is -1, which will set the
460 context->owner[qual] = NULL; 581 * threshold outside of the buffer, just like we want it.
461 ds_put_context(context); 582 */
462 put_tracer(task); 583 trace->ith = (void *)(buffer + size - ith);
463 return error;
464
465 out_put_tracer:
466 spin_unlock_irqrestore(&ds_lock, irq);
467 ds_put_context(context);
468 put_tracer(task);
469 return error;
470 584
471 out_unlock: 585 trace->flags = flags;
472 spin_unlock_irqrestore(&ds_lock, irq);
473 ds_put_context(context);
474 return error;
475} 586}
476 587
477int ds_request_bts(struct task_struct *task, void *base, size_t size,
478 ds_ovfl_callback_t ovfl)
479{
480 return ds_request(task, base, size, ovfl, ds_bts);
481}
482 588
483int ds_request_pebs(struct task_struct *task, void *base, size_t size, 589static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
484 ds_ovfl_callback_t ovfl) 590 enum ds_qualifier qual, struct task_struct *task,
485{ 591 void *base, size_t size, size_t th, unsigned int flags)
486 return ds_request(task, base, size, ovfl, ds_pebs);
487}
488
489static int ds_release(struct task_struct *task, enum ds_qualifier qual)
490{ 592{
491 struct ds_context *context; 593 struct ds_context *context;
492 int error; 594 int error;
493 595
494 context = ds_get_context(task); 596 error = -EINVAL;
495 error = ds_validate_access(context, qual); 597 if (!base)
496 if (error < 0)
497 goto out; 598 goto out;
498 599
499 kfree(context->buffer[qual]); 600 /* we require some space to do alignment adjustments below */
500 context->buffer[qual] = NULL; 601 error = -EINVAL;
501 602 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
502 current->mm->total_vm -= context->pages[qual]; 603 goto out;
503 current->mm->locked_vm -= context->pages[qual];
504 context->pages[qual] = 0;
505 context->owner[qual] = NULL;
506
507 /*
508 * we put the context twice:
509 * once for the ds_get_context
510 * once for the corresponding ds_request
511 */
512 ds_put_context(context);
513 out:
514 ds_put_context(context);
515 return error;
516}
517 604
518int ds_release_bts(struct task_struct *task) 605 if (th != (size_t)-1) {
519{ 606 th *= ds_cfg.sizeof_rec[qual];
520 return ds_release(task, ds_bts);
521}
522 607
523int ds_release_pebs(struct task_struct *task) 608 error = -EINVAL;
524{ 609 if (size <= th)
525 return ds_release(task, ds_pebs); 610 goto out;
526} 611 }
527 612
528static int ds_get_index(struct task_struct *task, size_t *pos, 613 tracer->buffer = base;
529 enum ds_qualifier qual) 614 tracer->size = size;
530{
531 struct ds_context *context;
532 unsigned long base, index;
533 int error;
534 615
616 error = -ENOMEM;
535 context = ds_get_context(task); 617 context = ds_get_context(task);
536 error = ds_validate_access(context, qual); 618 if (!context)
537 if (error < 0)
538 goto out; 619 goto out;
620 tracer->context = context;
539 621
540 base = ds_get(context->ds, qual, ds_buffer_base); 622 ds_init_ds_trace(trace, qual, base, size, th, flags);
541 index = ds_get(context->ds, qual, ds_index);
542 623
543 error = ((index - base) / ds_cfg.sizeof_rec[qual]); 624 error = 0;
544 if (pos)
545 *pos = error;
546 out: 625 out:
547 ds_put_context(context);
548 return error; 626 return error;
549} 627}
550 628
551int ds_get_bts_index(struct task_struct *task, size_t *pos) 629struct bts_tracer *ds_request_bts(struct task_struct *task,
552{ 630 void *base, size_t size,
553 return ds_get_index(task, pos, ds_bts); 631 bts_ovfl_callback_t ovfl, size_t th,
554} 632 unsigned int flags)
555
556int ds_get_pebs_index(struct task_struct *task, size_t *pos)
557{ 633{
558 return ds_get_index(task, pos, ds_pebs); 634 struct bts_tracer *tracer;
559} 635 unsigned long irq;
560
561static int ds_get_end(struct task_struct *task, size_t *pos,
562 enum ds_qualifier qual)
563{
564 struct ds_context *context;
565 unsigned long base, end;
566 int error; 636 int error;
567 637
568 context = ds_get_context(task); 638 error = -EOPNOTSUPP;
569 error = ds_validate_access(context, qual); 639 if (!ds_cfg.ctl[dsf_bts])
570 if (error < 0)
571 goto out; 640 goto out;
572 641
573 base = ds_get(context->ds, qual, ds_buffer_base); 642 /* buffer overflow notification is not yet implemented */
574 end = ds_get(context->ds, qual, ds_absolute_maximum); 643 error = -EOPNOTSUPP;
644 if (ovfl)
645 goto out;
575 646
576 error = ((end - base) / ds_cfg.sizeof_rec[qual]); 647 error = -ENOMEM;
577 if (pos) 648 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
578 *pos = error; 649 if (!tracer)
579 out: 650 goto out;
580 ds_put_context(context); 651 tracer->ovfl = ovfl;
581 return error;
582}
583 652
584int ds_get_bts_end(struct task_struct *task, size_t *pos) 653 error = ds_request(&tracer->ds, &tracer->trace.ds,
585{ 654 ds_bts, task, base, size, th, flags);
586 return ds_get_end(task, pos, ds_bts); 655 if (error < 0)
587} 656 goto out_tracer;
588 657
589int ds_get_pebs_end(struct task_struct *task, size_t *pos)
590{
591 return ds_get_end(task, pos, ds_pebs);
592}
593 658
594static int ds_access(struct task_struct *task, size_t index, 659 spin_lock_irqsave(&ds_lock, irq);
595 const void **record, enum ds_qualifier qual)
596{
597 struct ds_context *context;
598 unsigned long base, idx;
599 int error;
600 660
601 if (!record) 661 error = -EPERM;
602 return -EINVAL; 662 if (!check_tracer(task))
663 goto out_unlock;
664 get_tracer(task);
603 665
604 context = ds_get_context(task); 666 error = -EPERM;
605 error = ds_validate_access(context, qual); 667 if (tracer->ds.context->bts_master)
606 if (error < 0) 668 goto out_put_tracer;
607 goto out; 669 tracer->ds.context->bts_master = tracer;
608 670
609 base = ds_get(context->ds, qual, ds_buffer_base); 671 spin_unlock_irqrestore(&ds_lock, irq);
610 idx = base + (index * ds_cfg.sizeof_rec[qual]);
611 672
612 error = -EINVAL;
613 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
614 goto out;
615 673
616 *record = (const void *)idx; 674 tracer->trace.read = bts_read;
617 error = ds_cfg.sizeof_rec[qual]; 675 tracer->trace.write = bts_write;
618 out:
619 ds_put_context(context);
620 return error;
621}
622 676
623int ds_access_bts(struct task_struct *task, size_t index, const void **record) 677 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
624{ 678 ds_resume_bts(tracer);
625 return ds_access(task, index, record, ds_bts);
626}
627 679
628int ds_access_pebs(struct task_struct *task, size_t index, const void **record) 680 return tracer;
629{ 681
630 return ds_access(task, index, record, ds_pebs); 682 out_put_tracer:
683 put_tracer(task);
684 out_unlock:
685 spin_unlock_irqrestore(&ds_lock, irq);
686 ds_put_context(tracer->ds.context);
687 out_tracer:
688 kfree(tracer);
689 out:
690 return ERR_PTR(error);
631} 691}
632 692
633static int ds_write(struct task_struct *task, const void *record, size_t size, 693struct pebs_tracer *ds_request_pebs(struct task_struct *task,
634 enum ds_qualifier qual, int force) 694 void *base, size_t size,
695 pebs_ovfl_callback_t ovfl, size_t th,
696 unsigned int flags)
635{ 697{
636 struct ds_context *context; 698 struct pebs_tracer *tracer;
699 unsigned long irq;
637 int error; 700 int error;
638 701
639 if (!record) 702 /* buffer overflow notification is not yet implemented */
640 return -EINVAL; 703 error = -EOPNOTSUPP;
704 if (ovfl)
705 goto out;
641 706
642 error = -EPERM; 707 error = -ENOMEM;
643 context = ds_get_context(task); 708 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
644 if (!context) 709 if (!tracer)
645 goto out; 710 goto out;
711 tracer->ovfl = ovfl;
646 712
647 if (!force) { 713 error = ds_request(&tracer->ds, &tracer->trace.ds,
648 error = ds_validate_access(context, qual); 714 ds_pebs, task, base, size, th, flags);
649 if (error < 0) 715 if (error < 0)
650 goto out; 716 goto out_tracer;
651 }
652 717
653 error = 0; 718 spin_lock_irqsave(&ds_lock, irq);
654 while (size) {
655 unsigned long base, index, end, write_end, int_th;
656 unsigned long write_size, adj_write_size;
657 719
658 /* 720 error = -EPERM;
659 * write as much as possible without producing an 721 if (!check_tracer(task))
660 * overflow interrupt. 722 goto out_unlock;
661 * 723 get_tracer(task);
662 * interrupt_threshold must either be
663 * - bigger than absolute_maximum or
664 * - point to a record between buffer_base and absolute_maximum
665 *
666 * index points to a valid record.
667 */
668 base = ds_get(context->ds, qual, ds_buffer_base);
669 index = ds_get(context->ds, qual, ds_index);
670 end = ds_get(context->ds, qual, ds_absolute_maximum);
671 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
672 724
673 write_end = min(end, int_th); 725 error = -EPERM;
726 if (tracer->ds.context->pebs_master)
727 goto out_put_tracer;
728 tracer->ds.context->pebs_master = tracer;
674 729
675 /* if we are already beyond the interrupt threshold, 730 spin_unlock_irqrestore(&ds_lock, irq);
676 * we fill the entire buffer */
677 if (write_end <= index)
678 write_end = end;
679 731
680 if (write_end <= index) 732 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
681 goto out; 733 ds_resume_pebs(tracer);
682 734
683 write_size = min((unsigned long) size, write_end - index); 735 return tracer;
684 memcpy((void *)index, record, write_size);
685 736
686 record = (const char *)record + write_size; 737 out_put_tracer:
687 size -= write_size; 738 put_tracer(task);
688 error += write_size; 739 out_unlock:
740 spin_unlock_irqrestore(&ds_lock, irq);
741 ds_put_context(tracer->ds.context);
742 out_tracer:
743 kfree(tracer);
744 out:
745 return ERR_PTR(error);
746}
689 747
690 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 748void ds_release_bts(struct bts_tracer *tracer)
691 adj_write_size *= ds_cfg.sizeof_rec[qual]; 749{
750 if (!tracer)
751 return;
692 752
693 /* zero out trailing bytes */ 753 ds_suspend_bts(tracer);
694 memset((char *)index + write_size, 0,
695 adj_write_size - write_size);
696 index += adj_write_size;
697 754
698 if (index >= end) 755 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
699 index = base; 756 tracer->ds.context->bts_master = NULL;
700 ds_set(context->ds, qual, ds_index, index);
701 757
702 if (index >= int_th) 758 put_tracer(tracer->ds.context->task);
703 ds_overflow(task, context, qual); 759 ds_put_context(tracer->ds.context);
704 }
705 760
706 out: 761 kfree(tracer);
707 ds_put_context(context);
708 return error;
709} 762}
710 763
711int ds_write_bts(struct task_struct *task, const void *record, size_t size) 764void ds_suspend_bts(struct bts_tracer *tracer)
712{ 765{
713 return ds_write(task, record, size, ds_bts, /* force = */ 0); 766 struct task_struct *task;
714}
715 767
716int ds_write_pebs(struct task_struct *task, const void *record, size_t size) 768 if (!tracer)
717{ 769 return;
718 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
719}
720 770
721int ds_unchecked_write_bts(struct task_struct *task, 771 task = tracer->ds.context->task;
722 const void *record, size_t size)
723{
724 return ds_write(task, record, size, ds_bts, /* force = */ 1);
725}
726 772
727int ds_unchecked_write_pebs(struct task_struct *task, 773 if (!task || (task == current))
728 const void *record, size_t size) 774 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
729{ 775
730 return ds_write(task, record, size, ds_pebs, /* force = */ 1); 776 if (task) {
777 task->thread.debugctlmsr &= ~BTS_CONTROL;
778
779 if (!task->thread.debugctlmsr)
780 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
781 }
731} 782}
732 783
733static int ds_reset_or_clear(struct task_struct *task, 784void ds_resume_bts(struct bts_tracer *tracer)
734 enum ds_qualifier qual, int clear)
735{ 785{
736 struct ds_context *context; 786 struct task_struct *task;
737 unsigned long base, end; 787 unsigned long control;
738 int error;
739 788
740 context = ds_get_context(task); 789 if (!tracer)
741 error = ds_validate_access(context, qual); 790 return;
742 if (error < 0)
743 goto out;
744 791
745 base = ds_get(context->ds, qual, ds_buffer_base); 792 task = tracer->ds.context->task;
746 end = ds_get(context->ds, qual, ds_absolute_maximum);
747 793
748 if (clear) 794 control = ds_cfg.ctl[dsf_bts];
749 memset((void *)base, 0, end - base); 795 if (!(tracer->trace.ds.flags & BTS_KERNEL))
796 control |= ds_cfg.ctl[dsf_bts_kernel];
797 if (!(tracer->trace.ds.flags & BTS_USER))
798 control |= ds_cfg.ctl[dsf_bts_user];
750 799
751 ds_set(context->ds, qual, ds_index, base); 800 if (task) {
801 task->thread.debugctlmsr |= control;
802 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
803 }
752 804
753 error = 0; 805 if (!task || (task == current))
754 out: 806 update_debugctlmsr(get_debugctlmsr() | control);
755 ds_put_context(context);
756 return error;
757} 807}
758 808
759int ds_reset_bts(struct task_struct *task) 809void ds_release_pebs(struct pebs_tracer *tracer)
760{ 810{
761 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); 811 if (!tracer)
812 return;
813
814 ds_suspend_pebs(tracer);
815
816 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
817 tracer->ds.context->pebs_master = NULL;
818
819 put_tracer(tracer->ds.context->task);
820 ds_put_context(tracer->ds.context);
821
822 kfree(tracer);
762} 823}
763 824
764int ds_reset_pebs(struct task_struct *task) 825void ds_suspend_pebs(struct pebs_tracer *tracer)
765{ 826{
766 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); 827
767} 828}
768 829
769int ds_clear_bts(struct task_struct *task) 830void ds_resume_pebs(struct pebs_tracer *tracer)
770{ 831{
771 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); 832
772} 833}
773 834
774int ds_clear_pebs(struct task_struct *task) 835const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
775{ 836{
776 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); 837 if (!tracer)
838 return NULL;
839
840 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
841 return &tracer->trace;
777} 842}
778 843
779int ds_get_pebs_reset(struct task_struct *task, u64 *value) 844const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
780{ 845{
781 struct ds_context *context; 846 if (!tracer)
782 int error; 847 return NULL;
848
849 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
850 tracer->trace.reset_value =
851 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
783 852
784 if (!value) 853 return &tracer->trace;
854}
855
856int ds_reset_bts(struct bts_tracer *tracer)
857{
858 if (!tracer)
785 return -EINVAL; 859 return -EINVAL;
786 860
787 context = ds_get_context(task); 861 tracer->trace.ds.top = tracer->trace.ds.begin;
788 error = ds_validate_access(context, ds_pebs);
789 if (error < 0)
790 goto out;
791 862
792 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); 863 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
864 (unsigned long)tracer->trace.ds.top);
793 865
794 error = 0; 866 return 0;
795 out:
796 ds_put_context(context);
797 return error;
798} 867}
799 868
800int ds_set_pebs_reset(struct task_struct *task, u64 value) 869int ds_reset_pebs(struct pebs_tracer *tracer)
801{ 870{
802 struct ds_context *context; 871 if (!tracer)
803 int error; 872 return -EINVAL;
804 873
805 context = ds_get_context(task); 874 tracer->trace.ds.top = tracer->trace.ds.begin;
806 error = ds_validate_access(context, ds_pebs);
807 if (error < 0)
808 goto out;
809 875
810 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; 876 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
877 (unsigned long)tracer->trace.ds.top);
811 878
812 error = 0; 879 return 0;
813 out: 880}
814 ds_put_context(context); 881
815 return error; 882int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
883{
884 if (!tracer)
885 return -EINVAL;
886
887 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
888
889 return 0;
816} 890}
817 891
818static const struct ds_configuration ds_cfg_var = { 892static const struct ds_configuration ds_cfg_netburst = {
819 .sizeof_ds = sizeof(long) * 12, 893 .name = "netburst",
820 .sizeof_field = sizeof(long), 894 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
821 .sizeof_rec[ds_bts] = sizeof(long) * 3, 895 .ctl[dsf_bts_kernel] = (1 << 5),
896 .ctl[dsf_bts_user] = (1 << 6),
897
898 .sizeof_field = sizeof(long),
899 .sizeof_rec[ds_bts] = sizeof(long) * 3,
822#ifdef __i386__ 900#ifdef __i386__
823 .sizeof_rec[ds_pebs] = sizeof(long) * 10 901 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
824#else 902#else
825 .sizeof_rec[ds_pebs] = sizeof(long) * 18 903 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
826#endif 904#endif
827}; 905};
828static const struct ds_configuration ds_cfg_64 = { 906static const struct ds_configuration ds_cfg_pentium_m = {
829 .sizeof_ds = 8 * 12, 907 .name = "pentium m",
830 .sizeof_field = 8, 908 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
831 .sizeof_rec[ds_bts] = 8 * 3, 909
910 .sizeof_field = sizeof(long),
911 .sizeof_rec[ds_bts] = sizeof(long) * 3,
832#ifdef __i386__ 912#ifdef __i386__
833 .sizeof_rec[ds_pebs] = 8 * 10 913 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
834#else 914#else
835 .sizeof_rec[ds_pebs] = 8 * 18 915 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
836#endif 916#endif
837}; 917};
918static const struct ds_configuration ds_cfg_core2 = {
919 .name = "core 2",
920 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
921 .ctl[dsf_bts_kernel] = (1 << 9),
922 .ctl[dsf_bts_user] = (1 << 10),
923
924 .sizeof_field = 8,
925 .sizeof_rec[ds_bts] = 8 * 3,
926 .sizeof_rec[ds_pebs] = 8 * 18,
927};
838 928
839static inline void 929static void
840ds_configure(const struct ds_configuration *cfg) 930ds_configure(const struct ds_configuration *cfg)
841{ 931{
932 memset(&ds_cfg, 0, sizeof(ds_cfg));
842 ds_cfg = *cfg; 933 ds_cfg = *cfg;
934
935 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
936
937 if (!cpu_has_bts) {
938 ds_cfg.ctl[dsf_bts] = 0;
939 printk(KERN_INFO "[ds] bts not available\n");
940 }
941 if (!cpu_has_pebs)
942 printk(KERN_INFO "[ds] pebs not available\n");
943
944 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
843} 945}
844 946
845void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 947void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -847,16 +949,15 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
847 switch (c->x86) { 949 switch (c->x86) {
848 case 0x6: 950 case 0x6:
849 switch (c->x86_model) { 951 switch (c->x86_model) {
952 case 0 ... 0xC:
953 /* sorry, don't know about them */
954 break;
850 case 0xD: 955 case 0xD:
851 case 0xE: /* Pentium M */ 956 case 0xE: /* Pentium M */
852 ds_configure(&ds_cfg_var); 957 ds_configure(&ds_cfg_pentium_m);
853 break; 958 break;
854 case 0xF: /* Core2 */ 959 default: /* Core2, Atom, ... */
855 case 0x1C: /* Atom */ 960 ds_configure(&ds_cfg_core2);
856 ds_configure(&ds_cfg_64);
857 break;
858 default:
859 /* sorry, don't know about them */
860 break; 961 break;
861 } 962 }
862 break; 963 break;
@@ -865,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
865 case 0x0: 966 case 0x0:
866 case 0x1: 967 case 0x1:
867 case 0x2: /* Netburst */ 968 case 0x2: /* Netburst */
868 ds_configure(&ds_cfg_var); 969 ds_configure(&ds_cfg_netburst);
869 break; 970 break;
870 default: 971 default:
871 /* sorry, don't know about them */ 972 /* sorry, don't know about them */
@@ -878,12 +979,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
878 } 979 }
879} 980}
880 981
881void ds_free(struct ds_context *context) 982/*
983 * Change the DS configuration from tracing prev to tracing next.
984 */
985void ds_switch_to(struct task_struct *prev, struct task_struct *next)
986{
987 struct ds_context *prev_ctx = prev->thread.ds_ctx;
988 struct ds_context *next_ctx = next->thread.ds_ctx;
989
990 if (prev_ctx) {
991 update_debugctlmsr(0);
992
993 if (prev_ctx->bts_master &&
994 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
995 struct bts_struct ts = {
996 .qualifier = bts_task_departs,
997 .variant.timestamp.jiffies = jiffies_64,
998 .variant.timestamp.pid = prev->pid
999 };
1000 bts_write(prev_ctx->bts_master, &ts);
1001 }
1002 }
1003
1004 if (next_ctx) {
1005 if (next_ctx->bts_master &&
1006 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1007 struct bts_struct ts = {
1008 .qualifier = bts_task_arrives,
1009 .variant.timestamp.jiffies = jiffies_64,
1010 .variant.timestamp.pid = next->pid
1011 };
1012 bts_write(next_ctx->bts_master, &ts);
1013 }
1014
1015 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1016 }
1017
1018 update_debugctlmsr(next->thread.debugctlmsr);
1019}
1020
1021void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
1022{
1023 clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
1024 tsk->thread.ds_ctx = NULL;
1025}
1026
1027void ds_exit_thread(struct task_struct *tsk)
882{ 1028{
883 /* This is called when the task owning the parameter context 1029 WARN_ON(tsk->thread.ds_ctx);
884 * is dying. There should not be any user of that context left
885 * to disturb us, anymore. */
886 unsigned long leftovers = context->count;
887 while (leftovers--)
888 ds_put_context(context);
889} 1030}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..6b1f6f6f8661
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,351 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16#include <linux/sysfs.h>
17
18#include <asm/stacktrace.h>
19
20#include "dumpstack.h"
21
22int panic_on_unrecovered_nmi;
23unsigned int code_bytes = 64;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33#ifdef CONFIG_FUNCTION_GRAPH_TRACER
34static void
35print_ftrace_graph_addr(unsigned long addr, void *data,
36 const struct stacktrace_ops *ops,
37 struct thread_info *tinfo, int *graph)
38{
39 struct task_struct *task = tinfo->task;
40 unsigned long ret_addr;
41 int index = task->curr_ret_stack;
42
43 if (addr != (unsigned long)return_to_handler)
44 return;
45
46 if (!task->ret_stack || index < *graph)
47 return;
48
49 index -= *graph;
50 ret_addr = task->ret_stack[index].ret;
51
52 ops->address(data, ret_addr, 1);
53
54 (*graph)++;
55}
56#else
57static inline void
58print_ftrace_graph_addr(unsigned long addr, void *data,
59 const struct stacktrace_ops *ops,
60 struct thread_info *tinfo, int *graph)
61{ }
62#endif
63
64/*
65 * x86-64 can have up to three kernel stacks:
66 * process stack
67 * interrupt stack
68 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
69 */
70
71static inline int valid_stack_ptr(struct thread_info *tinfo,
72 void *p, unsigned int size, void *end)
73{
74 void *t = tinfo;
75 if (end) {
76 if (p < end && p >= (end-THREAD_SIZE))
77 return 1;
78 else
79 return 0;
80 }
81 return p > t && p < t + THREAD_SIZE - size;
82}
83
84unsigned long
85print_context_stack(struct thread_info *tinfo,
86 unsigned long *stack, unsigned long bp,
87 const struct stacktrace_ops *ops, void *data,
88 unsigned long *end, int *graph)
89{
90 struct stack_frame *frame = (struct stack_frame *)bp;
91
92 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
93 unsigned long addr;
94
95 addr = *stack;
96 if (__kernel_text_address(addr)) {
97 if ((unsigned long) stack == bp + sizeof(long)) {
98 ops->address(data, addr, 1);
99 frame = frame->next_frame;
100 bp = (unsigned long) frame;
101 } else {
102 ops->address(data, addr, bp == 0);
103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 }
106 stack++;
107 }
108 return bp;
109}
110
111
112static void
113print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
114{
115 printk(data);
116 print_symbol(msg, symbol);
117 printk("\n");
118}
119
120static void print_trace_warning(void *data, char *msg)
121{
122 printk("%s%s\n", (char *)data, msg);
123}
124
125static int print_trace_stack(void *data, char *name)
126{
127 printk("%s <%s> ", (char *)data, name);
128 return 0;
129}
130
131/*
132 * Print one address/symbol entries per line.
133 */
134static void print_trace_address(void *data, unsigned long addr, int reliable)
135{
136 touch_nmi_watchdog();
137 printk(data);
138 printk_address(addr, reliable);
139}
140
141static const struct stacktrace_ops print_trace_ops = {
142 .warning = print_trace_warning,
143 .warning_symbol = print_trace_warning_symbol,
144 .stack = print_trace_stack,
145 .address = print_trace_address,
146};
147
148void
149show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
150 unsigned long *stack, unsigned long bp, char *log_lvl)
151{
152 printk("%sCall Trace:\n", log_lvl);
153 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
154}
155
156void show_trace(struct task_struct *task, struct pt_regs *regs,
157 unsigned long *stack, unsigned long bp)
158{
159 show_trace_log_lvl(task, regs, stack, bp, "");
160}
161
162void show_stack(struct task_struct *task, unsigned long *sp)
163{
164 show_stack_log_lvl(task, NULL, sp, 0, "");
165}
166
167/*
168 * The architecture-independent dump_stack generator
169 */
170void dump_stack(void)
171{
172 unsigned long bp = 0;
173 unsigned long stack;
174
175#ifdef CONFIG_FRAME_POINTER
176 if (!bp)
177 get_bp(bp);
178#endif
179
180 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
181 current->pid, current->comm, print_tainted(),
182 init_utsname()->release,
183 (int)strcspn(init_utsname()->version, " "),
184 init_utsname()->version);
185 show_trace(NULL, NULL, &stack, bp);
186}
187EXPORT_SYMBOL(dump_stack);
188
189static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
190static int die_owner = -1;
191static unsigned int die_nest_count;
192
193unsigned __kprobes long oops_begin(void)
194{
195 int cpu;
196 unsigned long flags;
197
198 oops_enter();
199
200 /* racy, but better than risking deadlock. */
201 raw_local_irq_save(flags);
202 cpu = smp_processor_id();
203 if (!__raw_spin_trylock(&die_lock)) {
204 if (cpu == die_owner)
205 /* nested oops. should stop eventually */;
206 else
207 __raw_spin_lock(&die_lock);
208 }
209 die_nest_count++;
210 die_owner = cpu;
211 console_verbose();
212 bust_spinlocks(1);
213 return flags;
214}
215
216void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
217{
218 if (regs && kexec_should_crash(current))
219 crash_kexec(regs);
220
221 bust_spinlocks(0);
222 die_owner = -1;
223 add_taint(TAINT_DIE);
224 die_nest_count--;
225 if (!die_nest_count)
226 /* Nest count reaches zero, release the lock. */
227 __raw_spin_unlock(&die_lock);
228 raw_local_irq_restore(flags);
229 oops_exit();
230
231 if (!signr)
232 return;
233 if (in_interrupt())
234 panic("Fatal exception in interrupt");
235 if (panic_on_oops)
236 panic("Fatal exception");
237 do_exit(signr);
238}
239
240int __kprobes __die(const char *str, struct pt_regs *regs, long err)
241{
242#ifdef CONFIG_X86_32
243 unsigned short ss;
244 unsigned long sp;
245#endif
246 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
247#ifdef CONFIG_PREEMPT
248 printk("PREEMPT ");
249#endif
250#ifdef CONFIG_SMP
251 printk("SMP ");
252#endif
253#ifdef CONFIG_DEBUG_PAGEALLOC
254 printk("DEBUG_PAGEALLOC");
255#endif
256 printk("\n");
257 sysfs_printk_last_file();
258 if (notify_die(DIE_OOPS, str, regs, err,
259 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
260 return 1;
261
262 show_registers(regs);
263#ifdef CONFIG_X86_32
264 sp = (unsigned long) (&regs->sp);
265 savesegment(ss, ss);
266 if (user_mode(regs)) {
267 sp = regs->sp;
268 ss = regs->ss & 0xffff;
269 }
270 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
271 print_symbol("%s", regs->ip);
272 printk(" SS:ESP %04x:%08lx\n", ss, sp);
273#else
274 /* Executive summary in case the oops scrolled away */
275 printk(KERN_ALERT "RIP ");
276 printk_address(regs->ip, 1);
277 printk(" RSP <%016lx>\n", regs->sp);
278#endif
279 return 0;
280}
281
282/*
283 * This is gone through when something in the kernel has done something bad
284 * and is about to be terminated:
285 */
286void die(const char *str, struct pt_regs *regs, long err)
287{
288 unsigned long flags = oops_begin();
289 int sig = SIGSEGV;
290
291 if (!user_mode_vm(regs))
292 report_bug(regs->ip, regs);
293
294 if (__die(str, regs, err))
295 sig = 0;
296 oops_end(flags, regs, sig);
297}
298
299void notrace __kprobes
300die_nmi(char *str, struct pt_regs *regs, int do_panic)
301{
302 unsigned long flags;
303
304 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
305 return;
306
307 /*
308 * We are in trouble anyway, lets at least try
309 * to get a message out.
310 */
311 flags = oops_begin();
312 printk(KERN_EMERG "%s", str);
313 printk(" on CPU%d, ip %08lx, registers:\n",
314 smp_processor_id(), regs->ip);
315 show_registers(regs);
316 oops_end(flags, regs, 0);
317 if (do_panic || panic_on_oops)
318 panic("Non maskable interrupt");
319 nmi_exit();
320 local_irq_enable();
321 do_exit(SIGBUS);
322}
323
324static int __init oops_setup(char *s)
325{
326 if (!s)
327 return -EINVAL;
328 if (!strcmp(s, "panic"))
329 panic_on_oops = 1;
330 return 0;
331}
332early_param("oops", oops_setup);
333
334static int __init kstack_setup(char *s)
335{
336 if (!s)
337 return -EINVAL;
338 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
339 return 0;
340}
341early_param("kstack", kstack_setup);
342
343static int __init code_bytes_setup(char *s)
344{
345 code_bytes = simple_strtoul(s, NULL, 0);
346 if (code_bytes > 8192)
347 code_bytes = 8192;
348
349 return 1;
350}
351__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..da87590b8698
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17extern unsigned long
18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end, int *graph);
22
23extern void
24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *stack, unsigned long bp, char *log_lvl);
26
27extern void
28show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
29 unsigned long *sp, unsigned long bp, char *log_lvl);
30
31extern unsigned int code_bytes;
32extern int kstack_depth_to_print;
33
34/* The form of the top of the frame on the stack */
35struct stack_frame {
36 struct stack_frame *next_frame;
37 unsigned long return_address;
38};
39#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..d593cd1f58dc 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,69 +17,14 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 8 20#include "dumpstack.h"
21#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33
34static inline int valid_stack_ptr(struct thread_info *tinfo,
35 void *p, unsigned int size, void *end)
36{
37 void *t = tinfo;
38 if (end) {
39 if (p < end && p >= (end-THREAD_SIZE))
40 return 1;
41 else
42 return 0;
43 }
44 return p > t && p < t + THREAD_SIZE - size;
45}
46
47/* The form of the top of the frame on the stack */
48struct stack_frame {
49 struct stack_frame *next_frame;
50 unsigned long return_address;
51};
52
53static inline unsigned long
54print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end)
58{
59 struct stack_frame *frame = (struct stack_frame *)bp;
60
61 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
62 unsigned long addr;
63
64 addr = *stack;
65 if (__kernel_text_address(addr)) {
66 if ((unsigned long) stack == bp + sizeof(long)) {
67 ops->address(data, addr, 1);
68 frame = frame->next_frame;
69 bp = (unsigned long) frame;
70 } else {
71 ops->address(data, addr, bp == 0);
72 }
73 }
74 stack++;
75 }
76 return bp;
77}
78 21
79void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
80 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
81 const struct stacktrace_ops *ops, void *data) 24 const struct stacktrace_ops *ops, void *data)
82{ 25{
26 int graph = 0;
27
83 if (!task) 28 if (!task)
84 task = current; 29 task = current;
85 30
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
107 52
108 context = (struct thread_info *) 53 context = (struct thread_info *)
109 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 54 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
110 bp = print_context_stack(context, stack, bp, ops, data, NULL); 55 bp = print_context_stack(context, stack, bp, ops,
56 data, NULL, &graph);
111 57
112 stack = (unsigned long *)context->previous_esp; 58 stack = (unsigned long *)context->previous_esp;
113 if (!stack) 59 if (!stack)
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
119} 65}
120EXPORT_SYMBOL(dump_trace); 66EXPORT_SYMBOL(dump_trace);
121 67
122static void 68void
123print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
124{
125 printk(data);
126 print_symbol(msg, symbol);
127 printk("\n");
128}
129
130static void print_trace_warning(void *data, char *msg)
131{
132 printk("%s%s\n", (char *)data, msg);
133}
134
135static int print_trace_stack(void *data, char *name)
136{
137 printk("%s <%s> ", (char *)data, name);
138 return 0;
139}
140
141/*
142 * Print one address/symbol entries per line.
143 */
144static void print_trace_address(void *data, unsigned long addr, int reliable)
145{
146 touch_nmi_watchdog();
147 printk(data);
148 printk_address(addr, reliable);
149}
150
151static const struct stacktrace_ops print_trace_ops = {
152 .warning = print_trace_warning,
153 .warning_symbol = print_trace_warning_symbol,
154 .stack = print_trace_stack,
155 .address = print_trace_address,
156};
157
158static void
159show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
160 unsigned long *stack, unsigned long bp, char *log_lvl)
161{
162 printk("%sCall Trace:\n", log_lvl);
163 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
164}
165
166void show_trace(struct task_struct *task, struct pt_regs *regs,
167 unsigned long *stack, unsigned long bp)
168{
169 show_trace_log_lvl(task, regs, stack, bp, "");
170}
171
172static void
173show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 69show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *sp, unsigned long bp, char *log_lvl) 70 unsigned long *sp, unsigned long bp, char *log_lvl)
175{ 71{
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
196 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 92 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
197} 93}
198 94
199void show_stack(struct task_struct *task, unsigned long *sp)
200{
201 show_stack_log_lvl(task, NULL, sp, 0, "");
202}
203
204/*
205 * The architecture-independent dump_stack generator
206 */
207void dump_stack(void)
208{
209 unsigned long bp = 0;
210 unsigned long stack;
211
212#ifdef CONFIG_FRAME_POINTER
213 if (!bp)
214 get_bp(bp);
215#endif
216
217 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
218 current->pid, current->comm, print_tainted(),
219 init_utsname()->release,
220 (int)strcspn(init_utsname()->version, " "),
221 init_utsname()->version);
222 show_trace(NULL, NULL, &stack, bp);
223}
224
225EXPORT_SYMBOL(dump_stack);
226 95
227void show_registers(struct pt_regs *regs) 96void show_registers(struct pt_regs *regs)
228{ 97{
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip)
283 return ud2 == 0x0b0f; 152 return ud2 == 0x0b0f;
284} 153}
285 154
286static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
287static int die_owner = -1;
288static unsigned int die_nest_count;
289
290unsigned __kprobes long oops_begin(void)
291{
292 unsigned long flags;
293
294 oops_enter();
295
296 if (die_owner != raw_smp_processor_id()) {
297 console_verbose();
298 raw_local_irq_save(flags);
299 __raw_spin_lock(&die_lock);
300 die_owner = smp_processor_id();
301 die_nest_count = 0;
302 bust_spinlocks(1);
303 } else {
304 raw_local_irq_save(flags);
305 }
306 die_nest_count++;
307 return flags;
308}
309
310void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
311{
312 bust_spinlocks(0);
313 die_owner = -1;
314 add_taint(TAINT_DIE);
315 __raw_spin_unlock(&die_lock);
316 raw_local_irq_restore(flags);
317
318 if (!regs)
319 return;
320
321 if (kexec_should_crash(current))
322 crash_kexec(regs);
323 if (in_interrupt())
324 panic("Fatal exception in interrupt");
325 if (panic_on_oops)
326 panic("Fatal exception");
327 oops_exit();
328 do_exit(signr);
329}
330
331int __kprobes __die(const char *str, struct pt_regs *regs, long err)
332{
333 unsigned short ss;
334 unsigned long sp;
335
336 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
337#ifdef CONFIG_PREEMPT
338 printk("PREEMPT ");
339#endif
340#ifdef CONFIG_SMP
341 printk("SMP ");
342#endif
343#ifdef CONFIG_DEBUG_PAGEALLOC
344 printk("DEBUG_PAGEALLOC");
345#endif
346 printk("\n");
347 sysfs_printk_last_file();
348 if (notify_die(DIE_OOPS, str, regs, err,
349 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
350 return 1;
351
352 show_registers(regs);
353 /* Executive summary in case the oops scrolled away */
354 sp = (unsigned long) (&regs->sp);
355 savesegment(ss, ss);
356 if (user_mode(regs)) {
357 sp = regs->sp;
358 ss = regs->ss & 0xffff;
359 }
360 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
361 print_symbol("%s", regs->ip);
362 printk(" SS:ESP %04x:%08lx\n", ss, sp);
363 return 0;
364}
365
366/*
367 * This is gone through when something in the kernel has done something bad
368 * and is about to be terminated:
369 */
370void die(const char *str, struct pt_regs *regs, long err)
371{
372 unsigned long flags = oops_begin();
373
374 if (die_nest_count < 3) {
375 report_bug(regs->ip, regs);
376
377 if (__die(str, regs, err))
378 regs = NULL;
379 } else {
380 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
381 }
382
383 oops_end(flags, regs, SIGSEGV);
384}
385
386static DEFINE_SPINLOCK(nmi_print_lock);
387
388void notrace __kprobes
389die_nmi(char *str, struct pt_regs *regs, int do_panic)
390{
391 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
392 return;
393
394 spin_lock(&nmi_print_lock);
395 /*
396 * We are in trouble anyway, lets at least try
397 * to get a message out:
398 */
399 bust_spinlocks(1);
400 printk(KERN_EMERG "%s", str);
401 printk(" on CPU%d, ip %08lx, registers:\n",
402 smp_processor_id(), regs->ip);
403 show_registers(regs);
404 if (do_panic)
405 panic("Non maskable interrupt");
406 console_silent();
407 spin_unlock(&nmi_print_lock);
408
409 /*
410 * If we are in kernel we are probably nested up pretty bad
411 * and might aswell get out now while we still can:
412 */
413 if (!user_mode_vm(regs)) {
414 current->thread.trap_no = 2;
415 crash_kexec(regs);
416 }
417
418 bust_spinlocks(0);
419 do_exit(SIGSEGV);
420}
421
422static int __init oops_setup(char *s)
423{
424 if (!s)
425 return -EINVAL;
426 if (!strcmp(s, "panic"))
427 panic_on_oops = 1;
428 return 0;
429}
430early_param("oops", oops_setup);
431
432static int __init kstack_setup(char *s)
433{
434 if (!s)
435 return -EINVAL;
436 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
437 return 0;
438}
439early_param("kstack", kstack_setup);
440
441static int __init code_bytes_setup(char *s)
442{
443 code_bytes = simple_strtoul(s, NULL, 0);
444 if (code_bytes > 8192)
445 code_bytes = 8192;
446
447 return 1;
448}
449__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..c302d0707048 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 4 20#include "dumpstack.h"
21#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33 21
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 22static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
35 unsigned *usedp, char **idp) 23 unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
113 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 101 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
114 */ 102 */
115 103
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size, void *end)
118{
119 void *t = tinfo;
120 if (end) {
121 if (p < end && p >= (end-THREAD_SIZE))
122 return 1;
123 else
124 return 0;
125 }
126 return p > t && p < t + THREAD_SIZE - size;
127}
128
129/* The form of the top of the frame on the stack */
130struct stack_frame {
131 struct stack_frame *next_frame;
132 unsigned long return_address;
133};
134
135static inline unsigned long
136print_context_stack(struct thread_info *tinfo,
137 unsigned long *stack, unsigned long bp,
138 const struct stacktrace_ops *ops, void *data,
139 unsigned long *end)
140{
141 struct stack_frame *frame = (struct stack_frame *)bp;
142
143 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
144 unsigned long addr;
145
146 addr = *stack;
147 if (__kernel_text_address(addr)) {
148 if ((unsigned long) stack == bp + sizeof(long)) {
149 ops->address(data, addr, 1);
150 frame = frame->next_frame;
151 bp = (unsigned long) frame;
152 } else {
153 ops->address(data, addr, bp == 0);
154 }
155 }
156 stack++;
157 }
158 return bp;
159}
160
161void dump_trace(struct task_struct *task, struct pt_regs *regs, 104void dump_trace(struct task_struct *task, struct pt_regs *regs,
162 unsigned long *stack, unsigned long bp, 105 unsigned long *stack, unsigned long bp,
163 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
166 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
167 unsigned used = 0; 110 unsigned used = 0;
168 struct thread_info *tinfo; 111 struct thread_info *tinfo;
112 int graph = 0;
169 113
170 if (!task) 114 if (!task)
171 task = current; 115 task = current;
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
206 break; 150 break;
207 151
208 bp = print_context_stack(tinfo, stack, bp, ops, 152 bp = print_context_stack(tinfo, stack, bp, ops,
209 data, estack_end); 153 data, estack_end, &graph);
210 ops->stack(data, "<EOE>"); 154 ops->stack(data, "<EOE>");
211 /* 155 /*
212 * We link to the next stack via the 156 * We link to the next stack via the
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
225 if (ops->stack(data, "IRQ") < 0) 169 if (ops->stack(data, "IRQ") < 0)
226 break; 170 break;
227 bp = print_context_stack(tinfo, stack, bp, 171 bp = print_context_stack(tinfo, stack, bp,
228 ops, data, irqstack_end); 172 ops, data, irqstack_end, &graph);
229 /* 173 /*
230 * We link to the next stack (which would be 174 * We link to the next stack (which would be
231 * the process stack normally) the last 175 * the process stack normally) the last
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
243 /* 187 /*
244 * This handles the process stack: 188 * This handles the process stack:
245 */ 189 */
246 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); 190 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
247 put_cpu(); 191 put_cpu();
248} 192}
249EXPORT_SYMBOL(dump_trace); 193EXPORT_SYMBOL(dump_trace);
250 194
251static void 195void
252print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
253{
254 printk(data);
255 print_symbol(msg, symbol);
256 printk("\n");
257}
258
259static void print_trace_warning(void *data, char *msg)
260{
261 printk("%s%s\n", (char *)data, msg);
262}
263
264static int print_trace_stack(void *data, char *name)
265{
266 printk("%s <%s> ", (char *)data, name);
267 return 0;
268}
269
270/*
271 * Print one address/symbol entries per line.
272 */
273static void print_trace_address(void *data, unsigned long addr, int reliable)
274{
275 touch_nmi_watchdog();
276 printk(data);
277 printk_address(addr, reliable);
278}
279
280static const struct stacktrace_ops print_trace_ops = {
281 .warning = print_trace_warning,
282 .warning_symbol = print_trace_warning_symbol,
283 .stack = print_trace_stack,
284 .address = print_trace_address,
285};
286
287static void
288show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
289 unsigned long *stack, unsigned long bp, char *log_lvl)
290{
291 printk("%sCall Trace:\n", log_lvl);
292 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
293}
294
295void show_trace(struct task_struct *task, struct pt_regs *regs,
296 unsigned long *stack, unsigned long bp)
297{
298 show_trace_log_lvl(task, regs, stack, bp, "");
299}
300
301static void
302show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 196show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
303 unsigned long *sp, unsigned long bp, char *log_lvl) 197 unsigned long *sp, unsigned long bp, char *log_lvl)
304{ 198{
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
342 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 236 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
343} 237}
344 238
345void show_stack(struct task_struct *task, unsigned long *sp)
346{
347 show_stack_log_lvl(task, NULL, sp, 0, "");
348}
349
350/*
351 * The architecture-independent dump_stack generator
352 */
353void dump_stack(void)
354{
355 unsigned long bp = 0;
356 unsigned long stack;
357
358#ifdef CONFIG_FRAME_POINTER
359 if (!bp)
360 get_bp(bp);
361#endif
362
363 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
364 current->pid, current->comm, print_tainted(),
365 init_utsname()->release,
366 (int)strcspn(init_utsname()->version, " "),
367 init_utsname()->version);
368 show_trace(NULL, NULL, &stack, bp);
369}
370EXPORT_SYMBOL(dump_stack);
371
372void show_registers(struct pt_regs *regs) 239void show_registers(struct pt_regs *regs)
373{ 240{
374 int i; 241 int i;
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip)
429 return ud2 == 0x0b0f; 296 return ud2 == 0x0b0f;
430} 297}
431 298
432static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
433static int die_owner = -1;
434static unsigned int die_nest_count;
435
436unsigned __kprobes long oops_begin(void)
437{
438 int cpu;
439 unsigned long flags;
440
441 oops_enter();
442
443 /* racy, but better than risking deadlock. */
444 raw_local_irq_save(flags);
445 cpu = smp_processor_id();
446 if (!__raw_spin_trylock(&die_lock)) {
447 if (cpu == die_owner)
448 /* nested oops. should stop eventually */;
449 else
450 __raw_spin_lock(&die_lock);
451 }
452 die_nest_count++;
453 die_owner = cpu;
454 console_verbose();
455 bust_spinlocks(1);
456 return flags;
457}
458
459void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
460{
461 die_owner = -1;
462 bust_spinlocks(0);
463 die_nest_count--;
464 if (!die_nest_count)
465 /* Nest count reaches zero, release the lock. */
466 __raw_spin_unlock(&die_lock);
467 raw_local_irq_restore(flags);
468 if (!regs) {
469 oops_exit();
470 return;
471 }
472 if (in_interrupt())
473 panic("Fatal exception in interrupt");
474 if (panic_on_oops)
475 panic("Fatal exception");
476 oops_exit();
477 do_exit(signr);
478}
479
480int __kprobes __die(const char *str, struct pt_regs *regs, long err)
481{
482 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
483#ifdef CONFIG_PREEMPT
484 printk("PREEMPT ");
485#endif
486#ifdef CONFIG_SMP
487 printk("SMP ");
488#endif
489#ifdef CONFIG_DEBUG_PAGEALLOC
490 printk("DEBUG_PAGEALLOC");
491#endif
492 printk("\n");
493 sysfs_printk_last_file();
494 if (notify_die(DIE_OOPS, str, regs, err,
495 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
496 return 1;
497
498 show_registers(regs);
499 add_taint(TAINT_DIE);
500 /* Executive summary in case the oops scrolled away */
501 printk(KERN_ALERT "RIP ");
502 printk_address(regs->ip, 1);
503 printk(" RSP <%016lx>\n", regs->sp);
504 if (kexec_should_crash(current))
505 crash_kexec(regs);
506 return 0;
507}
508
509void die(const char *str, struct pt_regs *regs, long err)
510{
511 unsigned long flags = oops_begin();
512
513 if (!user_mode(regs))
514 report_bug(regs->ip, regs);
515
516 if (__die(str, regs, err))
517 regs = NULL;
518 oops_end(flags, regs, SIGSEGV);
519}
520
521notrace __kprobes void
522die_nmi(char *str, struct pt_regs *regs, int do_panic)
523{
524 unsigned long flags;
525
526 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
527 return;
528
529 flags = oops_begin();
530 /*
531 * We are in trouble anyway, lets at least try
532 * to get a message out.
533 */
534 printk(KERN_EMERG "%s", str);
535 printk(" on CPU%d, ip %08lx, registers:\n",
536 smp_processor_id(), regs->ip);
537 show_registers(regs);
538 if (kexec_should_crash(current))
539 crash_kexec(regs);
540 if (do_panic || panic_on_oops)
541 panic("Non maskable interrupt");
542 oops_end(flags, NULL, SIGBUS);
543 nmi_exit();
544 local_irq_enable();
545 do_exit(SIGBUS);
546}
547
548static int __init oops_setup(char *s)
549{
550 if (!s)
551 return -EINVAL;
552 if (!strcmp(s, "panic"))
553 panic_on_oops = 1;
554 return 0;
555}
556early_param("oops", oops_setup);
557
558static int __init kstack_setup(char *s)
559{
560 if (!s)
561 return -EINVAL;
562 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
563 return 0;
564}
565early_param("kstack", kstack_setup);
566
567static int __init code_bytes_setup(char *s)
568{
569 code_bytes = simple_strtoul(s, NULL, 0);
570 if (code_bytes > 8192)
571 code_bytes = 8192;
572
573 return 1;
574}
575__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7aafeb5263ef..65a13943e098 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -677,22 +677,6 @@ struct early_res {
677}; 677};
678static struct early_res early_res[MAX_EARLY_RES] __initdata = { 678static struct early_res early_res[MAX_EARLY_RES] __initdata = {
679 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ 679 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
680#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
681 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
682#endif
683#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
684 /*
685 * But first pinch a few for the stack/trampoline stuff
686 * FIXME: Don't need the extra page at 4K, but need to fix
687 * trampoline before removing it. (see the GDT stuff)
688 */
689 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
690 /*
691 * Has to be in very low memory so we can execute
692 * real-mode AP code.
693 */
694 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
695#endif
696 {} 680 {}
697}; 681};
698 682
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 1b894b72c0f5..744aa7fc49d5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -17,6 +17,7 @@
17#include <asm/io_apic.h> 17#include <asm/io_apic.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/iommu.h> 19#include <asm/iommu.h>
20#include <asm/gart.h>
20 21
21static void __init fix_hypertransport_config(int num, int slot, int func) 22static void __init fix_hypertransport_config(int num, int slot, int func)
22{ 23{
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 34ad997d3834..23b138e31e9c 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = {
875}; 875};
876#endif 876#endif
877 877
878/* Console interface to a host file on AMD's SimNow! */
879
880static int simnow_fd;
881
882enum {
883 MAGIC1 = 0xBACCD00A,
884 MAGIC2 = 0xCA110000,
885 XOPEN = 5,
886 XWRITE = 4,
887};
888
889static noinline long simnow(long cmd, long a, long b, long c)
890{
891 long ret;
892
893 asm volatile("cpuid" :
894 "=a" (ret) :
895 "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
896 return ret;
897}
898
899static void __init simnow_init(char *str)
900{
901 char *fn = "klog";
902
903 if (*str == '=')
904 fn = ++str;
905 /* error ignored */
906 simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
907}
908
909static void simnow_write(struct console *con, const char *s, unsigned n)
910{
911 simnow(XWRITE, simnow_fd, (unsigned long)s, n);
912}
913
914static struct console simnow_console = {
915 .name = "simnow",
916 .write = simnow_write,
917 .flags = CON_PRINTBUFFER,
918 .index = -1,
919};
920
921/* Direct interface for emergencies */ 878/* Direct interface for emergencies */
922static struct console *early_console = &early_vga_console; 879static struct console *early_console = &early_vga_console;
923static int __initdata early_console_initialized; 880static int __initdata early_console_initialized;
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf)
960 max_ypos = boot_params.screen_info.orig_video_lines; 917 max_ypos = boot_params.screen_info.orig_video_lines;
961 current_ypos = boot_params.screen_info.orig_y; 918 current_ypos = boot_params.screen_info.orig_y;
962 early_console = &early_vga_console; 919 early_console = &early_vga_console;
963 } else if (!strncmp(buf, "simnow", 6)) {
964 simnow_init(buf + 6);
965 early_console = &simnow_console;
966 keep_early = 1;
967#ifdef CONFIG_EARLY_PRINTK_DBGP 920#ifdef CONFIG_EARLY_PRINTK_DBGP
968 } else if (!strncmp(buf, "dbgp", 4)) { 921 } else if (!strncmp(buf, "dbgp", 4)) {
969 if (early_dbgp_init(buf+4) < 0) 922 if (early_dbgp_init(buf+4) < 0)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index fe7014176eb0..d6f0490a7391 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -954,6 +954,9 @@ ENTRY(mcount)
954END(mcount) 954END(mcount)
955 955
956ENTRY(ftrace_caller) 956ENTRY(ftrace_caller)
957 cmpl $0, function_trace_stop
958 jne ftrace_stub
959
957 pushl %eax 960 pushl %eax
958 pushl %ecx 961 pushl %ecx
959 pushl %edx 962 pushl %edx
@@ -968,6 +971,11 @@ ftrace_call:
968 popl %edx 971 popl %edx
969 popl %ecx 972 popl %ecx
970 popl %eax 973 popl %eax
974#ifdef CONFIG_FUNCTION_GRAPH_TRACER
975.globl ftrace_graph_call
976ftrace_graph_call:
977 jmp ftrace_stub
978#endif
971 979
972.globl ftrace_stub 980.globl ftrace_stub
973ftrace_stub: 981ftrace_stub:
@@ -977,8 +985,18 @@ END(ftrace_caller)
977#else /* ! CONFIG_DYNAMIC_FTRACE */ 985#else /* ! CONFIG_DYNAMIC_FTRACE */
978 986
979ENTRY(mcount) 987ENTRY(mcount)
988 cmpl $0, function_trace_stop
989 jne ftrace_stub
990
980 cmpl $ftrace_stub, ftrace_trace_function 991 cmpl $ftrace_stub, ftrace_trace_function
981 jnz trace 992 jnz trace
993#ifdef CONFIG_FUNCTION_GRAPH_TRACER
994 cmpl $ftrace_stub, ftrace_graph_return
995 jnz ftrace_graph_caller
996
997 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
998 jnz ftrace_graph_caller
999#endif
982.globl ftrace_stub 1000.globl ftrace_stub
983ftrace_stub: 1001ftrace_stub:
984 ret 1002 ret
@@ -997,12 +1015,43 @@ trace:
997 popl %edx 1015 popl %edx
998 popl %ecx 1016 popl %ecx
999 popl %eax 1017 popl %eax
1000
1001 jmp ftrace_stub 1018 jmp ftrace_stub
1002END(mcount) 1019END(mcount)
1003#endif /* CONFIG_DYNAMIC_FTRACE */ 1020#endif /* CONFIG_DYNAMIC_FTRACE */
1004#endif /* CONFIG_FUNCTION_TRACER */ 1021#endif /* CONFIG_FUNCTION_TRACER */
1005 1022
1023#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1024ENTRY(ftrace_graph_caller)
1025 cmpl $0, function_trace_stop
1026 jne ftrace_stub
1027
1028 pushl %eax
1029 pushl %ecx
1030 pushl %edx
1031 movl 0xc(%esp), %edx
1032 lea 0x4(%ebp), %eax
1033 subl $MCOUNT_INSN_SIZE, %edx
1034 call prepare_ftrace_return
1035 popl %edx
1036 popl %ecx
1037 popl %eax
1038 ret
1039END(ftrace_graph_caller)
1040
1041.globl return_to_handler
1042return_to_handler:
1043 pushl $0
1044 pushl %eax
1045 pushl %ecx
1046 pushl %edx
1047 call ftrace_return_to_handler
1048 movl %eax, 0xc(%esp)
1049 popl %edx
1050 popl %ecx
1051 popl %eax
1052 ret
1053#endif
1054
1006.section .rodata,"a" 1055.section .rodata,"a"
1007#include "syscall_table_32.S" 1056#include "syscall_table_32.S"
1008 1057
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fc013cfde307..1954a9662203 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -67,16 +67,10 @@ ENTRY(mcount)
67END(mcount) 67END(mcount)
68 68
69ENTRY(ftrace_caller) 69ENTRY(ftrace_caller)
70 cmpl $0, function_trace_stop
71 jne ftrace_stub
70 72
71 /* taken from glibc */ 73 MCOUNT_SAVE_FRAME
72 subq $0x38, %rsp
73 movq %rax, (%rsp)
74 movq %rcx, 8(%rsp)
75 movq %rdx, 16(%rsp)
76 movq %rsi, 24(%rsp)
77 movq %rdi, 32(%rsp)
78 movq %r8, 40(%rsp)
79 movq %r9, 48(%rsp)
80 74
81 movq 0x38(%rsp), %rdi 75 movq 0x38(%rsp), %rdi
82 movq 8(%rbp), %rsi 76 movq 8(%rbp), %rsi
@@ -86,14 +80,13 @@ ENTRY(ftrace_caller)
86ftrace_call: 80ftrace_call:
87 call ftrace_stub 81 call ftrace_stub
88 82
89 movq 48(%rsp), %r9 83 MCOUNT_RESTORE_FRAME
90 movq 40(%rsp), %r8 84
91 movq 32(%rsp), %rdi 85#ifdef CONFIG_FUNCTION_GRAPH_TRACER
92 movq 24(%rsp), %rsi 86.globl ftrace_graph_call
93 movq 16(%rsp), %rdx 87ftrace_graph_call:
94 movq 8(%rsp), %rcx 88 jmp ftrace_stub
95 movq (%rsp), %rax 89#endif
96 addq $0x38, %rsp
97 90
98.globl ftrace_stub 91.globl ftrace_stub
99ftrace_stub: 92ftrace_stub:
@@ -102,15 +95,63 @@ END(ftrace_caller)
102 95
103#else /* ! CONFIG_DYNAMIC_FTRACE */ 96#else /* ! CONFIG_DYNAMIC_FTRACE */
104ENTRY(mcount) 97ENTRY(mcount)
98 cmpl $0, function_trace_stop
99 jne ftrace_stub
100
105 cmpq $ftrace_stub, ftrace_trace_function 101 cmpq $ftrace_stub, ftrace_trace_function
106 jnz trace 102 jnz trace
103
104#ifdef CONFIG_FUNCTION_GRAPH_TRACER
105 cmpq $ftrace_stub, ftrace_graph_return
106 jnz ftrace_graph_caller
107
108 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
109 jnz ftrace_graph_caller
110#endif
111
107.globl ftrace_stub 112.globl ftrace_stub
108ftrace_stub: 113ftrace_stub:
109 retq 114 retq
110 115
111trace: 116trace:
112 /* taken from glibc */ 117 MCOUNT_SAVE_FRAME
113 subq $0x38, %rsp 118
119 movq 0x38(%rsp), %rdi
120 movq 8(%rbp), %rsi
121 subq $MCOUNT_INSN_SIZE, %rdi
122
123 call *ftrace_trace_function
124
125 MCOUNT_RESTORE_FRAME
126
127 jmp ftrace_stub
128END(mcount)
129#endif /* CONFIG_DYNAMIC_FTRACE */
130#endif /* CONFIG_FUNCTION_TRACER */
131
132#ifdef CONFIG_FUNCTION_GRAPH_TRACER
133ENTRY(ftrace_graph_caller)
134 cmpl $0, function_trace_stop
135 jne ftrace_stub
136
137 MCOUNT_SAVE_FRAME
138
139 leaq 8(%rbp), %rdi
140 movq 0x38(%rsp), %rsi
141 subq $MCOUNT_INSN_SIZE, %rsi
142
143 call prepare_ftrace_return
144
145 MCOUNT_RESTORE_FRAME
146
147 retq
148END(ftrace_graph_caller)
149
150
151.globl return_to_handler
152return_to_handler:
153 subq $80, %rsp
154
114 movq %rax, (%rsp) 155 movq %rax, (%rsp)
115 movq %rcx, 8(%rsp) 156 movq %rcx, 8(%rsp)
116 movq %rdx, 16(%rsp) 157 movq %rdx, 16(%rsp)
@@ -118,13 +159,14 @@ trace:
118 movq %rdi, 32(%rsp) 159 movq %rdi, 32(%rsp)
119 movq %r8, 40(%rsp) 160 movq %r8, 40(%rsp)
120 movq %r9, 48(%rsp) 161 movq %r9, 48(%rsp)
162 movq %r10, 56(%rsp)
163 movq %r11, 64(%rsp)
121 164
122 movq 0x38(%rsp), %rdi 165 call ftrace_return_to_handler
123 movq 8(%rbp), %rsi
124 subq $MCOUNT_INSN_SIZE, %rdi
125
126 call *ftrace_trace_function
127 166
167 movq %rax, 72(%rsp)
168 movq 64(%rsp), %r11
169 movq 56(%rsp), %r10
128 movq 48(%rsp), %r9 170 movq 48(%rsp), %r9
129 movq 40(%rsp), %r8 171 movq 40(%rsp), %r8
130 movq 32(%rsp), %rdi 172 movq 32(%rsp), %rdi
@@ -132,12 +174,10 @@ trace:
132 movq 16(%rsp), %rdx 174 movq 16(%rsp), %rdx
133 movq 8(%rsp), %rcx 175 movq 8(%rsp), %rcx
134 movq (%rsp), %rax 176 movq (%rsp), %rax
135 addq $0x38, %rsp 177 addq $72, %rsp
178 retq
179#endif
136 180
137 jmp ftrace_stub
138END(mcount)
139#endif /* CONFIG_DYNAMIC_FTRACE */
140#endif /* CONFIG_FUNCTION_TRACER */
141 181
142#ifndef CONFIG_PREEMPT 182#ifndef CONFIG_PREEMPT
143#define retint_kernel retint_restore_args 183#define retint_kernel retint_restore_args
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 0aa2c443d600..53699c931ad4 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
38#include <asm/io.h> 38#include <asm/io.h>
39#include <asm/nmi.h> 39#include <asm/nmi.h>
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/atomic.h>
41#include <asm/apicdef.h> 42#include <asm/apicdef.h>
42#include <mach_mpparse.h> 43#include <mach_mpparse.h>
44#include <asm/genapic.h>
45#include <asm/setup.h>
43 46
44/* 47/*
45 * ES7000 chipsets 48 * ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
161 return gsi; 164 return gsi;
162} 165}
163 166
167static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
168{
169 unsigned long vect = 0, psaival = 0;
170
171 if (psai == NULL)
172 return -1;
173
174 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
175 psaival = (0x1000000 | vect | cpu);
176
177 while (*psai & 0x1000000)
178 ;
179
180 *psai = psaival;
181
182 return 0;
183}
184
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void)
190{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192
193 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 &&
195 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 }
200
201 return 0;
202}
203
164void __init 204void __init
165setup_unisys(void) 205setup_unisys(void)
166{ 206{
@@ -176,6 +216,8 @@ setup_unisys(void)
176 else 216 else
177 es7000_plat = ES7000_CLASSIC; 217 es7000_plat = ES7000_CLASSIC;
178 ioapic_renumber_irq = es7000_rename_gsi; 218 ioapic_renumber_irq = es7000_rename_gsi;
219
220 x86_quirks->update_genapic = es7000_update_genapic;
179} 221}
180 222
181/* 223/*
@@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
317 return status; 359 return status;
318} 360}
319 361
320int
321es7000_start_cpu(int cpu, unsigned long eip)
322{
323 unsigned long vect = 0, psaival = 0;
324
325 if (psai == NULL)
326 return -1;
327
328 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
329 psaival = (0x1000000 | vect | cpu);
330
331 while (*psai & 0x1000000)
332 ;
333
334 *psai = psaival;
335
336 return 0;
337
338}
339
340void __init 362void __init
341es7000_sw_apic(void) 363es7000_sw_apic(void)
342{ 364{
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/ftrace.h> 15#include <linux/ftrace.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/sched.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/list.h> 19#include <linux/list.h>
19 20
20#include <asm/ftrace.h> 21#include <asm/ftrace.h>
22#include <linux/ftrace.h>
21#include <asm/nops.h> 23#include <asm/nops.h>
24#include <asm/nmi.h>
22 25
23 26
24static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; 27#ifdef CONFIG_DYNAMIC_FTRACE
25 28
26union ftrace_code_union { 29union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 30 char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
31 } __attribute__((packed)); 34 } __attribute__((packed));
32}; 35};
33 36
34
35static int ftrace_calc_offset(long ip, long addr) 37static int ftrace_calc_offset(long ip, long addr)
36{ 38{
37 return (int)(addr - ip); 39 return (int)(addr - ip);
38} 40}
39 41
40unsigned char *ftrace_nop_replace(void) 42static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
41{
42 return ftrace_nop;
43}
44
45unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{ 43{
47 static union ftrace_code_union calc; 44 static union ftrace_code_union calc;
48 45
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 53 return calc.code;
57} 54}
58 55
59int 56/*
57 * Modifying code must take extra care. On an SMP machine, if
58 * the code being modified is also being executed on another CPU
59 * that CPU will have undefined results and possibly take a GPF.
60 * We use kstop_machine to stop other CPUS from exectuing code.
61 * But this does not stop NMIs from happening. We still need
62 * to protect against that. We separate out the modification of
63 * the code to take care of this.
64 *
65 * Two buffers are added: An IP buffer and a "code" buffer.
66 *
67 * 1) Put the instruction pointer into the IP buffer
68 * and the new code into the "code" buffer.
69 * 2) Set a flag that says we are modifying code
70 * 3) Wait for any running NMIs to finish.
71 * 4) Write the code
72 * 5) clear the flag.
73 * 6) Wait for any running NMIs to finish.
74 *
75 * If an NMI is executed, the first thing it does is to call
76 * "ftrace_nmi_enter". This will check if the flag is set to write
77 * and if it is, it will write what is in the IP and "code" buffers.
78 *
79 * The trick is, it does not matter if everyone is writing the same
80 * content to the code location. Also, if a CPU is executing code
81 * it is OK to write to that code location if the contents being written
82 * are the same as what exists.
83 */
84
85static atomic_t in_nmi = ATOMIC_INIT(0);
86static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */
89static void *mod_code_newcode; /* holds the text to write to the IP */
90
91static unsigned nmi_wait_count;
92static atomic_t nmi_update_count = ATOMIC_INIT(0);
93
94int ftrace_arch_read_dyn_info(char *buf, int size)
95{
96 int r;
97
98 r = snprintf(buf, size, "%u %u",
99 nmi_wait_count,
100 atomic_read(&nmi_update_count));
101 return r;
102}
103
104static void ftrace_mod_code(void)
105{
106 /*
107 * Yes, more than one CPU process can be writing to mod_code_status.
108 * (and the code itself)
109 * But if one were to fail, then they all should, and if one were
110 * to succeed, then they all should.
111 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE);
114}
115
116void ftrace_nmi_enter(void)
117{
118 atomic_inc(&in_nmi);
119 /* Must have in_nmi seen before reading write flag */
120 smp_mb();
121 if (mod_code_write) {
122 ftrace_mod_code();
123 atomic_inc(&nmi_update_count);
124 }
125}
126
127void ftrace_nmi_exit(void)
128{
129 /* Finish all executions before clearing in_nmi */
130 smp_wmb();
131 atomic_dec(&in_nmi);
132}
133
134static void wait_for_nmi(void)
135{
136 int waited = 0;
137
138 while (atomic_read(&in_nmi)) {
139 waited = 1;
140 cpu_relax();
141 }
142
143 if (waited)
144 nmi_wait_count++;
145}
146
147static int
148do_ftrace_mod_code(unsigned long ip, void *new_code)
149{
150 mod_code_ip = (void *)ip;
151 mod_code_newcode = new_code;
152
153 /* The buffers need to be visible before we let NMIs write them */
154 smp_wmb();
155
156 mod_code_write = 1;
157
158 /* Make sure write bit is visible before we wait on NMIs */
159 smp_mb();
160
161 wait_for_nmi();
162
163 /* Make sure all running NMIs have finished before we write the code */
164 smp_mb();
165
166 ftrace_mod_code();
167
168 /* Make sure the write happens before clearing the bit */
169 smp_wmb();
170
171 mod_code_write = 0;
172
173 /* make sure NMIs see the cleared bit */
174 smp_mb();
175
176 wait_for_nmi();
177
178 return mod_code_status;
179}
180
181
182
183
184static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
185
186static unsigned char *ftrace_nop_replace(void)
187{
188 return ftrace_nop;
189}
190
191static int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 192ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 193 unsigned char *new_code)
62{ 194{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 213 return -EINVAL;
82 214
83 /* replace the text with the new text */ 215 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 216 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 217 return -EPERM;
86 218
87 sync_core(); 219 sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
89 return 0; 221 return 0;
90} 222}
91 223
224int ftrace_make_nop(struct module *mod,
225 struct dyn_ftrace *rec, unsigned long addr)
226{
227 unsigned char *new, *old;
228 unsigned long ip = rec->ip;
229
230 old = ftrace_call_replace(ip, addr);
231 new = ftrace_nop_replace();
232
233 return ftrace_modify_code(rec->ip, old, new);
234}
235
236int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
237{
238 unsigned char *new, *old;
239 unsigned long ip = rec->ip;
240
241 old = ftrace_nop_replace();
242 new = ftrace_call_replace(ip, addr);
243
244 return ftrace_modify_code(rec->ip, old, new);
245}
246
92int ftrace_update_ftrace_func(ftrace_func_t func) 247int ftrace_update_ftrace_func(ftrace_func_t func)
93{ 248{
94 unsigned long ip = (unsigned long)(&ftrace_call); 249 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data)
165 320
166 return 0; 321 return 0;
167} 322}
323#endif
324
325#ifdef CONFIG_FUNCTION_GRAPH_TRACER
326
327#ifdef CONFIG_DYNAMIC_FTRACE
328extern void ftrace_graph_call(void);
329
330static int ftrace_mod_jmp(unsigned long ip,
331 int old_offset, int new_offset)
332{
333 unsigned char code[MCOUNT_INSN_SIZE];
334
335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
336 return -EFAULT;
337
338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
339 return -EINVAL;
340
341 *(int *)(&code[1]) = new_offset;
342
343 if (do_ftrace_mod_code(ip, &code))
344 return -EPERM;
345
346 return 0;
347}
348
349int ftrace_enable_ftrace_graph_caller(void)
350{
351 unsigned long ip = (unsigned long)(&ftrace_graph_call);
352 int old_offset, new_offset;
353
354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
356
357 return ftrace_mod_jmp(ip, old_offset, new_offset);
358}
359
360int ftrace_disable_ftrace_graph_caller(void)
361{
362 unsigned long ip = (unsigned long)(&ftrace_graph_call);
363 int old_offset, new_offset;
364
365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
367
368 return ftrace_mod_jmp(ip, old_offset, new_offset);
369}
370
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are picked from those used on
375 * this page for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */
391
392/* Add a function return address to the trace stack on thread info.*/
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address to the trace stack on thread info.*/
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427 /* Might as well panic, otherwise we have no where to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
432 *ret = current->ret_stack[index].ret;
433 trace->func = current->ret_stack[index].func;
434 trace->calltime = current->ret_stack[index].calltime;
435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
438 current->curr_ret_stack--;
439
440}
441
442/*
443 * Send the trace to the ring-buffer.
444 * @return the original return address.
445 */
446unsigned long ftrace_return_to_handler(void)
447{
448 struct ftrace_graph_ret trace;
449 unsigned long ret;
450
451 pop_return_trace(&trace, &ret);
452 trace.rettime = cpu_clock(raw_smp_processor_id());
453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
461
462 return ret;
463}
464
465/*
466 * Hook the return address and push it in the stack of return addrs
467 * in current thread info.
468 */
469void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
470{
471 unsigned long old;
472 unsigned long long calltime;
473 int faulted;
474 struct ftrace_graph_ent trace;
475 unsigned long return_hooker = (unsigned long)
476 &return_to_handler;
477
478 /* Nmi's are currently unsupported */
479 if (unlikely(atomic_read(&in_nmi)))
480 return;
481
482 if (unlikely(atomic_read(&current->tracing_graph_pause)))
483 return;
484
485 /*
486 * Protect against fault, even if it shouldn't
487 * happen. This tool is too much intrusive to
488 * ignore such a protection.
489 */
490 asm volatile(
491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n"
492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
493 " movl $0, %[faulted]\n"
494
495 ".section .fixup, \"ax\"\n"
496 "3: movl $1, %[faulted]\n"
497 ".previous\n"
498
499 _ASM_EXTABLE(1b, 3b)
500 _ASM_EXTABLE(2b, 3b)
501
502 : [parent_replaced] "=r" (parent), [old] "=r" (old),
503 [faulted] "=r" (faulted)
504 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
505 : "memory"
506 );
507
508 if (unlikely(faulted)) {
509 ftrace_graph_stop();
510 WARN_ON(1);
511 return;
512 }
513
514 if (unlikely(!__kernel_text_address(old))) {
515 ftrace_graph_stop();
516 *parent = old;
517 WARN_ON(1);
518 return;
519 }
520
521 calltime = cpu_clock(raw_smp_processor_id());
522
523 if (push_return_trace(old, calltime,
524 self_addr, &trace.depth) == -EBUSY) {
525 *parent = old;
526 return;
527 }
528
529 trace.func = self_addr;
530
531 /* Only trace if the calling function expects to */
532 if (!ftrace_graph_entry(&trace)) {
533 current->curr_ret_stack--;
534 *parent = old;
535 }
536}
537#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/setup.h>
24 25
25extern struct genapic apic_flat; 26extern struct genapic apic_flat;
26extern struct genapic apic_physflat; 27extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
53 genapic = &apic_physflat; 54 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 55 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 } 56 }
57
58 if (x86_quirks->update_genapic)
59 x86_quirks->update_genapic();
56} 60}
57 61
58/* Same for both flat and physical. */ 62/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb98278..dece17289731 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpu.h>
13#include <linux/cpumask.h> 14#include <linux/cpumask.h>
14#include <linux/string.h> 15#include <linux/string.h>
15#include <linux/ctype.h> 16#include <linux/ctype.h>
@@ -17,6 +18,9 @@
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
21#include <linux/timer.h>
22#include <linux/proc_fs.h>
23#include <asm/current.h>
20#include <asm/smp.h> 24#include <asm/smp.h>
21#include <asm/ipi.h> 25#include <asm/ipi.h>
22#include <asm/genapic.h> 26#include <asm/genapic.h>
@@ -356,6 +360,103 @@ static __init void uv_rtc_init(void)
356} 360}
357 361
358/* 362/*
363 * percpu heartbeat timer
364 */
365static void uv_heartbeat(unsigned long ignored)
366{
367 struct timer_list *timer = &uv_hub_info->scir.timer;
368 unsigned char bits = uv_hub_info->scir.state;
369
370 /* flip heartbeat bit */
371 bits ^= SCIR_CPU_HEARTBEAT;
372
373 /* is this cpu idle? */
374 if (idle_cpu(raw_smp_processor_id()))
375 bits &= ~SCIR_CPU_ACTIVITY;
376 else
377 bits |= SCIR_CPU_ACTIVITY;
378
379 /* update system controller interface reg */
380 uv_set_scir_bits(bits);
381
382 /* enable next timer period */
383 mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
384}
385
386static void __cpuinit uv_heartbeat_enable(int cpu)
387{
388 if (!uv_cpu_hub_info(cpu)->scir.enabled) {
389 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
390
391 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
392 setup_timer(timer, uv_heartbeat, cpu);
393 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
394 add_timer_on(timer, cpu);
395 uv_cpu_hub_info(cpu)->scir.enabled = 1;
396 }
397
398 /* check boot cpu */
399 if (!uv_cpu_hub_info(0)->scir.enabled)
400 uv_heartbeat_enable(0);
401}
402
403#ifdef CONFIG_HOTPLUG_CPU
404static void __cpuinit uv_heartbeat_disable(int cpu)
405{
406 if (uv_cpu_hub_info(cpu)->scir.enabled) {
407 uv_cpu_hub_info(cpu)->scir.enabled = 0;
408 del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
409 }
410 uv_set_cpu_scir_bits(cpu, 0xff);
411}
412
413/*
414 * cpu hotplug notifier
415 */
416static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
417 unsigned long action, void *hcpu)
418{
419 long cpu = (long)hcpu;
420
421 switch (action) {
422 case CPU_ONLINE:
423 uv_heartbeat_enable(cpu);
424 break;
425 case CPU_DOWN_PREPARE:
426 uv_heartbeat_disable(cpu);
427 break;
428 default:
429 break;
430 }
431 return NOTIFY_OK;
432}
433
434static __init void uv_scir_register_cpu_notifier(void)
435{
436 hotcpu_notifier(uv_scir_cpu_notify, 0);
437}
438
439#else /* !CONFIG_HOTPLUG_CPU */
440
441static __init void uv_scir_register_cpu_notifier(void)
442{
443}
444
445static __init int uv_init_heartbeat(void)
446{
447 int cpu;
448
449 if (is_uv_system())
450 for_each_online_cpu(cpu)
451 uv_heartbeat_enable(cpu);
452 return 0;
453}
454
455late_initcall(uv_init_heartbeat);
456
457#endif /* !CONFIG_HOTPLUG_CPU */
458
459/*
359 * Called on each cpu to initialize the per_cpu UV data area. 460 * Called on each cpu to initialize the per_cpu UV data area.
360 * ZZZ hotplug not supported yet 461 * ZZZ hotplug not supported yet
361 */ 462 */
@@ -428,7 +529,7 @@ void __init uv_system_init(void)
428 529
429 uv_bios_init(); 530 uv_bios_init();
430 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, 531 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
431 &uv_coherency_id, &uv_region_size); 532 &sn_coherency_id, &sn_region_size);
432 uv_rtc_init(); 533 uv_rtc_init();
433 534
434 for_each_present_cpu(cpu) { 535 for_each_present_cpu(cpu) {
@@ -439,8 +540,7 @@ void __init uv_system_init(void)
439 uv_blade_info[blade].nr_possible_cpus++; 540 uv_blade_info[blade].nr_possible_cpus++;
440 541
441 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 542 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
442 uv_cpu_hub_info(cpu)->lowmem_remap_top = 543 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
443 lowmem_redir_base + lowmem_redir_size;
444 uv_cpu_hub_info(cpu)->m_val = m_val; 544 uv_cpu_hub_info(cpu)->m_val = m_val;
445 uv_cpu_hub_info(cpu)->n_val = m_val; 545 uv_cpu_hub_info(cpu)->n_val = m_val;
446 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 546 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
@@ -450,7 +550,8 @@ void __init uv_system_init(void)
450 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 550 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
451 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 551 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
452 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 552 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
453 uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; 553 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
554 uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
454 uv_node_to_blade[nid] = blade; 555 uv_node_to_blade[nid] = blade;
455 uv_cpu_to_blade[cpu] = blade; 556 uv_cpu_to_blade[cpu] = blade;
456 max_pnode = max(pnode, max_pnode); 557 max_pnode = max(pnode, max_pnode);
@@ -467,4 +568,6 @@ void __init uv_system_init(void)
467 map_mmioh_high(max_pnode); 568 map_mmioh_high(max_pnode);
468 569
469 uv_cpu_init(); 570 uv_cpu_init();
571 uv_scir_register_cpu_notifier();
572 proc_mkdir("sgi_uv", NULL);
470} 573}
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 1dcb0f13897e..3e66bd364a9d 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void)
35 35
36 /* start of EBDA area */ 36 /* start of EBDA area */
37 ebda_addr = get_bios_ebda(); 37 ebda_addr = get_bios_ebda();
38 printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
39 38
40 /* Fixup: bios puts an EBDA in the top 64K segment */ 39 /* Fixup: bios puts an EBDA in the top 64K segment */
41 /* of conventional memory, but does not adjust lowmem. */ 40 /* of conventional memory, but does not adjust lowmem. */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index fa1d25dd83e3..ac108d1fe182 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -12,9 +12,12 @@
12#include <asm/sections.h> 12#include <asm/sections.h>
13#include <asm/e820.h> 13#include <asm/e820.h>
14#include <asm/bios_ebda.h> 14#include <asm/bios_ebda.h>
15#include <asm/trampoline.h>
15 16
16void __init i386_start_kernel(void) 17void __init i386_start_kernel(void)
17{ 18{
19 reserve_trampoline_memory();
20
18 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 21 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
19 22
20#ifdef CONFIG_BLK_DEV_INITRD 23#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d16084f90649..388e05a5fc17 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -24,6 +24,7 @@
24#include <asm/kdebug.h> 24#include <asm/kdebug.h>
25#include <asm/e820.h> 25#include <asm/e820.h>
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h>
27 28
28/* boot cpu pda */ 29/* boot cpu pda */
29static struct x8664_pda _boot_cpu_pda __read_mostly; 30static struct x8664_pda _boot_cpu_pda __read_mostly;
@@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
120{ 121{
121 copy_bootdata(__va(real_mode_data)); 122 copy_bootdata(__va(real_mode_data));
122 123
124 reserve_trampoline_memory();
125
123 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 126 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
124 127
125#ifdef CONFIG_BLK_DEV_INITRD 128#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..3f0a3edf0a57 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,7 +33,9 @@
33 * HPET address is set in acpi/boot.c, when an ACPI entry exists 33 * HPET address is set in acpi/boot.c, when an ACPI entry exists
34 */ 34 */
35unsigned long hpet_address; 35unsigned long hpet_address;
36unsigned long hpet_num_timers; 36#ifdef CONFIG_PCI_MSI
37static unsigned long hpet_num_timers;
38#endif
37static void __iomem *hpet_virt_address; 39static void __iomem *hpet_virt_address;
38 40
39struct hpet_dev { 41struct hpet_dev {
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index a4f93b4120c1..d39918076bb4 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -14,7 +14,6 @@ static struct fs_struct init_fs = INIT_FS;
14static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 14static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 16struct mm_struct init_mm = INIT_MM(init_mm);
17EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
18 17
19/* 18/*
20 * Initial thread structure. 19 * Initial thread structure.
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..679e7bbbbcd6 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2216,10 +2216,9 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2216asmlinkage void smp_irq_move_cleanup_interrupt(void) 2216asmlinkage void smp_irq_move_cleanup_interrupt(void)
2217{ 2217{
2218 unsigned vector, me; 2218 unsigned vector, me;
2219
2219 ack_APIC_irq(); 2220 ack_APIC_irq();
2220#ifdef CONFIG_X86_64
2221 exit_idle(); 2221 exit_idle();
2222#endif
2223 irq_enter(); 2222 irq_enter();
2224 2223
2225 me = smp_processor_id(); 2224 me = smp_processor_id();
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1d3d0e71b044..1df869e5bd0b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -13,6 +13,7 @@
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/ftrace.h>
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
17#include <asm/io_apic.h> 18#include <asm/io_apic.h>
18#include <asm/idle.h> 19#include <asm/idle.h>
@@ -45,7 +46,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
45 * SMP cross-CPU interrupts have their own specific 46 * SMP cross-CPU interrupts have their own specific
46 * handlers). 47 * handlers).
47 */ 48 */
48asmlinkage unsigned int do_IRQ(struct pt_regs *regs) 49asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
49{ 50{
50 struct pt_regs *old_regs = set_irq_regs(regs); 51 struct pt_regs *old_regs = set_irq_regs(regs);
51 struct irq_desc *desc; 52 struct irq_desc *desc;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7a385746509a..37f420018a41 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -13,6 +13,7 @@
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/suspend.h> 15#include <linux/suspend.h>
16#include <linux/gfp.h>
16 17
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <asm/pgalloc.h> 19#include <asm/pgalloc.h>
@@ -25,15 +26,6 @@
25#include <asm/system.h> 26#include <asm/system.h>
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
27 28
28#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
29static u32 kexec_pgd[1024] PAGE_ALIGNED;
30#ifdef CONFIG_X86_PAE
31static u32 kexec_pmd0[1024] PAGE_ALIGNED;
32static u32 kexec_pmd1[1024] PAGE_ALIGNED;
33#endif
34static u32 kexec_pte0[1024] PAGE_ALIGNED;
35static u32 kexec_pte1[1024] PAGE_ALIGNED;
36
37static void set_idt(void *newidt, __u16 limit) 29static void set_idt(void *newidt, __u16 limit)
38{ 30{
39 struct desc_ptr curidt; 31 struct desc_ptr curidt;
@@ -76,6 +68,76 @@ static void load_segments(void)
76#undef __STR 68#undef __STR
77} 69}
78 70
71static void machine_kexec_free_page_tables(struct kimage *image)
72{
73 free_page((unsigned long)image->arch.pgd);
74#ifdef CONFIG_X86_PAE
75 free_page((unsigned long)image->arch.pmd0);
76 free_page((unsigned long)image->arch.pmd1);
77#endif
78 free_page((unsigned long)image->arch.pte0);
79 free_page((unsigned long)image->arch.pte1);
80}
81
82static int machine_kexec_alloc_page_tables(struct kimage *image)
83{
84 image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
85#ifdef CONFIG_X86_PAE
86 image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
87 image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
88#endif
89 image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
90 image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
91 if (!image->arch.pgd ||
92#ifdef CONFIG_X86_PAE
93 !image->arch.pmd0 || !image->arch.pmd1 ||
94#endif
95 !image->arch.pte0 || !image->arch.pte1) {
96 machine_kexec_free_page_tables(image);
97 return -ENOMEM;
98 }
99 return 0;
100}
101
102static void machine_kexec_page_table_set_one(
103 pgd_t *pgd, pmd_t *pmd, pte_t *pte,
104 unsigned long vaddr, unsigned long paddr)
105{
106 pud_t *pud;
107
108 pgd += pgd_index(vaddr);
109#ifdef CONFIG_X86_PAE
110 if (!(pgd_val(*pgd) & _PAGE_PRESENT))
111 set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
112#endif
113 pud = pud_offset(pgd, vaddr);
114 pmd = pmd_offset(pud, vaddr);
115 if (!(pmd_val(*pmd) & _PAGE_PRESENT))
116 set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
117 pte = pte_offset_kernel(pmd, vaddr);
118 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
119}
120
121static void machine_kexec_prepare_page_tables(struct kimage *image)
122{
123 void *control_page;
124 pmd_t *pmd = 0;
125
126 control_page = page_address(image->control_code_page);
127#ifdef CONFIG_X86_PAE
128 pmd = image->arch.pmd0;
129#endif
130 machine_kexec_page_table_set_one(
131 image->arch.pgd, pmd, image->arch.pte0,
132 (unsigned long)control_page, __pa(control_page));
133#ifdef CONFIG_X86_PAE
134 pmd = image->arch.pmd1;
135#endif
136 machine_kexec_page_table_set_one(
137 image->arch.pgd, pmd, image->arch.pte1,
138 __pa(control_page), __pa(control_page));
139}
140
79/* 141/*
80 * A architecture hook called to validate the 142 * A architecture hook called to validate the
81 * proposed image and prepare the control pages 143 * proposed image and prepare the control pages
@@ -87,12 +149,20 @@ static void load_segments(void)
87 * reboot code buffer to allow us to avoid allocations 149 * reboot code buffer to allow us to avoid allocations
88 * later. 150 * later.
89 * 151 *
90 * Make control page executable. 152 * - Make control page executable.
153 * - Allocate page tables
154 * - Setup page tables
91 */ 155 */
92int machine_kexec_prepare(struct kimage *image) 156int machine_kexec_prepare(struct kimage *image)
93{ 157{
158 int error;
159
94 if (nx_enabled) 160 if (nx_enabled)
95 set_pages_x(image->control_code_page, 1); 161 set_pages_x(image->control_code_page, 1);
162 error = machine_kexec_alloc_page_tables(image);
163 if (error)
164 return error;
165 machine_kexec_prepare_page_tables(image);
96 return 0; 166 return 0;
97} 167}
98 168
@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image)
104{ 174{
105 if (nx_enabled) 175 if (nx_enabled)
106 set_pages_nx(image->control_code_page, 1); 176 set_pages_nx(image->control_code_page, 1);
177 machine_kexec_free_page_tables(image);
107} 178}
108 179
109/* 180/*
@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image)
150 relocate_kernel_ptr = control_page; 221 relocate_kernel_ptr = control_page;
151 page_list[PA_CONTROL_PAGE] = __pa(control_page); 222 page_list[PA_CONTROL_PAGE] = __pa(control_page);
152 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; 223 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
153 page_list[PA_PGD] = __pa(kexec_pgd); 224 page_list[PA_PGD] = __pa(image->arch.pgd);
154 page_list[VA_PGD] = (unsigned long)kexec_pgd;
155#ifdef CONFIG_X86_PAE
156 page_list[PA_PMD_0] = __pa(kexec_pmd0);
157 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
158 page_list[PA_PMD_1] = __pa(kexec_pmd1);
159 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
160#endif
161 page_list[PA_PTE_0] = __pa(kexec_pte0);
162 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
163 page_list[PA_PTE_1] = __pa(kexec_pte1);
164 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
165 225
166 if (image->type == KEXEC_TYPE_DEFAULT) 226 if (image->type == KEXEC_TYPE_DEFAULT)
167 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) 227 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 5f8e5d75a254..c25fdb382292 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -10,7 +10,7 @@
10 * This driver allows to upgrade microcode on AMD 10 * This driver allows to upgrade microcode on AMD
11 * family 0x10 and 0x11 processors. 11 * family 0x10 and 0x11 processors.
12 * 12 *
13 * Licensed unter the terms of the GNU General Public 13 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 14 * License version 2. See file COPYING for details.
15*/ 15*/
16 16
@@ -32,9 +32,9 @@
32#include <linux/platform_device.h> 32#include <linux/platform_device.h>
33#include <linux/pci.h> 33#include <linux/pci.h>
34#include <linux/pci_ids.h> 34#include <linux/pci_ids.h>
35#include <linux/uaccess.h>
35 36
36#include <asm/msr.h> 37#include <asm/msr.h>
37#include <asm/uaccess.h>
38#include <asm/processor.h> 38#include <asm/processor.h>
39#include <asm/microcode.h> 39#include <asm/microcode.h>
40 40
@@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2");
47#define UCODE_UCODE_TYPE 0x00000001 47#define UCODE_UCODE_TYPE 0x00000001
48 48
49struct equiv_cpu_entry { 49struct equiv_cpu_entry {
50 unsigned int installed_cpu; 50 u32 installed_cpu;
51 unsigned int fixed_errata_mask; 51 u32 fixed_errata_mask;
52 unsigned int fixed_errata_compare; 52 u32 fixed_errata_compare;
53 unsigned int equiv_cpu; 53 u16 equiv_cpu;
54}; 54 u16 res;
55} __attribute__((packed));
55 56
56struct microcode_header_amd { 57struct microcode_header_amd {
57 unsigned int data_code; 58 u32 data_code;
58 unsigned int patch_id; 59 u32 patch_id;
59 unsigned char mc_patch_data_id[2]; 60 u16 mc_patch_data_id;
60 unsigned char mc_patch_data_len; 61 u8 mc_patch_data_len;
61 unsigned char init_flag; 62 u8 init_flag;
62 unsigned int mc_patch_data_checksum; 63 u32 mc_patch_data_checksum;
63 unsigned int nb_dev_id; 64 u32 nb_dev_id;
64 unsigned int sb_dev_id; 65 u32 sb_dev_id;
65 unsigned char processor_rev_id[2]; 66 u16 processor_rev_id;
66 unsigned char nb_rev_id; 67 u8 nb_rev_id;
67 unsigned char sb_rev_id; 68 u8 sb_rev_id;
68 unsigned char bios_api_rev; 69 u8 bios_api_rev;
69 unsigned char reserved1[3]; 70 u8 reserved1[3];
70 unsigned int match_reg[8]; 71 u32 match_reg[8];
71}; 72} __attribute__((packed));
72 73
73struct microcode_amd { 74struct microcode_amd {
74 struct microcode_header_amd hdr; 75 struct microcode_header_amd hdr;
75 unsigned int mpb[0]; 76 unsigned int mpb[0];
76}; 77};
77 78
78#define UCODE_MAX_SIZE (2048) 79#define UCODE_MAX_SIZE 2048
79#define DEFAULT_UCODE_DATASIZE (896) 80#define UCODE_CONTAINER_SECTION_HDR 8
80#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) 81#define UCODE_CONTAINER_HEADER_SIZE 12
81#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
82#define DWSIZE (sizeof(u32))
83/* For now we support a fixed ucode total size only */
84#define get_totalsize(mc) \
85 ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
86 + MC_HEADER_SIZE)
87 82
88/* serialize access to the physical write */ 83/* serialize access to the physical write */
89static DEFINE_SPINLOCK(microcode_update_lock); 84static DEFINE_SPINLOCK(microcode_update_lock);
@@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table;
93static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 88static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
94{ 89{
95 struct cpuinfo_x86 *c = &cpu_data(cpu); 90 struct cpuinfo_x86 *c = &cpu_data(cpu);
91 u32 dummy;
96 92
97 memset(csig, 0, sizeof(*csig)); 93 memset(csig, 0, sizeof(*csig));
98
99 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { 94 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
100 printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", 95 printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not "
101 cpu); 96 "supported\n", cpu, c->x86);
102 return -1; 97 return -1;
103 } 98 }
104 99 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
105 asm volatile("movl %1, %%ecx; rdmsr" 100 printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev);
106 : "=a" (csig->rev)
107 : "i" (0x0000008B) : "ecx");
108
109 printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
110 csig->rev);
111
112 return 0; 101 return 0;
113} 102}
114 103
115static int get_matching_microcode(int cpu, void *mc, int rev) 104static int get_matching_microcode(int cpu, void *mc, int rev)
116{ 105{
117 struct microcode_header_amd *mc_header = mc; 106 struct microcode_header_amd *mc_header = mc;
118 struct pci_dev *nb_pci_dev, *sb_pci_dev;
119 unsigned int current_cpu_id; 107 unsigned int current_cpu_id;
120 unsigned int equiv_cpu_id = 0x00; 108 u16 equiv_cpu_id = 0;
121 unsigned int i = 0; 109 unsigned int i = 0;
122 110
123 BUG_ON(equiv_cpu_table == NULL); 111 BUG_ON(equiv_cpu_table == NULL);
@@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
132 } 120 }
133 121
134 if (!equiv_cpu_id) { 122 if (!equiv_cpu_id) {
135 printk(KERN_ERR "microcode: CPU%d cpu_id " 123 printk(KERN_WARNING "microcode: CPU%d: cpu revision "
136 "not found in equivalent cpu table \n", cpu); 124 "not listed in equivalent cpu table\n", cpu);
137 return 0; 125 return 0;
138 } 126 }
139 127
140 if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { 128 if (mc_header->processor_rev_id != equiv_cpu_id) {
141 printk(KERN_ERR 129 printk(KERN_ERR "microcode: CPU%d: patch mismatch "
142 "microcode: CPU%d patch does not match " 130 "(processor_rev_id: %x, equiv_cpu_id: %x)\n",
143 "(patch is %x, cpu extended is %x) \n", 131 cpu, mc_header->processor_rev_id, equiv_cpu_id);
144 cpu, mc_header->processor_rev_id[0],
145 (equiv_cpu_id & 0xff));
146 return 0; 132 return 0;
147 } 133 }
148 134
149 if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { 135 /* ucode might be chipset specific -- currently we don't support this */
150 printk(KERN_ERR "microcode: CPU%d patch does not match " 136 if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
151 "(patch is %x, cpu base id is %x) \n", 137 printk(KERN_ERR "microcode: CPU%d: loading of chipset "
152 cpu, mc_header->processor_rev_id[1], 138 "specific code not yet supported\n", cpu);
153 ((equiv_cpu_id >> 16) & 0xff));
154
155 return 0; 139 return 0;
156 } 140 }
157 141
158 /* ucode may be northbridge specific */
159 if (mc_header->nb_dev_id) {
160 nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
161 (mc_header->nb_dev_id & 0xff),
162 NULL);
163 if ((!nb_pci_dev) ||
164 (mc_header->nb_rev_id != nb_pci_dev->revision)) {
165 printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
166 pci_dev_put(nb_pci_dev);
167 return 0;
168 }
169 pci_dev_put(nb_pci_dev);
170 }
171
172 /* ucode may be southbridge specific */
173 if (mc_header->sb_dev_id) {
174 sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
175 (mc_header->sb_dev_id & 0xff),
176 NULL);
177 if ((!sb_pci_dev) ||
178 (mc_header->sb_rev_id != sb_pci_dev->revision)) {
179 printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
180 pci_dev_put(sb_pci_dev);
181 return 0;
182 }
183 pci_dev_put(sb_pci_dev);
184 }
185
186 if (mc_header->patch_id <= rev) 142 if (mc_header->patch_id <= rev)
187 return 0; 143 return 0;
188 144
@@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
192static void apply_microcode_amd(int cpu) 148static void apply_microcode_amd(int cpu)
193{ 149{
194 unsigned long flags; 150 unsigned long flags;
195 unsigned int eax, edx; 151 u32 rev, dummy;
196 unsigned int rev;
197 int cpu_num = raw_smp_processor_id(); 152 int cpu_num = raw_smp_processor_id();
198 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; 153 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
199 struct microcode_amd *mc_amd = uci->mc; 154 struct microcode_amd *mc_amd = uci->mc;
200 unsigned long addr;
201 155
202 /* We should bind the task to the CPU */ 156 /* We should bind the task to the CPU */
203 BUG_ON(cpu_num != cpu); 157 BUG_ON(cpu_num != cpu);
@@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu)
206 return; 160 return;
207 161
208 spin_lock_irqsave(&microcode_update_lock, flags); 162 spin_lock_irqsave(&microcode_update_lock, flags);
209 163 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
210 addr = (unsigned long)&mc_amd->hdr.data_code;
211 edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
212 eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
213
214 asm volatile("movl %0, %%ecx; wrmsr" :
215 : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
216
217 /* get patch id after patching */ 164 /* get patch id after patching */
218 asm volatile("movl %1, %%ecx; rdmsr" 165 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
219 : "=a" (rev)
220 : "i" (0x0000008B) : "ecx");
221
222 spin_unlock_irqrestore(&microcode_update_lock, flags); 166 spin_unlock_irqrestore(&microcode_update_lock, flags);
223 167
224 /* check current patch id and patch's id for match */ 168 /* check current patch id and patch's id for match */
225 if (rev != mc_amd->hdr.patch_id) { 169 if (rev != mc_amd->hdr.patch_id) {
226 printk(KERN_ERR "microcode: CPU%d update from revision " 170 printk(KERN_ERR "microcode: CPU%d: update failed "
227 "0x%x to 0x%x failed\n", cpu_num, 171 "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
228 mc_amd->hdr.patch_id, rev);
229 return; 172 return;
230 } 173 }
231 174
232 printk(KERN_INFO "microcode: CPU%d updated from revision " 175 printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
233 "0x%x to 0x%x \n", 176 cpu, rev);
234 cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
235 177
236 uci->cpu_sig.rev = rev; 178 uci->cpu_sig.rev = rev;
237} 179}
238 180
239static void * get_next_ucode(u8 *buf, unsigned int size, 181static int get_ucode_data(void *to, const u8 *from, size_t n)
240 int (*get_ucode_data)(void *, const void *, size_t), 182{
241 unsigned int *mc_size) 183 memcpy(to, from, n);
184 return 0;
185}
186
187static void *get_next_ucode(const u8 *buf, unsigned int size,
188 unsigned int *mc_size)
242{ 189{
243 unsigned int total_size; 190 unsigned int total_size;
244#define UCODE_CONTAINER_SECTION_HDR 8
245 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 191 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
246 void *mc; 192 void *mc;
247 193
@@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
249 return NULL; 195 return NULL;
250 196
251 if (section_hdr[0] != UCODE_UCODE_TYPE) { 197 if (section_hdr[0] != UCODE_UCODE_TYPE) {
252 printk(KERN_ERR "microcode: error! " 198 printk(KERN_ERR "microcode: error: invalid type field in "
253 "Wrong microcode payload type field\n"); 199 "container file section header\n");
254 return NULL; 200 return NULL;
255 } 201 }
256 202
257 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); 203 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
258 204
259 printk(KERN_INFO "microcode: size %u, total_size %u\n", 205 printk(KERN_DEBUG "microcode: size %u, total_size %u\n",
260 size, total_size); 206 size, total_size);
261 207
262 if (total_size > size || total_size > UCODE_MAX_SIZE) { 208 if (total_size > size || total_size > UCODE_MAX_SIZE) {
263 printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); 209 printk(KERN_ERR "microcode: error: size mismatch\n");
264 return NULL; 210 return NULL;
265 } 211 }
266 212
267 mc = vmalloc(UCODE_MAX_SIZE); 213 mc = vmalloc(UCODE_MAX_SIZE);
268 if (mc) { 214 if (mc) {
269 memset(mc, 0, UCODE_MAX_SIZE); 215 memset(mc, 0, UCODE_MAX_SIZE);
270 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { 216 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
217 total_size)) {
271 vfree(mc); 218 vfree(mc);
272 mc = NULL; 219 mc = NULL;
273 } else 220 } else
274 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; 221 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
275 } 222 }
276#undef UCODE_CONTAINER_SECTION_HDR
277 return mc; 223 return mc;
278} 224}
279 225
280 226
281static int install_equiv_cpu_table(u8 *buf, 227static int install_equiv_cpu_table(const u8 *buf)
282 int (*get_ucode_data)(void *, const void *, size_t))
283{ 228{
284#define UCODE_CONTAINER_HEADER_SIZE 12
285 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; 229 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
286 unsigned int *buf_pos = (unsigned int *)container_hdr; 230 unsigned int *buf_pos = (unsigned int *)container_hdr;
287 unsigned long size; 231 unsigned long size;
@@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf,
292 size = buf_pos[2]; 236 size = buf_pos[2];
293 237
294 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { 238 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
295 printk(KERN_ERR "microcode: error! " 239 printk(KERN_ERR "microcode: error: invalid type field in "
296 "Wrong microcode equivalnet cpu table\n"); 240 "container file section header\n");
297 return 0; 241 return 0;
298 } 242 }
299 243
300 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 244 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
301 if (!equiv_cpu_table) { 245 if (!equiv_cpu_table) {
302 printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); 246 printk(KERN_ERR "microcode: failed to allocate "
247 "equivalent CPU table\n");
303 return 0; 248 return 0;
304 } 249 }
305 250
@@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf,
310 } 255 }
311 256
312 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 257 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
313#undef UCODE_CONTAINER_HEADER_SIZE
314} 258}
315 259
316static void free_equiv_cpu_table(void) 260static void free_equiv_cpu_table(void)
@@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void)
321 } 265 }
322} 266}
323 267
324static int generic_load_microcode(int cpu, void *data, size_t size, 268static int generic_load_microcode(int cpu, const u8 *data, size_t size)
325 int (*get_ucode_data)(void *, const void *, size_t))
326{ 269{
327 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 270 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
328 u8 *ucode_ptr = data, *new_mc = NULL, *mc; 271 const u8 *ucode_ptr = data;
272 void *new_mc = NULL;
273 void *mc;
329 int new_rev = uci->cpu_sig.rev; 274 int new_rev = uci->cpu_sig.rev;
330 unsigned int leftover; 275 unsigned int leftover;
331 unsigned long offset; 276 unsigned long offset;
332 277
333 offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); 278 offset = install_equiv_cpu_table(ucode_ptr);
334 if (!offset) { 279 if (!offset) {
335 printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); 280 printk(KERN_ERR "microcode: failed to create "
281 "equivalent cpu table\n");
336 return -EINVAL; 282 return -EINVAL;
337 } 283 }
338 284
@@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
343 unsigned int uninitialized_var(mc_size); 289 unsigned int uninitialized_var(mc_size);
344 struct microcode_header_amd *mc_header; 290 struct microcode_header_amd *mc_header;
345 291
346 mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); 292 mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
347 if (!mc) 293 if (!mc)
348 break; 294 break;
349 295
@@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
353 vfree(new_mc); 299 vfree(new_mc);
354 new_rev = mc_header->patch_id; 300 new_rev = mc_header->patch_id;
355 new_mc = mc; 301 new_mc = mc;
356 } else 302 } else
357 vfree(mc); 303 vfree(mc);
358 304
359 ucode_ptr += mc_size; 305 ucode_ptr += mc_size;
@@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
365 if (uci->mc) 311 if (uci->mc)
366 vfree(uci->mc); 312 vfree(uci->mc);
367 uci->mc = new_mc; 313 uci->mc = new_mc;
368 pr_debug("microcode: CPU%d found a matching microcode update with" 314 pr_debug("microcode: CPU%d found a matching microcode "
369 " version 0x%x (current=0x%x)\n", 315 "update with version 0x%x (current=0x%x)\n",
370 cpu, new_rev, uci->cpu_sig.rev); 316 cpu, new_rev, uci->cpu_sig.rev);
371 } else 317 } else
372 vfree(new_mc); 318 vfree(new_mc);
373 } 319 }
@@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
377 return (int)leftover; 323 return (int)leftover;
378} 324}
379 325
380static int get_ucode_fw(void *to, const void *from, size_t n)
381{
382 memcpy(to, from, n);
383 return 0;
384}
385
386static int request_microcode_fw(int cpu, struct device *device) 326static int request_microcode_fw(int cpu, struct device *device)
387{ 327{
388 const char *fw_name = "amd-ucode/microcode_amd.bin"; 328 const char *fw_name = "amd-ucode/microcode_amd.bin";
@@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device)
394 334
395 ret = request_firmware(&firmware, fw_name, device); 335 ret = request_firmware(&firmware, fw_name, device);
396 if (ret) { 336 if (ret) {
397 printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); 337 printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
398 return ret; 338 return ret;
399 } 339 }
400 340
401 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 341 ret = generic_load_microcode(cpu, firmware->data, firmware->size);
402 &get_ucode_fw);
403 342
404 release_firmware(firmware); 343 release_firmware(firmware);
405 344
@@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device)
408 347
409static int request_microcode_user(int cpu, const void __user *buf, size_t size) 348static int request_microcode_user(int cpu, const void __user *buf, size_t size)
410{ 349{
411 printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" 350 printk(KERN_INFO "microcode: AMD microcode update via "
412 "is not supported\n"); 351 "/dev/cpu/microcode not supported\n");
413 return -1; 352 return -1;
414} 353}
415 354
@@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void)
433{ 372{
434 return &microcode_amd_ops; 373 return &microcode_amd_ops;
435} 374}
375
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 82fb2809ce32..c9b721ba968c 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL");
99 99
100#define MICROCODE_VERSION "2.00" 100#define MICROCODE_VERSION "2.00"
101 101
102struct microcode_ops *microcode_ops; 102static struct microcode_ops *microcode_ops;
103 103
104/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ 104/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
105static DEFINE_MUTEX(microcode_mutex); 105static DEFINE_MUTEX(microcode_mutex);
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
203#endif 203#endif
204 204
205/* fake device for request_firmware */ 205/* fake device for request_firmware */
206struct platform_device *microcode_pdev; 206static struct platform_device *microcode_pdev;
207 207
208static ssize_t reload_store(struct sys_device *dev, 208static ssize_t reload_store(struct sys_device *dev,
209 struct sysdev_attribute *attr, 209 struct sysdev_attribute *attr,
@@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = {
272 .name = "microcode", 272 .name = "microcode",
273}; 273};
274 274
275static void microcode_fini_cpu(int cpu) 275static void __microcode_fini_cpu(int cpu)
276{ 276{
277 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 277 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
278 278
279 mutex_lock(&microcode_mutex);
280 microcode_ops->microcode_fini_cpu(cpu); 279 microcode_ops->microcode_fini_cpu(cpu);
281 uci->valid = 0; 280 uci->valid = 0;
281}
282
283static void microcode_fini_cpu(int cpu)
284{
285 mutex_lock(&microcode_mutex);
286 __microcode_fini_cpu(cpu);
282 mutex_unlock(&microcode_mutex); 287 mutex_unlock(&microcode_mutex);
283} 288}
284 289
@@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu)
306 * to this cpu (a bit of paranoia): 311 * to this cpu (a bit of paranoia):
307 */ 312 */
308 if (microcode_ops->collect_cpu_info(cpu, &nsig)) { 313 if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
309 microcode_fini_cpu(cpu); 314 __microcode_fini_cpu(cpu);
315 printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
316 cpu);
310 return -1; 317 return -1;
311 } 318 }
312 319
313 if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { 320 if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
314 microcode_fini_cpu(cpu); 321 __microcode_fini_cpu(cpu);
322 printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
323 cpu);
315 /* Should we look for a new ucode here? */ 324 /* Should we look for a new ucode here? */
316 return 1; 325 return 1;
317 } 326 }
@@ -319,7 +328,7 @@ static int microcode_resume_cpu(int cpu)
319 return 0; 328 return 0;
320} 329}
321 330
322void microcode_update_cpu(int cpu) 331static void microcode_update_cpu(int cpu)
323{ 332{
324 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 333 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
325 int err = 0; 334 int err = 0;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 622dc4a21784..b7f4c929e615 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock);
155static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) 155static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
156{ 156{
157 struct cpuinfo_x86 *c = &cpu_data(cpu_num); 157 struct cpuinfo_x86 *c = &cpu_data(cpu_num);
158 unsigned long flags;
158 unsigned int val[2]; 159 unsigned int val[2];
159 160
160 memset(csig, 0, sizeof(*csig)); 161 memset(csig, 0, sizeof(*csig));
@@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
174 csig->pf = 1 << ((val[1] >> 18) & 7); 175 csig->pf = 1 << ((val[1] >> 18) & 7);
175 } 176 }
176 177
178 /* serialize access to the physical write to MSR 0x79 */
179 spin_lock_irqsave(&microcode_update_lock, flags);
180
177 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 181 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
178 /* see notes above for revision 1.07. Apparent chip bug */ 182 /* see notes above for revision 1.07. Apparent chip bug */
179 sync_core(); 183 sync_core();
180 /* get the current revision from MSR 0x8B */ 184 /* get the current revision from MSR 0x8B */
181 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); 185 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
186 spin_unlock_irqrestore(&microcode_update_lock, flags);
187
182 pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", 188 pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
183 csig->sig, csig->pf, csig->rev); 189 csig->sig, csig->pf, csig->rev);
184 190
@@ -465,7 +471,7 @@ static void microcode_fini_cpu(int cpu)
465 uci->mc = NULL; 471 uci->mc = NULL;
466} 472}
467 473
468struct microcode_ops microcode_intel_ops = { 474static struct microcode_ops microcode_intel_ops = {
469 .request_microcode_user = request_microcode_user, 475 .request_microcode_user = request_microcode_user,
470 .request_microcode_fw = request_microcode_fw, 476 .request_microcode_fw = request_microcode_fw,
471 .collect_cpu_info = collect_cpu_info, 477 .collect_cpu_info = collect_cpu_info,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0f4c1fd5a1f4..45e3b69808ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early)
586{ 586{
587 struct intel_mp_floating *mpf = mpf_found; 587 struct intel_mp_floating *mpf = mpf_found;
588 588
589 if (x86_quirks->mach_get_smp_config) { 589 if (!mpf)
590 if (x86_quirks->mach_get_smp_config(early)) 590 return;
591 return; 591
592 }
593 if (acpi_lapic && early) 592 if (acpi_lapic && early)
594 return; 593 return;
594
595 /* 595 /*
596 * ACPI supports both logical (e.g. Hyper-Threading) and physical 596 * MPS doesn't support hyperthreading, aka only have
597 * processors, where MPS only supports physical. 597 * thread 0 apic id in MPS table
598 */ 598 */
599 if (acpi_lapic && acpi_ioapic) { 599 if (acpi_lapic && acpi_ioapic)
600 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
601 "information\n");
602 return; 600 return;
603 } else if (acpi_lapic)
604 printk(KERN_INFO "Using ACPI for processor (LAPIC) "
605 "configuration information\n");
606 601
607 if (!mpf) 602 if (x86_quirks->mach_get_smp_config) {
608 return; 603 if (x86_quirks->mach_get_smp_config(early))
604 return;
605 }
609 606
610 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 607 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
611 mpf->mpf_specification); 608 mpf->mpf_specification);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c97f07f1c2c..8bd1bf9622a7 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
131 atomic_dec(&nmi_active); 131 atomic_dec(&nmi_active);
132} 132}
133 133
134static void __acpi_nmi_disable(void *__unused)
135{
136 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
137}
138
134int __init check_nmi_watchdog(void) 139int __init check_nmi_watchdog(void)
135{ 140{
136 unsigned int *prev_nmi_count; 141 unsigned int *prev_nmi_count;
@@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void)
179 kfree(prev_nmi_count); 184 kfree(prev_nmi_count);
180 return 0; 185 return 0;
181error: 186error:
182 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) 187 if (nmi_watchdog == NMI_IO_APIC) {
183 disable_8259A_irq(0); 188 if (!timer_through_8259)
189 disable_8259A_irq(0);
190 on_each_cpu(__acpi_nmi_disable, NULL, 1);
191 }
192
184#ifdef CONFIG_X86_32 193#ifdef CONFIG_X86_32
185 timer_ack = 0; 194 timer_ack = 0;
186#endif 195#endif
@@ -199,12 +208,17 @@ static int __init setup_nmi_watchdog(char *str)
199 ++str; 208 ++str;
200 } 209 }
201 210
202 get_option(&str, &nmi); 211 if (!strncmp(str, "lapic", 5))
203 212 nmi_watchdog = NMI_LOCAL_APIC;
204 if (nmi >= NMI_INVALID) 213 else if (!strncmp(str, "ioapic", 6))
205 return 0; 214 nmi_watchdog = NMI_IO_APIC;
215 else {
216 get_option(&str, &nmi);
217 if (nmi >= NMI_INVALID)
218 return 0;
219 nmi_watchdog = nmi;
220 }
206 221
207 nmi_watchdog = nmi;
208 return 1; 222 return 1;
209} 223}
210__setup("nmi_watchdog=", setup_nmi_watchdog); 224__setup("nmi_watchdog=", setup_nmi_watchdog);
@@ -285,11 +299,6 @@ void acpi_nmi_enable(void)
285 on_each_cpu(__acpi_nmi_enable, NULL, 1); 299 on_each_cpu(__acpi_nmi_enable, NULL, 1);
286} 300}
287 301
288static void __acpi_nmi_disable(void *__unused)
289{
290 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
291}
292
293/* 302/*
294 * Disable timer based NMIs on all CPUs: 303 * Disable timer based NMIs on all CPUs:
295 */ 304 */
@@ -340,6 +349,8 @@ void stop_apic_nmi_watchdog(void *unused)
340 return; 349 return;
341 if (nmi_watchdog == NMI_LOCAL_APIC) 350 if (nmi_watchdog == NMI_LOCAL_APIC)
342 lapic_watchdog_stop(); 351 lapic_watchdog_stop();
352 else
353 __acpi_nmi_disable(NULL);
343 __get_cpu_var(wd_enabled) = 0; 354 __get_cpu_var(wd_enabled) = 0;
344 atomic_dec(&nmi_active); 355 atomic_dec(&nmi_active);
345} 356}
@@ -465,6 +476,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
465 476
466#ifdef CONFIG_SYSCTL 477#ifdef CONFIG_SYSCTL
467 478
479static void enable_ioapic_nmi_watchdog_single(void *unused)
480{
481 __get_cpu_var(wd_enabled) = 1;
482 atomic_inc(&nmi_active);
483 __acpi_nmi_enable(NULL);
484}
485
486static void enable_ioapic_nmi_watchdog(void)
487{
488 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
489 touch_nmi_watchdog();
490}
491
492static void disable_ioapic_nmi_watchdog(void)
493{
494 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
495}
496
468static int __init setup_unknown_nmi_panic(char *str) 497static int __init setup_unknown_nmi_panic(char *str)
469{ 498{
470 unknown_nmi_panic = 1; 499 unknown_nmi_panic = 1;
@@ -507,6 +536,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
507 enable_lapic_nmi_watchdog(); 536 enable_lapic_nmi_watchdog();
508 else 537 else
509 disable_lapic_nmi_watchdog(); 538 disable_lapic_nmi_watchdog();
539 } else if (nmi_watchdog == NMI_IO_APIC) {
540 if (nmi_watchdog_enabled)
541 enable_ioapic_nmi_watchdog();
542 else
543 disable_ioapic_nmi_watchdog();
510 } else { 544 } else {
511 printk(KERN_WARNING 545 printk(KERN_WARNING
512 "NMI watchdog doesn't know what hardware to touch\n"); 546 "NMI watchdog doesn't know what hardware to touch\n");
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
31#include <asm/numaq.h> 31#include <asm/numaq.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h> 34#include <asm/genapic.h>
35#include <asm/e820.h> 35#include <asm/e820.h>
36#include <asm/setup.h> 36#include <asm/setup.h>
37 37
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
235 return 1; 235 return 1;
236} 236}
237 237
238static int __init numaq_update_genapic(void)
239{
240 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
241
242 return 0;
243}
244
238static struct x86_quirks numaq_x86_quirks __initdata = { 245static struct x86_quirks numaq_x86_quirks __initdata = {
239 .arch_pre_time_init = numaq_pre_time_init, 246 .arch_pre_time_init = numaq_pre_time_init,
240 .arch_time_init = NULL, 247 .arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
250 .mpc_oem_pci_bus = mpc_oem_pci_bus, 257 .mpc_oem_pci_bus = mpc_oem_pci_bus,
251 .smp_read_mpc_oem = smp_read_mpc_oem, 258 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids, 259 .setup_ioapic_ids = numaq_setup_ioapic_ids,
260 .update_genapic = numaq_update_genapic,
253}; 261};
254 262
255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 263void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 192624820217..7a3dfceb90e4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,6 +6,7 @@
6#include <asm/proto.h> 6#include <asm/proto.h>
7#include <asm/dma.h> 7#include <asm/dma.h>
8#include <asm/iommu.h> 8#include <asm/iommu.h>
9#include <asm/gart.h>
9#include <asm/calgary.h> 10#include <asm/calgary.h>
10#include <asm/amd_iommu.h> 11#include <asm/amd_iommu.h>
11 12
@@ -30,11 +31,6 @@ int no_iommu __read_mostly;
30/* Set this to 1 if there is a HW IOMMU in the system */ 31/* Set this to 1 if there is a HW IOMMU in the system */
31int iommu_detected __read_mostly = 0; 32int iommu_detected __read_mostly = 0;
32 33
33/* This tells the BIO block layer to assume merging. Default to off
34 because we cannot guarantee merging later. */
35int iommu_bio_merge __read_mostly = 0;
36EXPORT_SYMBOL(iommu_bio_merge);
37
38dma_addr_t bad_dma_address __read_mostly = 0; 34dma_addr_t bad_dma_address __read_mostly = 0;
39EXPORT_SYMBOL(bad_dma_address); 35EXPORT_SYMBOL(bad_dma_address);
40 36
@@ -188,7 +184,6 @@ static __init int iommu_setup(char *p)
188 } 184 }
189 185
190 if (!strncmp(p, "biomerge", 8)) { 186 if (!strncmp(p, "biomerge", 8)) {
191 iommu_bio_merge = 4096;
192 iommu_merge = 1; 187 iommu_merge = 1;
193 force_iommu = 1; 188 force_iommu = 1;
194 } 189 }
@@ -300,8 +295,8 @@ fs_initcall(pci_iommu_init);
300static __devinit void via_no_dac(struct pci_dev *dev) 295static __devinit void via_no_dac(struct pci_dev *dev)
301{ 296{
302 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { 297 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
303 printk(KERN_INFO "PCI: VIA PCI bridge detected." 298 printk(KERN_INFO
304 "Disabling DAC.\n"); 299 "PCI: VIA PCI bridge detected. Disabling DAC.\n");
305 forbid_dac = 1; 300 forbid_dac = 1;
306 } 301 }
307} 302}
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba7ad83e20a8..a35eaa379ff6 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -745,10 +745,8 @@ void __init gart_iommu_init(void)
745 unsigned long scratch; 745 unsigned long scratch;
746 long i; 746 long i;
747 747
748 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { 748 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
749 printk(KERN_INFO "PCI-GART: No AMD GART found.\n");
750 return; 749 return;
751 }
752 750
753#ifndef CONFIG_AGP_AMD64 751#ifndef CONFIG_AGP_AMD64
754 no_agp = 1; 752 no_agp = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..e68bb9e30864 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,13 +1,16 @@
1#include <linux/errno.h> 1#include <linux/errno.h>
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <asm/idle.h>
4#include <linux/smp.h> 5#include <linux/smp.h>
5#include <linux/slab.h> 6#include <linux/slab.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/module.h> 8#include <linux/module.h>
8#include <linux/pm.h> 9#include <linux/pm.h>
9#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/ftrace.h>
10#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/apic.h>
11 14
12unsigned long idle_halt; 15unsigned long idle_halt;
13EXPORT_SYMBOL(idle_halt); 16EXPORT_SYMBOL(idle_halt);
@@ -100,6 +103,9 @@ static inline int hlt_use_halt(void)
100void default_idle(void) 103void default_idle(void)
101{ 104{
102 if (hlt_use_halt()) { 105 if (hlt_use_halt()) {
106 struct power_trace it;
107
108 trace_power_start(&it, POWER_CSTATE, 1);
103 current_thread_info()->status &= ~TS_POLLING; 109 current_thread_info()->status &= ~TS_POLLING;
104 /* 110 /*
105 * TS_POLLING-cleared state must be visible before we 111 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +118,7 @@ void default_idle(void)
112 else 118 else
113 local_irq_enable(); 119 local_irq_enable();
114 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
121 trace_power_end(&it);
115 } else { 122 } else {
116 local_irq_enable(); 123 local_irq_enable();
117 /* loop is done by the caller */ 124 /* loop is done by the caller */
@@ -122,6 +129,21 @@ void default_idle(void)
122EXPORT_SYMBOL(default_idle); 129EXPORT_SYMBOL(default_idle);
123#endif 130#endif
124 131
132void stop_this_cpu(void *dummy)
133{
134 local_irq_disable();
135 /*
136 * Remove this CPU:
137 */
138 cpu_clear(smp_processor_id(), cpu_online_map);
139 disable_local_APIC();
140
141 for (;;) {
142 if (hlt_works(smp_processor_id()))
143 halt();
144 }
145}
146
125static void do_nothing(void *unused) 147static void do_nothing(void *unused)
126{ 148{
127} 149}
@@ -154,24 +176,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
154 */ 176 */
155void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 177void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
156{ 178{
179 struct power_trace it;
180
181 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
157 if (!need_resched()) { 182 if (!need_resched()) {
158 __monitor((void *)&current_thread_info()->flags, 0, 0); 183 __monitor((void *)&current_thread_info()->flags, 0, 0);
159 smp_mb(); 184 smp_mb();
160 if (!need_resched()) 185 if (!need_resched())
161 __mwait(ax, cx); 186 __mwait(ax, cx);
162 } 187 }
188 trace_power_end(&it);
163} 189}
164 190
165/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 191/* Default MONITOR/MWAIT with no hints, used for default C1 state */
166static void mwait_idle(void) 192static void mwait_idle(void)
167{ 193{
194 struct power_trace it;
168 if (!need_resched()) { 195 if (!need_resched()) {
196 trace_power_start(&it, POWER_CSTATE, 1);
169 __monitor((void *)&current_thread_info()->flags, 0, 0); 197 __monitor((void *)&current_thread_info()->flags, 0, 0);
170 smp_mb(); 198 smp_mb();
171 if (!need_resched()) 199 if (!need_resched())
172 __sti_mwait(0, 0); 200 __sti_mwait(0, 0);
173 else 201 else
174 local_irq_enable(); 202 local_irq_enable();
203 trace_power_end(&it);
175 } else 204 } else
176 local_irq_enable(); 205 local_irq_enable();
177} 206}
@@ -183,9 +212,13 @@ static void mwait_idle(void)
183 */ 212 */
184static void poll_idle(void) 213static void poll_idle(void)
185{ 214{
215 struct power_trace it;
216
217 trace_power_start(&it, POWER_CSTATE, 0);
186 local_irq_enable(); 218 local_irq_enable();
187 while (!need_resched()) 219 while (!need_resched())
188 cpu_relax(); 220 cpu_relax();
221 trace_power_end(&it);
189} 222}
190 223
191/* 224/*
@@ -270,7 +303,7 @@ static void c1e_idle(void)
270 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); 303 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
271 if (lo & K8_INTP_C1E_ACTIVE_MASK) { 304 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
272 c1e_detected = 1; 305 c1e_detected = 1;
273 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 306 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
274 mark_tsc_unstable("TSC halt in AMD C1E"); 307 mark_tsc_unstable("TSC halt in AMD C1E");
275 printk(KERN_INFO "System has AMD C1E enabled\n"); 308 printk(KERN_INFO "System has AMD C1E enabled\n");
276 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); 309 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..3ba155d24884 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/dmi.h> 40#include <linux/dmi.h>
41#include <linux/ftrace.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -59,6 +60,7 @@
59#include <asm/idle.h> 60#include <asm/idle.h>
60#include <asm/syscalls.h> 61#include <asm/syscalls.h>
61#include <asm/smp.h> 62#include <asm/smp.h>
63#include <asm/ds.h>
62 64
63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 65asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
64 66
@@ -250,14 +252,8 @@ void exit_thread(void)
250 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 252 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
251 put_cpu(); 253 put_cpu();
252 } 254 }
253#ifdef CONFIG_X86_DS 255
254 /* Free any DS contexts that have not been properly released. */ 256 ds_exit_thread(current);
255 if (unlikely(current->thread.ds_ctx)) {
256 /* we clear debugctl to make sure DS is not used. */
257 update_debugctlmsr(0);
258 ds_free(current->thread.ds_ctx);
259 }
260#endif /* CONFIG_X86_DS */
261} 257}
262 258
263void flush_thread(void) 259void flush_thread(void)
@@ -339,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
339 kfree(p->thread.io_bitmap_ptr); 335 kfree(p->thread.io_bitmap_ptr);
340 p->thread.io_bitmap_max = 0; 336 p->thread.io_bitmap_max = 0;
341 } 337 }
338
339 ds_copy_thread(p, current);
340
341 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
342 p->thread.debugctlmsr = 0;
343
342 return err; 344 return err;
343} 345}
344 346
@@ -419,48 +421,19 @@ int set_tsc_mode(unsigned int val)
419 return 0; 421 return 0;
420} 422}
421 423
422#ifdef CONFIG_X86_DS
423static int update_debugctl(struct thread_struct *prev,
424 struct thread_struct *next, unsigned long debugctl)
425{
426 unsigned long ds_prev = 0;
427 unsigned long ds_next = 0;
428
429 if (prev->ds_ctx)
430 ds_prev = (unsigned long)prev->ds_ctx->ds;
431 if (next->ds_ctx)
432 ds_next = (unsigned long)next->ds_ctx->ds;
433
434 if (ds_next != ds_prev) {
435 /* we clear debugctl to make sure DS
436 * is not in use when we change it */
437 debugctl = 0;
438 update_debugctlmsr(0);
439 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
440 }
441 return debugctl;
442}
443#else
444static int update_debugctl(struct thread_struct *prev,
445 struct thread_struct *next, unsigned long debugctl)
446{
447 return debugctl;
448}
449#endif /* CONFIG_X86_DS */
450
451static noinline void 424static noinline void
452__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 425__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
453 struct tss_struct *tss) 426 struct tss_struct *tss)
454{ 427{
455 struct thread_struct *prev, *next; 428 struct thread_struct *prev, *next;
456 unsigned long debugctl;
457 429
458 prev = &prev_p->thread; 430 prev = &prev_p->thread;
459 next = &next_p->thread; 431 next = &next_p->thread;
460 432
461 debugctl = update_debugctl(prev, next, prev->debugctlmsr); 433 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
462 434 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
463 if (next->debugctlmsr != debugctl) 435 ds_switch_to(prev_p, next_p);
436 else if (next->debugctlmsr != prev->debugctlmsr)
464 update_debugctlmsr(next->debugctlmsr); 437 update_debugctlmsr(next->debugctlmsr);
465 438
466 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 439 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -482,15 +455,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
482 hard_enable_TSC(); 455 hard_enable_TSC();
483 } 456 }
484 457
485#ifdef CONFIG_X86_PTRACE_BTS
486 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
487 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
488
489 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
490 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
491#endif /* CONFIG_X86_PTRACE_BTS */
492
493
494 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 458 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
495 /* 459 /*
496 * Disable the bitmap via an invalid offset. We still cache 460 * Disable the bitmap via an invalid offset. We still cache
@@ -548,7 +512,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
548 * the task-switch, and shows up in ret_from_fork in entry.S, 512 * the task-switch, and shows up in ret_from_fork in entry.S,
549 * for example. 513 * for example.
550 */ 514 */
551struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 515__notrace_funcgraph struct task_struct *
516__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552{ 517{
553 struct thread_struct *prev = &prev_p->thread, 518 struct thread_struct *prev = &prev_p->thread,
554 *next = &next_p->thread; 519 *next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..416fb9282f4f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41#include <linux/io.h> 41#include <linux/io.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
44#include <asm/system.h> 45#include <asm/system.h>
@@ -52,6 +53,7 @@
52#include <asm/ia32.h> 53#include <asm/ia32.h>
53#include <asm/idle.h> 54#include <asm/idle.h>
54#include <asm/syscalls.h> 55#include <asm/syscalls.h>
56#include <asm/ds.h>
55 57
56asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
57 59
@@ -235,14 +237,8 @@ void exit_thread(void)
235 t->io_bitmap_max = 0; 237 t->io_bitmap_max = 0;
236 put_cpu(); 238 put_cpu();
237 } 239 }
238#ifdef CONFIG_X86_DS 240
239 /* Free any DS contexts that have not been properly released. */ 241 ds_exit_thread(current);
240 if (unlikely(t->ds_ctx)) {
241 /* we clear debugctl to make sure DS is not used. */
242 update_debugctlmsr(0);
243 ds_free(t->ds_ctx);
244 }
245#endif /* CONFIG_X86_DS */
246} 242}
247 243
248void flush_thread(void) 244void flush_thread(void)
@@ -372,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
372 if (err) 368 if (err)
373 goto out; 369 goto out;
374 } 370 }
371
372 ds_copy_thread(p, me);
373
374 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
375 p->thread.debugctlmsr = 0;
376
375 err = 0; 377 err = 0;
376out: 378out:
377 if (err && p->thread.io_bitmap_ptr) { 379 if (err && p->thread.io_bitmap_ptr) {
@@ -470,35 +472,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
470 struct tss_struct *tss) 472 struct tss_struct *tss)
471{ 473{
472 struct thread_struct *prev, *next; 474 struct thread_struct *prev, *next;
473 unsigned long debugctl;
474 475
475 prev = &prev_p->thread, 476 prev = &prev_p->thread,
476 next = &next_p->thread; 477 next = &next_p->thread;
477 478
478 debugctl = prev->debugctlmsr; 479 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
479 480 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
480#ifdef CONFIG_X86_DS 481 ds_switch_to(prev_p, next_p);
481 { 482 else if (next->debugctlmsr != prev->debugctlmsr)
482 unsigned long ds_prev = 0, ds_next = 0;
483
484 if (prev->ds_ctx)
485 ds_prev = (unsigned long)prev->ds_ctx->ds;
486 if (next->ds_ctx)
487 ds_next = (unsigned long)next->ds_ctx->ds;
488
489 if (ds_next != ds_prev) {
490 /*
491 * We clear debugctl to make sure DS
492 * is not in use when we change it:
493 */
494 debugctl = 0;
495 update_debugctlmsr(0);
496 wrmsrl(MSR_IA32_DS_AREA, ds_next);
497 }
498 }
499#endif /* CONFIG_X86_DS */
500
501 if (next->debugctlmsr != debugctl)
502 update_debugctlmsr(next->debugctlmsr); 483 update_debugctlmsr(next->debugctlmsr);
503 484
504 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 485 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -533,14 +514,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
533 */ 514 */
534 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 515 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
535 } 516 }
536
537#ifdef CONFIG_X86_PTRACE_BTS
538 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
539 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
540
541 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
542 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
543#endif /* CONFIG_X86_PTRACE_BTS */
544} 517}
545 518
546/* 519/*
@@ -551,8 +524,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
551 * - could test fs/gs bitsliced 524 * - could test fs/gs bitsliced
552 * 525 *
553 * Kprobes not supported here. Set the probe on schedule instead. 526 * Kprobes not supported here. Set the probe on schedule instead.
527 * Function graph tracer not supported too.
554 */ 528 */
555struct task_struct * 529__notrace_funcgraph struct task_struct *
556__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 530__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
557{ 531{
558 struct thread_struct *prev = &prev_p->thread; 532 struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..0a5df5f82fb9 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -581,158 +581,91 @@ static int ioperm_get(struct task_struct *target,
581} 581}
582 582
583#ifdef CONFIG_X86_PTRACE_BTS 583#ifdef CONFIG_X86_PTRACE_BTS
584/*
585 * The configuration for a particular BTS hardware implementation.
586 */
587struct bts_configuration {
588 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
589 unsigned char sizeof_bts;
590 /* the size of a field in the BTS record in bytes */
591 unsigned char sizeof_field;
592 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
593 unsigned long debugctl_mask;
594};
595static struct bts_configuration bts_cfg;
596
597#define BTS_MAX_RECORD_SIZE (8 * 3)
598
599
600/*
601 * Branch Trace Store (BTS) uses the following format. Different
602 * architectures vary in the size of those fields.
603 * - source linear address
604 * - destination linear address
605 * - flags
606 *
607 * Later architectures use 64bit pointers throughout, whereas earlier
608 * architectures use 32bit pointers in 32bit mode.
609 *
610 * We compute the base address for the first 8 fields based on:
611 * - the field size stored in the DS configuration
612 * - the relative field position
613 *
614 * In order to store additional information in the BTS buffer, we use
615 * a special source address to indicate that the record requires
616 * special interpretation.
617 *
618 * Netburst indicated via a bit in the flags field whether the branch
619 * was predicted; this is ignored.
620 */
621
622enum bts_field {
623 bts_from = 0,
624 bts_to,
625 bts_flags,
626
627 bts_escape = (unsigned long)-1,
628 bts_qual = bts_to,
629 bts_jiffies = bts_flags
630};
631
632static inline unsigned long bts_get(const char *base, enum bts_field field)
633{
634 base += (bts_cfg.sizeof_field * field);
635 return *(unsigned long *)base;
636}
637
638static inline void bts_set(char *base, enum bts_field field, unsigned long val)
639{
640 base += (bts_cfg.sizeof_field * field);;
641 (*(unsigned long *)base) = val;
642}
643
644/*
645 * Translate a BTS record from the raw format into the bts_struct format
646 *
647 * out (out): bts_struct interpretation
648 * raw: raw BTS record
649 */
650static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
651{
652 memset(out, 0, sizeof(*out));
653 if (bts_get(raw, bts_from) == bts_escape) {
654 out->qualifier = bts_get(raw, bts_qual);
655 out->variant.jiffies = bts_get(raw, bts_jiffies);
656 } else {
657 out->qualifier = BTS_BRANCH;
658 out->variant.lbr.from_ip = bts_get(raw, bts_from);
659 out->variant.lbr.to_ip = bts_get(raw, bts_to);
660 }
661}
662
663static int ptrace_bts_read_record(struct task_struct *child, size_t index, 584static int ptrace_bts_read_record(struct task_struct *child, size_t index,
664 struct bts_struct __user *out) 585 struct bts_struct __user *out)
665{ 586{
666 struct bts_struct ret; 587 const struct bts_trace *trace;
667 const void *bts_record; 588 struct bts_struct bts;
668 size_t bts_index, bts_end; 589 const unsigned char *at;
669 int error; 590 int error;
670 591
671 error = ds_get_bts_end(child, &bts_end); 592 trace = ds_read_bts(child->bts);
672 if (error < 0) 593 if (!trace)
673 return error; 594 return -EPERM;
674
675 if (bts_end <= index)
676 return -EINVAL;
677 595
678 error = ds_get_bts_index(child, &bts_index); 596 at = trace->ds.top - ((index + 1) * trace->ds.size);
679 if (error < 0) 597 if ((void *)at < trace->ds.begin)
680 return error; 598 at += (trace->ds.n * trace->ds.size);
681 599
682 /* translate the ptrace bts index into the ds bts index */ 600 if (!trace->read)
683 bts_index += bts_end - (index + 1); 601 return -EOPNOTSUPP;
684 if (bts_end <= bts_index)
685 bts_index -= bts_end;
686 602
687 error = ds_access_bts(child, bts_index, &bts_record); 603 error = trace->read(child->bts, at, &bts);
688 if (error < 0) 604 if (error < 0)
689 return error; 605 return error;
690 606
691 ptrace_bts_translate_record(&ret, bts_record); 607 if (copy_to_user(out, &bts, sizeof(bts)))
692
693 if (copy_to_user(out, &ret, sizeof(ret)))
694 return -EFAULT; 608 return -EFAULT;
695 609
696 return sizeof(ret); 610 return sizeof(bts);
697} 611}
698 612
699static int ptrace_bts_drain(struct task_struct *child, 613static int ptrace_bts_drain(struct task_struct *child,
700 long size, 614 long size,
701 struct bts_struct __user *out) 615 struct bts_struct __user *out)
702{ 616{
703 struct bts_struct ret; 617 const struct bts_trace *trace;
704 const unsigned char *raw; 618 const unsigned char *at;
705 size_t end, i; 619 int error, drained = 0;
706 int error;
707 620
708 error = ds_get_bts_index(child, &end); 621 trace = ds_read_bts(child->bts);
709 if (error < 0) 622 if (!trace)
710 return error; 623 return -EPERM;
711 624
712 if (size < (end * sizeof(struct bts_struct))) 625 if (!trace->read)
626 return -EOPNOTSUPP;
627
628 if (size < (trace->ds.top - trace->ds.begin))
713 return -EIO; 629 return -EIO;
714 630
715 error = ds_access_bts(child, 0, (const void **)&raw); 631 for (at = trace->ds.begin; (void *)at < trace->ds.top;
716 if (error < 0) 632 out++, drained++, at += trace->ds.size) {
717 return error; 633 struct bts_struct bts;
634 int error;
718 635
719 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { 636 error = trace->read(child->bts, at, &bts);
720 ptrace_bts_translate_record(&ret, raw); 637 if (error < 0)
638 return error;
721 639
722 if (copy_to_user(out, &ret, sizeof(ret))) 640 if (copy_to_user(out, &bts, sizeof(bts)))
723 return -EFAULT; 641 return -EFAULT;
724 } 642 }
725 643
726 error = ds_clear_bts(child); 644 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
645
646 error = ds_reset_bts(child->bts);
727 if (error < 0) 647 if (error < 0)
728 return error; 648 return error;
729 649
730 return end; 650 return drained;
731} 651}
732 652
733static void ptrace_bts_ovfl(struct task_struct *child) 653static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
734{ 654{
735 send_sig(child->thread.bts_ovfl_signal, child, 0); 655 child->bts_buffer = alloc_locked_buffer(size);
656 if (!child->bts_buffer)
657 return -ENOMEM;
658
659 child->bts_size = size;
660
661 return 0;
662}
663
664static void ptrace_bts_free_buffer(struct task_struct *child)
665{
666 free_locked_buffer(child->bts_buffer, child->bts_size);
667 child->bts_buffer = NULL;
668 child->bts_size = 0;
736} 669}
737 670
738static int ptrace_bts_config(struct task_struct *child, 671static int ptrace_bts_config(struct task_struct *child,
@@ -740,114 +673,86 @@ static int ptrace_bts_config(struct task_struct *child,
740 const struct ptrace_bts_config __user *ucfg) 673 const struct ptrace_bts_config __user *ucfg)
741{ 674{
742 struct ptrace_bts_config cfg; 675 struct ptrace_bts_config cfg;
743 int error = 0; 676 unsigned int flags = 0;
744
745 error = -EOPNOTSUPP;
746 if (!bts_cfg.sizeof_bts)
747 goto errout;
748 677
749 error = -EIO;
750 if (cfg_size < sizeof(cfg)) 678 if (cfg_size < sizeof(cfg))
751 goto errout; 679 return -EIO;
752 680
753 error = -EFAULT;
754 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 681 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
755 goto errout; 682 return -EFAULT;
756 683
757 error = -EINVAL; 684 if (child->bts) {
758 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && 685 ds_release_bts(child->bts);
759 !(cfg.flags & PTRACE_BTS_O_ALLOC)) 686 child->bts = NULL;
760 goto errout; 687 }
761 688
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 689 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
763 ds_ovfl_callback_t ovfl = NULL; 690 if (!cfg.signal)
764 unsigned int sig = 0; 691 return -EINVAL;
765 692
766 /* we ignore the error in case we were not tracing child */ 693 return -EOPNOTSUPP;
767 (void)ds_release_bts(child);
768 694
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 695 child->thread.bts_ovfl_signal = cfg.signal;
770 if (!cfg.signal) 696 }
771 goto errout;
772 697
773 sig = cfg.signal; 698 if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
774 ovfl = ptrace_bts_ovfl; 699 (cfg.size != child->bts_size)) {
775 } 700 int error;
776 701
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 702 ptrace_bts_free_buffer(child);
778 if (error < 0)
779 goto errout;
780 703
781 child->thread.bts_ovfl_signal = sig; 704 error = ptrace_bts_allocate_buffer(child, cfg.size);
705 if (error < 0)
706 return error;
782 } 707 }
783 708
784 error = -EINVAL;
785 if (!child->thread.ds_ctx && cfg.flags)
786 goto errout;
787
788 if (cfg.flags & PTRACE_BTS_O_TRACE) 709 if (cfg.flags & PTRACE_BTS_O_TRACE)
789 child->thread.debugctlmsr |= bts_cfg.debugctl_mask; 710 flags |= BTS_USER;
790 else
791 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
792 711
793 if (cfg.flags & PTRACE_BTS_O_SCHED) 712 if (cfg.flags & PTRACE_BTS_O_SCHED)
794 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 713 flags |= BTS_TIMESTAMPS;
795 else
796 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
797 714
798 error = sizeof(cfg); 715 child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
716 /* ovfl = */ NULL, /* th = */ (size_t)-1,
717 flags);
718 if (IS_ERR(child->bts)) {
719 int error = PTR_ERR(child->bts);
799 720
800out: 721 ptrace_bts_free_buffer(child);
801 if (child->thread.debugctlmsr) 722 child->bts = NULL;
802 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
803 else
804 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
805 723
806 return error; 724 return error;
725 }
807 726
808errout: 727 return sizeof(cfg);
809 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
810 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
811 goto out;
812} 728}
813 729
814static int ptrace_bts_status(struct task_struct *child, 730static int ptrace_bts_status(struct task_struct *child,
815 long cfg_size, 731 long cfg_size,
816 struct ptrace_bts_config __user *ucfg) 732 struct ptrace_bts_config __user *ucfg)
817{ 733{
734 const struct bts_trace *trace;
818 struct ptrace_bts_config cfg; 735 struct ptrace_bts_config cfg;
819 size_t end;
820 const void *base, *max;
821 int error;
822 736
823 if (cfg_size < sizeof(cfg)) 737 if (cfg_size < sizeof(cfg))
824 return -EIO; 738 return -EIO;
825 739
826 error = ds_get_bts_end(child, &end); 740 trace = ds_read_bts(child->bts);
827 if (error < 0) 741 if (!trace)
828 return error; 742 return -EPERM;
829
830 error = ds_access_bts(child, /* index = */ 0, &base);
831 if (error < 0)
832 return error;
833
834 error = ds_access_bts(child, /* index = */ end, &max);
835 if (error < 0)
836 return error;
837 743
838 memset(&cfg, 0, sizeof(cfg)); 744 memset(&cfg, 0, sizeof(cfg));
839 cfg.size = (max - base); 745 cfg.size = trace->ds.end - trace->ds.begin;
840 cfg.signal = child->thread.bts_ovfl_signal; 746 cfg.signal = child->thread.bts_ovfl_signal;
841 cfg.bts_size = sizeof(struct bts_struct); 747 cfg.bts_size = sizeof(struct bts_struct);
842 748
843 if (cfg.signal) 749 if (cfg.signal)
844 cfg.flags |= PTRACE_BTS_O_SIGNAL; 750 cfg.flags |= PTRACE_BTS_O_SIGNAL;
845 751
846 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 752 if (trace->ds.flags & BTS_USER)
847 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
848 cfg.flags |= PTRACE_BTS_O_TRACE; 753 cfg.flags |= PTRACE_BTS_O_TRACE;
849 754
850 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 755 if (trace->ds.flags & BTS_TIMESTAMPS)
851 cfg.flags |= PTRACE_BTS_O_SCHED; 756 cfg.flags |= PTRACE_BTS_O_SCHED;
852 757
853 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 758 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
@@ -856,110 +761,77 @@ static int ptrace_bts_status(struct task_struct *child,
856 return sizeof(cfg); 761 return sizeof(cfg);
857} 762}
858 763
859static int ptrace_bts_write_record(struct task_struct *child, 764static int ptrace_bts_clear(struct task_struct *child)
860 const struct bts_struct *in)
861{ 765{
862 unsigned char bts_record[BTS_MAX_RECORD_SIZE]; 766 const struct bts_trace *trace;
863 767
864 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); 768 trace = ds_read_bts(child->bts);
769 if (!trace)
770 return -EPERM;
865 771
866 memset(bts_record, 0, bts_cfg.sizeof_bts); 772 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
867 switch (in->qualifier) {
868 case BTS_INVALID:
869 break;
870 773
871 case BTS_BRANCH: 774 return ds_reset_bts(child->bts);
872 bts_set(bts_record, bts_from, in->variant.lbr.from_ip); 775}
873 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
874 break;
875 776
876 case BTS_TASK_ARRIVES: 777static int ptrace_bts_size(struct task_struct *child)
877 case BTS_TASK_DEPARTS: 778{
878 bts_set(bts_record, bts_from, bts_escape); 779 const struct bts_trace *trace;
879 bts_set(bts_record, bts_qual, in->qualifier);
880 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
881 break;
882 780
883 default: 781 trace = ds_read_bts(child->bts);
884 return -EINVAL; 782 if (!trace)
885 } 783 return -EPERM;
886 784
887 /* The writing task will be the switched-to task on a context 785 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 786}
892 787
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 788static void ptrace_bts_fork(struct task_struct *tsk)
894 enum bts_qualifier qualifier)
895{ 789{
896 struct bts_struct rec = { 790 tsk->bts = NULL;
897 .qualifier = qualifier, 791 tsk->bts_buffer = NULL;
898 .variant.jiffies = jiffies_64 792 tsk->bts_size = 0;
899 }; 793 tsk->thread.bts_ovfl_signal = 0;
900
901 ptrace_bts_write_record(tsk, &rec);
902} 794}
903 795
904static const struct bts_configuration bts_cfg_netburst = { 796static void ptrace_bts_untrace(struct task_struct *child)
905 .sizeof_bts = sizeof(long) * 3, 797{
906 .sizeof_field = sizeof(long), 798 if (unlikely(child->bts)) {
907 .debugctl_mask = (1<<2)|(1<<3)|(1<<5) 799 ds_release_bts(child->bts);
908}; 800 child->bts = NULL;
801
802 /* We cannot update total_vm and locked_vm since
803 child's mm is already gone. But we can reclaim the
804 memory. */
805 kfree(child->bts_buffer);
806 child->bts_buffer = NULL;
807 child->bts_size = 0;
808 }
809}
909 810
910static const struct bts_configuration bts_cfg_pentium_m = { 811static void ptrace_bts_detach(struct task_struct *child)
911 .sizeof_bts = sizeof(long) * 3, 812{
912 .sizeof_field = sizeof(long), 813 if (unlikely(child->bts)) {
913 .debugctl_mask = (1<<6)|(1<<7) 814 ds_release_bts(child->bts);
914}; 815 child->bts = NULL;
915 816
916static const struct bts_configuration bts_cfg_core2 = { 817 ptrace_bts_free_buffer(child);
917 .sizeof_bts = 8 * 3, 818 }
918 .sizeof_field = 8, 819}
919 .debugctl_mask = (1<<6)|(1<<7)|(1<<9) 820#else
920}; 821static inline void ptrace_bts_fork(struct task_struct *tsk) {}
822static inline void ptrace_bts_detach(struct task_struct *child) {}
823static inline void ptrace_bts_untrace(struct task_struct *child) {}
824#endif /* CONFIG_X86_PTRACE_BTS */
921 825
922static inline void bts_configure(const struct bts_configuration *cfg) 826void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
923{ 827{
924 bts_cfg = *cfg; 828 ptrace_bts_fork(child);
925} 829}
926 830
927void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) 831void x86_ptrace_untrace(struct task_struct *child)
928{ 832{
929 switch (c->x86) { 833 ptrace_bts_untrace(child);
930 case 0x6:
931 switch (c->x86_model) {
932 case 0xD:
933 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m);
935 break;
936 case 0xF: /* Core2 */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2);
939 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 }
944 break;
945 case 0xF:
946 switch (c->x86_model) {
947 case 0x0:
948 case 0x1:
949 case 0x2: /* Netburst */
950 bts_configure(&bts_cfg_netburst);
951 break;
952 default:
953 /* sorry, don't know about them */
954 break;
955 }
956 break;
957 default:
958 /* sorry, don't know about them */
959 break;
960 }
961} 834}
962#endif /* CONFIG_X86_PTRACE_BTS */
963 835
964/* 836/*
965 * Called by kernel/ptrace.c when detaching.. 837 * Called by kernel/ptrace.c when detaching..
@@ -972,15 +844,7 @@ void ptrace_disable(struct task_struct *child)
972#ifdef TIF_SYSCALL_EMU 844#ifdef TIF_SYSCALL_EMU
973 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 845 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
974#endif 846#endif
975#ifdef CONFIG_X86_PTRACE_BTS 847 ptrace_bts_detach(child);
976 (void)ds_release_bts(child);
977
978 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
979 if (!child->thread.debugctlmsr)
980 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
981
982 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
983#endif /* CONFIG_X86_PTRACE_BTS */
984} 848}
985 849
986#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 850#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1112,7 +976,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1112 break; 976 break;
1113 977
1114 case PTRACE_BTS_SIZE: 978 case PTRACE_BTS_SIZE:
1115 ret = ds_get_bts_index(child, /* pos = */ NULL); 979 ret = ptrace_bts_size(child);
1116 break; 980 break;
1117 981
1118 case PTRACE_BTS_GET: 982 case PTRACE_BTS_GET:
@@ -1121,7 +985,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1121 break; 985 break;
1122 986
1123 case PTRACE_BTS_CLEAR: 987 case PTRACE_BTS_CLEAR:
1124 ret = ds_clear_bts(child); 988 ret = ptrace_bts_clear(child);
1125 break; 989 break;
1126 990
1127 case PTRACE_BTS_DRAIN: 991 case PTRACE_BTS_DRAIN:
@@ -1384,6 +1248,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1384 1248
1385 case PTRACE_GET_THREAD_AREA: 1249 case PTRACE_GET_THREAD_AREA:
1386 case PTRACE_SET_THREAD_AREA: 1250 case PTRACE_SET_THREAD_AREA:
1251#ifdef CONFIG_X86_PTRACE_BTS
1252 case PTRACE_BTS_CONFIG:
1253 case PTRACE_BTS_STATUS:
1254 case PTRACE_BTS_SIZE:
1255 case PTRACE_BTS_GET:
1256 case PTRACE_BTS_CLEAR:
1257 case PTRACE_BTS_DRAIN:
1258#endif /* CONFIG_X86_PTRACE_BTS */
1387 return arch_ptrace(child, request, addr, data); 1259 return arch_ptrace(child, request, addr, data);
1388 1260
1389 default: 1261 default:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index cc5a2545dd41..61f718df6eec 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -21,6 +21,9 @@
21# include <asm/iommu.h> 21# include <asm/iommu.h>
22#endif 22#endif
23 23
24#include <mach_ipi.h>
25
26
24/* 27/*
25 * Power off function, if any 28 * Power off function, if any
26 */ 29 */
@@ -36,7 +39,10 @@ int reboot_force;
36static int reboot_cpu = -1; 39static int reboot_cpu = -1;
37#endif 40#endif
38 41
39/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] 42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false;
44
45/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
40 warm Don't set the cold reboot flag 46 warm Don't set the cold reboot flag
41 cold Set the cold reboot flag 47 cold Set the cold reboot flag
42 bios Reboot by jumping through the BIOS (only for X86_32) 48 bios Reboot by jumping through the BIOS (only for X86_32)
@@ -45,6 +51,7 @@ static int reboot_cpu = -1;
45 kbd Use the keyboard controller. cold reset (default) 51 kbd Use the keyboard controller. cold reset (default)
46 acpi Use the RESET_REG in the FADT 52 acpi Use the RESET_REG in the FADT
47 efi Use efi reset_system runtime service 53 efi Use efi reset_system runtime service
54 pci Use the so-called "PCI reset register", CF9
48 force Avoid anything that could hang. 55 force Avoid anything that could hang.
49 */ 56 */
50static int __init reboot_setup(char *str) 57static int __init reboot_setup(char *str)
@@ -79,6 +86,7 @@ static int __init reboot_setup(char *str)
79 case 'k': 86 case 'k':
80 case 't': 87 case 't':
81 case 'e': 88 case 'e':
89 case 'p':
82 reboot_type = *str; 90 reboot_type = *str;
83 break; 91 break;
84 92
@@ -404,12 +412,27 @@ static void native_machine_emergency_restart(void)
404 reboot_type = BOOT_KBD; 412 reboot_type = BOOT_KBD;
405 break; 413 break;
406 414
407
408 case BOOT_EFI: 415 case BOOT_EFI:
409 if (efi_enabled) 416 if (efi_enabled)
410 efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, 417 efi.reset_system(reboot_mode ?
418 EFI_RESET_WARM :
419 EFI_RESET_COLD,
411 EFI_SUCCESS, 0, NULL); 420 EFI_SUCCESS, 0, NULL);
421 reboot_type = BOOT_KBD;
422 break;
412 423
424 case BOOT_CF9:
425 port_cf9_safe = true;
426 /* fall through */
427
428 case BOOT_CF9_COND:
429 if (port_cf9_safe) {
430 u8 cf9 = inb(0xcf9) & ~6;
431 outb(cf9|2, 0xcf9); /* Request hard reset */
432 udelay(50);
433 outb(cf9|6, 0xcf9); /* Actually do the reset */
434 udelay(50);
435 }
413 reboot_type = BOOT_KBD; 436 reboot_type = BOOT_KBD;
414 break; 437 break;
415 } 438 }
@@ -470,6 +493,11 @@ static void native_machine_restart(char *__unused)
470 493
471static void native_machine_halt(void) 494static void native_machine_halt(void)
472{ 495{
496 /* stop other cpus and apics */
497 machine_shutdown();
498
499 /* stop this cpu */
500 stop_this_cpu(NULL);
473} 501}
474 502
475static void native_machine_power_off(void) 503static void native_machine_power_off(void)
@@ -523,3 +551,95 @@ void machine_crash_shutdown(struct pt_regs *regs)
523 machine_ops.crash_shutdown(regs); 551 machine_ops.crash_shutdown(regs);
524} 552}
525#endif 553#endif
554
555
556#if defined(CONFIG_SMP)
557
558/* This keeps a track of which one is crashing cpu. */
559static int crashing_cpu;
560static nmi_shootdown_cb shootdown_callback;
561
562static atomic_t waiting_for_crash_ipi;
563
564static int crash_nmi_callback(struct notifier_block *self,
565 unsigned long val, void *data)
566{
567 int cpu;
568
569 if (val != DIE_NMI_IPI)
570 return NOTIFY_OK;
571
572 cpu = raw_smp_processor_id();
573
574 /* Don't do anything if this handler is invoked on crashing cpu.
575 * Otherwise, system will completely hang. Crashing cpu can get
576 * an NMI if system was initially booted with nmi_watchdog parameter.
577 */
578 if (cpu == crashing_cpu)
579 return NOTIFY_STOP;
580 local_irq_disable();
581
582 shootdown_callback(cpu, (struct die_args *)data);
583
584 atomic_dec(&waiting_for_crash_ipi);
585 /* Assume hlt works */
586 halt();
587 for (;;)
588 cpu_relax();
589
590 return 1;
591}
592
593static void smp_send_nmi_allbutself(void)
594{
595 cpumask_t mask = cpu_online_map;
596 cpu_clear(safe_smp_processor_id(), mask);
597 if (!cpus_empty(mask))
598 send_IPI_mask(mask, NMI_VECTOR);
599}
600
601static struct notifier_block crash_nmi_nb = {
602 .notifier_call = crash_nmi_callback,
603};
604
605/* Halt all other CPUs, calling the specified function on each of them
606 *
607 * This function can be used to halt all other CPUs on crash
608 * or emergency reboot time. The function passed as parameter
609 * will be called inside a NMI handler on all CPUs.
610 */
611void nmi_shootdown_cpus(nmi_shootdown_cb callback)
612{
613 unsigned long msecs;
614 local_irq_disable();
615
616 /* Make a note of crashing cpu. Will be used in NMI callback.*/
617 crashing_cpu = safe_smp_processor_id();
618
619 shootdown_callback = callback;
620
621 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
622 /* Would it be better to replace the trap vector here? */
623 if (register_die_notifier(&crash_nmi_nb))
624 return; /* return what? */
625 /* Ensure the new callback function is set before sending
626 * out the NMI
627 */
628 wmb();
629
630 smp_send_nmi_allbutself();
631
632 msecs = 1000; /* Wait at most a second for the other cpus to stop */
633 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
634 mdelay(1);
635 msecs--;
636 }
637
638 /* Leave the nmi callback set */
639}
640#else /* !CONFIG_SMP */
641void nmi_shootdown_cpus(nmi_shootdown_cb callback)
642{
643 /* No other CPUs to shoot down */
644}
645#endif
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 6f50664b2ba5..a160f3119725 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -10,15 +10,12 @@
10#include <asm/page.h> 10#include <asm/page.h>
11#include <asm/kexec.h> 11#include <asm/kexec.h>
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13#include <asm/pgtable.h>
14 13
15/* 14/*
16 * Must be relocatable PIC code callable as a C function 15 * Must be relocatable PIC code callable as a C function
17 */ 16 */
18 17
19#define PTR(x) (x << 2) 18#define PTR(x) (x << 2)
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21#define PAE_PGD_ATTR (_PAGE_PRESENT)
22 19
23/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE 20/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * ~ control_page + PAGE_SIZE are used as data storage and stack for 21 * ~ control_page + PAGE_SIZE are used as data storage and stack for
@@ -39,7 +36,6 @@
39#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) 36#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
40 37
41 .text 38 .text
42 .align PAGE_SIZE
43 .globl relocate_kernel 39 .globl relocate_kernel
44relocate_kernel: 40relocate_kernel:
45 /* Save the CPU context, used for jumping back */ 41 /* Save the CPU context, used for jumping back */
@@ -60,117 +56,6 @@ relocate_kernel:
60 movl %cr4, %eax 56 movl %cr4, %eax
61 movl %eax, CR4(%edi) 57 movl %eax, CR4(%edi)
62 58
63#ifdef CONFIG_X86_PAE
64 /* map the control page at its virtual address */
65
66 movl PTR(VA_PGD)(%ebp), %edi
67 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
68 andl $0xc0000000, %eax
69 shrl $27, %eax
70 addl %edi, %eax
71
72 movl PTR(PA_PMD_0)(%ebp), %edx
73 orl $PAE_PGD_ATTR, %edx
74 movl %edx, (%eax)
75
76 movl PTR(VA_PMD_0)(%ebp), %edi
77 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
78 andl $0x3fe00000, %eax
79 shrl $18, %eax
80 addl %edi, %eax
81
82 movl PTR(PA_PTE_0)(%ebp), %edx
83 orl $PAGE_ATTR, %edx
84 movl %edx, (%eax)
85
86 movl PTR(VA_PTE_0)(%ebp), %edi
87 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
88 andl $0x001ff000, %eax
89 shrl $9, %eax
90 addl %edi, %eax
91
92 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
93 orl $PAGE_ATTR, %edx
94 movl %edx, (%eax)
95
96 /* identity map the control page at its physical address */
97
98 movl PTR(VA_PGD)(%ebp), %edi
99 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
100 andl $0xc0000000, %eax
101 shrl $27, %eax
102 addl %edi, %eax
103
104 movl PTR(PA_PMD_1)(%ebp), %edx
105 orl $PAE_PGD_ATTR, %edx
106 movl %edx, (%eax)
107
108 movl PTR(VA_PMD_1)(%ebp), %edi
109 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
110 andl $0x3fe00000, %eax
111 shrl $18, %eax
112 addl %edi, %eax
113
114 movl PTR(PA_PTE_1)(%ebp), %edx
115 orl $PAGE_ATTR, %edx
116 movl %edx, (%eax)
117
118 movl PTR(VA_PTE_1)(%ebp), %edi
119 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
120 andl $0x001ff000, %eax
121 shrl $9, %eax
122 addl %edi, %eax
123
124 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
125 orl $PAGE_ATTR, %edx
126 movl %edx, (%eax)
127#else
128 /* map the control page at its virtual address */
129
130 movl PTR(VA_PGD)(%ebp), %edi
131 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
132 andl $0xffc00000, %eax
133 shrl $20, %eax
134 addl %edi, %eax
135
136 movl PTR(PA_PTE_0)(%ebp), %edx
137 orl $PAGE_ATTR, %edx
138 movl %edx, (%eax)
139
140 movl PTR(VA_PTE_0)(%ebp), %edi
141 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
142 andl $0x003ff000, %eax
143 shrl $10, %eax
144 addl %edi, %eax
145
146 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
147 orl $PAGE_ATTR, %edx
148 movl %edx, (%eax)
149
150 /* identity map the control page at its physical address */
151
152 movl PTR(VA_PGD)(%ebp), %edi
153 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
154 andl $0xffc00000, %eax
155 shrl $20, %eax
156 addl %edi, %eax
157
158 movl PTR(PA_PTE_1)(%ebp), %edx
159 orl $PAGE_ATTR, %edx
160 movl %edx, (%eax)
161
162 movl PTR(VA_PTE_1)(%ebp), %edi
163 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
164 andl $0x003ff000, %eax
165 shrl $10, %eax
166 addl %edi, %eax
167
168 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
169 orl $PAGE_ATTR, %edx
170 movl %edx, (%eax)
171#endif
172
173relocate_new_kernel:
174 /* read the arguments and say goodbye to the stack */ 59 /* read the arguments and say goodbye to the stack */
175 movl 20+4(%esp), %ebx /* page_list */ 60 movl 20+4(%esp), %ebx /* page_list */
176 movl 20+8(%esp), %ebp /* list of pages */ 61 movl 20+8(%esp), %ebp /* list of pages */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d5674f7b6cc..08e02e8453c9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -93,11 +93,13 @@
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
96#include <asm/gart.h>
96#include <asm/mmu_context.h> 97#include <asm/mmu_context.h>
97#include <asm/proto.h> 98#include <asm/proto.h>
98 99
99#include <mach_apic.h> 100#include <mach_apic.h>
100#include <asm/paravirt.h> 101#include <asm/paravirt.h>
102#include <asm/hypervisor.h>
101 103
102#include <asm/percpu.h> 104#include <asm/percpu.h>
103#include <asm/topology.h> 105#include <asm/topology.h>
@@ -448,6 +450,7 @@ static void __init reserve_early_setup_data(void)
448 * @size: Size of the crashkernel memory to reserve. 450 * @size: Size of the crashkernel memory to reserve.
449 * Returns the base address on success, and -1ULL on failure. 451 * Returns the base address on success, and -1ULL on failure.
450 */ 452 */
453static
451unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) 454unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
452{ 455{
453 const unsigned long long alignment = 16<<20; /* 16M */ 456 const unsigned long long alignment = 16<<20; /* 16M */
@@ -583,161 +586,24 @@ static int __init setup_elfcorehdr(char *arg)
583early_param("elfcorehdr", setup_elfcorehdr); 586early_param("elfcorehdr", setup_elfcorehdr);
584#endif 587#endif
585 588
586static struct x86_quirks default_x86_quirks __initdata; 589static int __init default_update_genapic(void)
587
588struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
589
590/*
591 * Some BIOSes seem to corrupt the low 64k of memory during events
592 * like suspend/resume and unplugging an HDMI cable. Reserve all
593 * remaining free memory in that area and fill it with a distinct
594 * pattern.
595 */
596#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
597#define MAX_SCAN_AREAS 8
598
599static int __read_mostly memory_corruption_check = -1;
600
601static unsigned __read_mostly corruption_check_size = 64*1024;
602static unsigned __read_mostly corruption_check_period = 60; /* seconds */
603
604static struct e820entry scan_areas[MAX_SCAN_AREAS];
605static int num_scan_areas;
606
607
608static int set_corruption_check(char *arg)
609{ 590{
610 char *end; 591#ifdef CONFIG_X86_SMP
611 592# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
612 memory_corruption_check = simple_strtol(arg, &end, 10); 593 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
613 594# endif
614 return (*end == 0) ? 0 : -EINVAL;
615}
616early_param("memory_corruption_check", set_corruption_check);
617
618static int set_corruption_check_period(char *arg)
619{
620 char *end;
621
622 corruption_check_period = simple_strtoul(arg, &end, 10);
623
624 return (*end == 0) ? 0 : -EINVAL;
625}
626early_param("memory_corruption_check_period", set_corruption_check_period);
627
628static int set_corruption_check_size(char *arg)
629{
630 char *end;
631 unsigned size;
632
633 size = memparse(arg, &end);
634
635 if (*end == '\0')
636 corruption_check_size = size;
637
638 return (size == corruption_check_size) ? 0 : -EINVAL;
639}
640early_param("memory_corruption_check_size", set_corruption_check_size);
641
642
643static void __init setup_bios_corruption_check(void)
644{
645 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
646
647 if (memory_corruption_check == -1) {
648 memory_corruption_check =
649#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
650 1
651#else
652 0
653#endif 595#endif
654 ;
655 }
656
657 if (corruption_check_size == 0)
658 memory_corruption_check = 0;
659
660 if (!memory_corruption_check)
661 return;
662
663 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
664
665 while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
666 u64 size;
667 addr = find_e820_area_size(addr, &size, PAGE_SIZE);
668
669 if (addr == 0)
670 break;
671 596
672 if ((addr + size) > corruption_check_size) 597 return 0;
673 size = corruption_check_size - addr;
674
675 if (size == 0)
676 break;
677
678 e820_update_range(addr, size, E820_RAM, E820_RESERVED);
679 scan_areas[num_scan_areas].addr = addr;
680 scan_areas[num_scan_areas].size = size;
681 num_scan_areas++;
682
683 /* Assume we've already mapped this early memory */
684 memset(__va(addr), 0, size);
685
686 addr += size;
687 }
688
689 printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
690 num_scan_areas);
691 update_e820();
692}
693
694static struct timer_list periodic_check_timer;
695
696void check_for_bios_corruption(void)
697{
698 int i;
699 int corruption = 0;
700
701 if (!memory_corruption_check)
702 return;
703
704 for(i = 0; i < num_scan_areas; i++) {
705 unsigned long *addr = __va(scan_areas[i].addr);
706 unsigned long size = scan_areas[i].size;
707
708 for(; size; addr++, size -= sizeof(unsigned long)) {
709 if (!*addr)
710 continue;
711 printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
712 addr, __pa(addr), *addr);
713 corruption = 1;
714 *addr = 0;
715 }
716 }
717
718 WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
719}
720
721static void periodic_check_for_corruption(unsigned long data)
722{
723 check_for_bios_corruption();
724 mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
725} 598}
726 599
727void start_periodic_check_for_corruption(void) 600static struct x86_quirks default_x86_quirks __initdata = {
728{ 601 .update_genapic = default_update_genapic,
729 if (!memory_corruption_check || corruption_check_period == 0) 602};
730 return;
731
732 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
733 corruption_check_period);
734 603
735 init_timer(&periodic_check_timer); 604struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
736 periodic_check_timer.function = &periodic_check_for_corruption;
737 periodic_check_for_corruption(0);
738}
739#endif
740 605
606#ifdef CONFIG_X86_RESERVE_LOW_64K
741static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) 607static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
742{ 608{
743 printk(KERN_NOTICE 609 printk(KERN_NOTICE
@@ -749,6 +615,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
749 615
750 return 0; 616 return 0;
751} 617}
618#endif
752 619
753/* List of systems that have known low memory corruption BIOS problems */ 620/* List of systems that have known low memory corruption BIOS problems */
754static struct dmi_system_id __initdata bad_bios_dmi_table[] = { 621static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
@@ -794,6 +661,9 @@ void __init setup_arch(char **cmdline_p)
794 printk(KERN_INFO "Command line: %s\n", boot_command_line); 661 printk(KERN_INFO "Command line: %s\n", boot_command_line);
795#endif 662#endif
796 663
664 /* VMI may relocate the fixmap; do this before touching ioremap area */
665 vmi_init();
666
797 early_cpu_init(); 667 early_cpu_init();
798 early_ioremap_init(); 668 early_ioremap_init();
799 669
@@ -880,13 +750,8 @@ void __init setup_arch(char **cmdline_p)
880 check_efer(); 750 check_efer();
881#endif 751#endif
882 752
883#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) 753 /* Must be before kernel pagetables are setup */
884 /* 754 vmi_activate();
885 * Must be before kernel pagetables are setup
886 * or fixmap area is touched.
887 */
888 vmi_init();
889#endif
890 755
891 /* after early param, so could get panic from serial */ 756 /* after early param, so could get panic from serial */
892 reserve_early_setup_data(); 757 reserve_early_setup_data();
@@ -909,6 +774,12 @@ void __init setup_arch(char **cmdline_p)
909 774
910 dmi_check_system(bad_bios_dmi_table); 775 dmi_check_system(bad_bios_dmi_table);
911 776
777 /*
778 * VMware detection requires dmi to be available, so this
779 * needs to be done after dmi_scan_machine, for the BP.
780 */
781 init_hypervisor(&boot_cpu_data);
782
912#ifdef CONFIG_X86_32 783#ifdef CONFIG_X86_32
913 probe_roms(); 784 probe_roms();
914#endif 785#endif
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
deleted file mode 100644
index cc673aa55ce4..000000000000
--- a/arch/x86/kernel/sigframe.h
+++ /dev/null
@@ -1,42 +0,0 @@
1#ifdef CONFIG_X86_32
2struct sigframe {
3 char __user *pretcode;
4 int sig;
5 struct sigcontext sc;
6 /*
7 * fpstate is unused. fpstate is moved/allocated after
8 * retcode[] below. This movement allows to have the FP state and the
9 * future state extensions (xsave) stay together.
10 * And at the same time retaining the unused fpstate, prevents changing
11 * the offset of extramask[] in the sigframe and thus prevent any
12 * legacy application accessing/modifying it.
13 */
14 struct _fpstate fpstate_unused;
15 unsigned long extramask[_NSIG_WORDS-1];
16 char retcode[8];
17 /* fp state follows here */
18};
19
20struct rt_sigframe {
21 char __user *pretcode;
22 int sig;
23 struct siginfo __user *pinfo;
24 void __user *puc;
25 struct siginfo info;
26 struct ucontext uc;
27 char retcode[8];
28 /* fp state follows here */
29};
30#else
31struct rt_sigframe {
32 char __user *pretcode;
33 struct ucontext uc;
34 struct siginfo info;
35 /* fp state follows here */
36};
37
38int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
39 sigset_t *set, struct pt_regs *regs);
40int ia32_setup_frame(int sig, struct k_sigaction *ka,
41 sigset_t *set, struct pt_regs *regs);
42#endif
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index dee553c503d3..4fa5243c2069 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -35,7 +35,7 @@
35#include <asm/syscall.h> 35#include <asm/syscall.h>
36#include <asm/syscalls.h> 36#include <asm/syscalls.h>
37 37
38#include "sigframe.h" 38#include <asm/sigframe.h>
39 39
40#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 40#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
41 41
@@ -594,17 +594,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
594 return ax; 594 return ax;
595 595
596badframe: 596badframe:
597 if (show_unhandled_signals && printk_ratelimit()) { 597 signal_fault(regs, frame, "sigreturn");
598 printk("%s%s[%d] bad frame in sigreturn frame:"
599 "%p ip:%lx sp:%lx oeax:%lx",
600 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
601 current->comm, task_pid_nr(current), frame, regs->ip,
602 regs->sp, regs->orig_ax);
603 print_vma_addr(" in ", regs->ip);
604 printk(KERN_CONT "\n");
605 }
606
607 force_sig(SIGSEGV, current);
608 598
609 return 0; 599 return 0;
610} 600}
@@ -681,6 +671,11 @@ static int signr_convert(int sig)
681#define is_ia32 0 671#define is_ia32 0
682#endif /* CONFIG_IA32_EMULATION */ 672#endif /* CONFIG_IA32_EMULATION */
683 673
674int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
675 sigset_t *set, struct pt_regs *regs);
676int ia32_setup_frame(int sig, struct k_sigaction *ka,
677 sigset_t *set, struct pt_regs *regs);
678
684#endif /* CONFIG_X86_32 */ 679#endif /* CONFIG_X86_32 */
685 680
686static int 681static int
@@ -906,8 +901,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
906 struct task_struct *me = current; 901 struct task_struct *me = current;
907 902
908 if (show_unhandled_signals && printk_ratelimit()) { 903 if (show_unhandled_signals && printk_ratelimit()) {
909 printk(KERN_INFO 904 printk("%s"
910 "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", 905 "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
906 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
911 me->comm, me->pid, where, frame, 907 me->comm, me->pid, where, frame,
912 regs->ip, regs->sp, regs->orig_ax); 908 regs->ip, regs->sp, regs->orig_ax);
913 print_vma_addr(" in ", regs->ip); 909 print_vma_addr(" in ", regs->ip);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d18537ce2c79..7e558db362c1 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask)
140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
141} 141}
142 142
143static void stop_this_cpu(void *dummy)
144{
145 local_irq_disable();
146 /*
147 * Remove this CPU:
148 */
149 cpu_clear(smp_processor_id(), cpu_online_map);
150 disable_local_APIC();
151 if (hlt_works(smp_processor_id()))
152 for (;;) halt();
153 for (;;);
154}
155
156/* 143/*
157 * this function calls the 'stop' function on all other CPUs in the system. 144 * this function calls the 'stop' function on all other CPUs in the system.
158 */ 145 */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..f8500c969442 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/mtrr.h> 62#include <asm/mtrr.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h> 64#include <asm/genapic.h>
65#include <asm/setup.h>
65#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
66 67
67#include <mach_apic.h> 68#include <mach_apic.h>
@@ -287,16 +288,14 @@ static int __cpuinitdata unsafe_smp;
287/* 288/*
288 * Activate a secondary processor. 289 * Activate a secondary processor.
289 */ 290 */
290static void __cpuinit start_secondary(void *unused) 291notrace static void __cpuinit start_secondary(void *unused)
291{ 292{
292 /* 293 /*
293 * Don't put *anything* before cpu_init(), SMP booting is too 294 * Don't put *anything* before cpu_init(), SMP booting is too
294 * fragile that we want to limit the things done here to the 295 * fragile that we want to limit the things done here to the
295 * most necessary things. 296 * most necessary things.
296 */ 297 */
297#ifdef CONFIG_VMI
298 vmi_bringup(); 298 vmi_bringup();
299#endif
300 cpu_init(); 299 cpu_init();
301 preempt_disable(); 300 preempt_disable();
302 smp_callin(); 301 smp_callin();
@@ -536,7 +535,7 @@ static void impress_friends(void)
536 pr_debug("Before bogocount - setting activated=1.\n"); 535 pr_debug("Before bogocount - setting activated=1.\n");
537} 536}
538 537
539static inline void __inquire_remote_apic(int apicid) 538void __inquire_remote_apic(int apicid)
540{ 539{
541 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 540 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
542 char *names[] = { "ID", "VERSION", "SPIV" }; 541 char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +574,13 @@ static inline void __inquire_remote_apic(int apicid)
575 } 574 }
576} 575}
577 576
578#ifdef WAKE_SECONDARY_VIA_NMI
579/* 577/*
580 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal 578 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
581 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 579 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
582 * won't ... remember to clear down the APIC, etc later. 580 * won't ... remember to clear down the APIC, etc later.
583 */ 581 */
584static int __devinit 582int __devinit
585wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 583wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
586{ 584{
587 unsigned long send_status, accept_status = 0; 585 unsigned long send_status, accept_status = 0;
588 int maxlvt; 586 int maxlvt;
@@ -599,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
599 * Give the other CPU some time to accept the IPI. 597 * Give the other CPU some time to accept the IPI.
600 */ 598 */
601 udelay(200); 599 udelay(200);
602 if (APIC_INTEGRATED(apic_version[phys_apicid])) { 600 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
603 maxlvt = lapic_get_maxlvt(); 601 maxlvt = lapic_get_maxlvt();
604 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 602 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
605 apic_write(APIC_ESR, 0); 603 apic_write(APIC_ESR, 0);
@@ -614,11 +612,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
614 612
615 return (send_status | accept_status); 613 return (send_status | accept_status);
616} 614}
617#endif /* WAKE_SECONDARY_VIA_NMI */
618 615
619#ifdef WAKE_SECONDARY_VIA_INIT 616int __devinit
620static int __devinit 617wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
621wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
622{ 618{
623 unsigned long send_status, accept_status = 0; 619 unsigned long send_status, accept_status = 0;
624 int maxlvt, num_starts, j; 620 int maxlvt, num_starts, j;
@@ -737,7 +733,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
737 733
738 return (send_status | accept_status); 734 return (send_status | accept_status);
739} 735}
740#endif /* WAKE_SECONDARY_VIA_INIT */
741 736
742struct create_idle { 737struct create_idle {
743 struct work_struct work; 738 struct work_struct work;
@@ -1086,8 +1081,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
1086#endif 1081#endif
1087 1082
1088 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 1083 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1089 printk(KERN_WARNING "weird, boot CPU (#%d) not listed" 1084 printk(KERN_WARNING
1090 "by the BIOS.\n", hard_smp_processor_id()); 1085 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1086 hard_smp_processor_id());
1087
1091 physid_set(hard_smp_processor_id(), phys_cpu_present_map); 1088 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1092 } 1089 }
1093 1090
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/uaccess.h>
9#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
10 11
11static void save_stack_warning(void *data, char *msg) 12static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
83 trace->entries[trace->nr_entries++] = ULONG_MAX; 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84} 85}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 86EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87
88/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
89
90struct stack_frame {
91 const void __user *next_fp;
92 unsigned long ret_addr;
93};
94
95static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
96{
97 int ret;
98
99 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
100 return 0;
101
102 ret = 1;
103 pagefault_disable();
104 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
105 ret = 0;
106 pagefault_enable();
107
108 return ret;
109}
110
111static inline void __save_stack_trace_user(struct stack_trace *trace)
112{
113 const struct pt_regs *regs = task_pt_regs(current);
114 const void __user *fp = (const void __user *)regs->bp;
115
116 if (trace->nr_entries < trace->max_entries)
117 trace->entries[trace->nr_entries++] = regs->ip;
118
119 while (trace->nr_entries < trace->max_entries) {
120 struct stack_frame frame;
121
122 frame.next_fp = NULL;
123 frame.ret_addr = 0;
124 if (!copy_stack_frame(fp, &frame))
125 break;
126 if ((unsigned long)fp < regs->sp)
127 break;
128 if (frame.ret_addr) {
129 trace->entries[trace->nr_entries++] =
130 frame.ret_addr;
131 }
132 if (fp == frame.next_fp)
133 break;
134 fp = frame.next_fp;
135 }
136}
137
138void save_stack_trace_user(struct stack_trace *trace)
139{
140 /*
141 * Trace user stack if we are not a kernel thread
142 */
143 if (current->mm) {
144 __save_stack_trace_user(trace);
145 }
146 if (trace->nr_entries < trace->max_entries)
147 trace->entries[trace->nr_entries++] = ULONG_MAX;
148}
149
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 77b400f06ea2..65309e4cb1c0 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc);
75irqreturn_t timer_interrupt(int irq, void *dev_id) 75irqreturn_t timer_interrupt(int irq, void *dev_id)
76{ 76{
77 /* Keep nmi watchdog up to date */ 77 /* Keep nmi watchdog up to date */
78 per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; 78 inc_irq_stat(irq0_irqs);
79 79
80#ifdef CONFIG_X86_IO_APIC 80#ifdef CONFIG_X86_IO_APIC
81 if (timer_ack) { 81 if (timer_ack) {
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 418a095c5796..891e7a7c4334 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -49,9 +49,9 @@ unsigned long profile_pc(struct pt_regs *regs)
49} 49}
50EXPORT_SYMBOL(profile_pc); 50EXPORT_SYMBOL(profile_pc);
51 51
52irqreturn_t timer_interrupt(int irq, void *dev_id) 52static irqreturn_t timer_interrupt(int irq, void *dev_id)
53{ 53{
54 add_pda(irq0_irqs, 1); 54 inc_irq_stat(irq0_irqs);
55 55
56 global_clock_event->event_handler(global_clock_event); 56 global_clock_event->event_handler(global_clock_event);
57 57
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b6..8da059f949be 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
34 */ 34 */
35void leave_mm(int cpu) 35void leave_mm(int cpu)
36{ 36{
37 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) 37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 BUG(); 38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
40 load_cr3(swapper_pg_dir); 39 load_cr3(swapper_pg_dir);
41} 40}
42EXPORT_SYMBOL_GPL(leave_mm); 41EXPORT_SYMBOL_GPL(leave_mm);
@@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
104 * BUG(); 103 * BUG();
105 */ 104 */
106 105
107 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { 106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
108 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { 107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
109 if (flush_va == TLB_FLUSH_ALL) 108 if (flush_va == TLB_FLUSH_ALL)
110 local_flush_tlb(); 109 local_flush_tlb();
111 else 110 else
@@ -119,7 +118,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
119 smp_mb__after_clear_bit(); 118 smp_mb__after_clear_bit();
120out: 119out:
121 put_cpu_no_resched(); 120 put_cpu_no_resched();
122 __get_cpu_var(irq_stat).irq_tlb_count++; 121 inc_irq_stat(irq_tlb_count);
123} 122}
124 123
125void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info)
238 unsigned long cpu = smp_processor_id(); 237 unsigned long cpu = smp_processor_id();
239 238
240 __flush_tlb_all(); 239 __flush_tlb_all();
241 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) 240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
242 leave_mm(cpu); 241 leave_mm(cpu);
243} 242}
244 243
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca69494..29887d7081a9 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
154out: 154out:
155 ack_APIC_irq(); 155 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 156 cpu_clear(cpu, f->flush_cpumask);
157 add_pda(irq_tlb_count, 1); 157 inc_irq_stat(irq_tlb_count);
158} 158}
159 159
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 04431f34fd16..6a00e5faaa74 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void)
566 if (!is_uv_system()) 566 if (!is_uv_system())
567 return 0; 567 return 0;
568 568
569 if (!proc_mkdir("sgi_uv", NULL))
570 return -EINVAL;
571
572 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); 569 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
573 if (!proc_uv_ptc) { 570 if (!proc_uv_ptc) {
574 printk(KERN_ERR "unable to create %s proc entry\n", 571 printk(KERN_ERR "unable to create %s proc entry\n",
575 UV_PTC_BASENAME); 572 UV_PTC_BASENAME);
576 remove_proc_entry("sgi_uv", NULL);
577 return -EINVAL; 573 return -EINVAL;
578 } 574 }
579 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; 575 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 1106fac6024d..808031a5ba19 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,10 +1,26 @@
1#include <linux/io.h> 1#include <linux/io.h>
2 2
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4#include <asm/e820.h>
4 5
5/* ready for x86_64 and x86 */ 6/* ready for x86_64 and x86 */
6unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); 7unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
7 8
9void __init reserve_trampoline_memory(void)
10{
11#ifdef CONFIG_X86_32
12 /*
13 * But first pinch a few for the stack/trampoline stuff
14 * FIXME: Don't need the extra page at 4K, but need to fix
15 * trampoline before removing it. (see the GDT stuff)
16 */
17 reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
18#endif
19 /* Has to be in very low memory so we can execute real-mode AP code. */
20 reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
21 "TRAMPOLINE");
22}
23
8/* 24/*
9 * Currently trivial. Write the real->protected mode 25 * Currently trivial. Write the real->protected mode
10 * bootstrap into the page concerned. The caller 26 * bootstrap into the page concerned. The caller
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
12 */ 28 */
13unsigned long setup_trampoline(void) 29unsigned long setup_trampoline(void)
14{ 30{
15 memcpy(trampoline_base, trampoline_data, 31 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
16 trampoline_end - trampoline_data);
17 return virt_to_phys(trampoline_base); 32 return virt_to_phys(trampoline_base);
18} 33}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d815293e6d94..141907ab6e22 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -660,7 +660,7 @@ void math_error(void __user *ip)
660{ 660{
661 struct task_struct *task; 661 struct task_struct *task;
662 siginfo_t info; 662 siginfo_t info;
663 unsigned short cwd, swd; 663 unsigned short cwd, swd, err;
664 664
665 /* 665 /*
666 * Save the info for the exception handler and clear the error. 666 * Save the info for the exception handler and clear the error.
@@ -671,7 +671,6 @@ void math_error(void __user *ip)
671 task->thread.error_code = 0; 671 task->thread.error_code = 0;
672 info.si_signo = SIGFPE; 672 info.si_signo = SIGFPE;
673 info.si_errno = 0; 673 info.si_errno = 0;
674 info.si_code = __SI_FAULT;
675 info.si_addr = ip; 674 info.si_addr = ip;
676 /* 675 /*
677 * (~cwd & swd) will mask out exceptions that are not set to unmasked 676 * (~cwd & swd) will mask out exceptions that are not set to unmasked
@@ -685,34 +684,31 @@ void math_error(void __user *ip)
685 */ 684 */
686 cwd = get_fpu_cwd(task); 685 cwd = get_fpu_cwd(task);
687 swd = get_fpu_swd(task); 686 swd = get_fpu_swd(task);
688 switch (swd & ~cwd & 0x3f) { 687
689 case 0x000: /* No unmasked exception */ 688 err = swd & ~cwd & 0x3f;
689
690#ifdef CONFIG_X86_32 690#ifdef CONFIG_X86_32
691 if (!err)
691 return; 692 return;
692#endif 693#endif
693 default: /* Multiple exceptions */ 694
694 break; 695 if (err & 0x001) { /* Invalid op */
695 case 0x001: /* Invalid Op */
696 /* 696 /*
697 * swd & 0x240 == 0x040: Stack Underflow 697 * swd & 0x240 == 0x040: Stack Underflow
698 * swd & 0x240 == 0x240: Stack Overflow 698 * swd & 0x240 == 0x240: Stack Overflow
699 * User must clear the SF bit (0x40) if set 699 * User must clear the SF bit (0x40) if set
700 */ 700 */
701 info.si_code = FPE_FLTINV; 701 info.si_code = FPE_FLTINV;
702 break; 702 } else if (err & 0x004) { /* Divide by Zero */
703 case 0x002: /* Denormalize */
704 case 0x010: /* Underflow */
705 info.si_code = FPE_FLTUND;
706 break;
707 case 0x004: /* Zero Divide */
708 info.si_code = FPE_FLTDIV; 703 info.si_code = FPE_FLTDIV;
709 break; 704 } else if (err & 0x008) { /* Overflow */
710 case 0x008: /* Overflow */
711 info.si_code = FPE_FLTOVF; 705 info.si_code = FPE_FLTOVF;
712 break; 706 } else if (err & 0x012) { /* Denormal, Underflow */
713 case 0x020: /* Precision */ 707 info.si_code = FPE_FLTUND;
708 } else if (err & 0x020) { /* Precision */
714 info.si_code = FPE_FLTRES; 709 info.si_code = FPE_FLTRES;
715 break; 710 } else {
711 info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
716 } 712 }
717 force_sig_info(SIGFPE, &info, task); 713 force_sig_info(SIGFPE, &info, task);
718} 714}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 424093b157d3..599e58168631 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -15,6 +15,7 @@
15#include <asm/vgtod.h> 15#include <asm/vgtod.h>
16#include <asm/time.h> 16#include <asm/time.h>
17#include <asm/delay.h> 17#include <asm/delay.h>
18#include <asm/hypervisor.h>
18 19
19unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 20unsigned int cpu_khz; /* TSC clocks / usec, not used here */
20EXPORT_SYMBOL(cpu_khz); 21EXPORT_SYMBOL(cpu_khz);
@@ -31,6 +32,7 @@ static int tsc_unstable;
31 erroneous rdtsc usage on !cpu_has_tsc processors */ 32 erroneous rdtsc usage on !cpu_has_tsc processors */
32static int tsc_disabled = -1; 33static int tsc_disabled = -1;
33 34
35static int tsc_clocksource_reliable;
34/* 36/*
35 * Scheduler clock - returns current time in nanosec units. 37 * Scheduler clock - returns current time in nanosec units.
36 */ 38 */
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str)
98 100
99__setup("notsc", notsc_setup); 101__setup("notsc", notsc_setup);
100 102
103static int __init tsc_setup(char *str)
104{
105 if (!strcmp(str, "reliable"))
106 tsc_clocksource_reliable = 1;
107 return 1;
108}
109
110__setup("tsc=", tsc_setup);
111
101#define MAX_RETRIES 5 112#define MAX_RETRIES 5
102#define SMI_TRESHOLD 50000 113#define SMI_TRESHOLD 50000
103 114
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void)
352{ 363{
353 u64 tsc1, tsc2, delta, ref1, ref2; 364 u64 tsc1, tsc2, delta, ref1, ref2;
354 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 365 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
355 unsigned long flags, latch, ms, fast_calibrate; 366 unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
356 int hpet = is_hpet_enabled(), i, loopmin; 367 int hpet = is_hpet_enabled(), i, loopmin;
357 368
369 tsc_khz = get_hypervisor_tsc_freq();
370 if (tsc_khz) {
371 printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
372 return tsc_khz;
373 }
374
358 local_irq_save(flags); 375 local_irq_save(flags);
359 fast_calibrate = quick_pit_calibrate(); 376 fast_calibrate = quick_pit_calibrate();
360 local_irq_restore(flags); 377 local_irq_restore(flags);
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
731 {} 748 {}
732}; 749};
733 750
734/* 751static void __init check_system_tsc_reliable(void)
735 * Geode_LX - the OLPC CPU has a possibly a very reliable TSC 752{
736 */
737#ifdef CONFIG_MGEODE_LX 753#ifdef CONFIG_MGEODE_LX
738/* RTSC counts during suspend */ 754 /* RTSC counts during suspend */
739#define RTSC_SUSP 0x100 755#define RTSC_SUSP 0x100
740
741static void __init check_geode_tsc_reliable(void)
742{
743 unsigned long res_low, res_high; 756 unsigned long res_low, res_high;
744 757
745 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); 758 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
759 /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
746 if (res_low & RTSC_SUSP) 760 if (res_low & RTSC_SUSP)
747 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 761 tsc_clocksource_reliable = 1;
748}
749#else
750static inline void check_geode_tsc_reliable(void) { }
751#endif 762#endif
763 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
764 tsc_clocksource_reliable = 1;
765}
752 766
753/* 767/*
754 * Make an educated guess if the TSC is trustworthy and synchronized 768 * Make an educated guess if the TSC is trustworthy and synchronized
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void)
783{ 797{
784 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, 798 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
785 clocksource_tsc.shift); 799 clocksource_tsc.shift);
800 if (tsc_clocksource_reliable)
801 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
786 /* lower the rating if we already know its unstable: */ 802 /* lower the rating if we already know its unstable: */
787 if (check_tsc_unstable()) { 803 if (check_tsc_unstable()) {
788 clocksource_tsc.rating = 0; 804 clocksource_tsc.rating = 0;
@@ -843,7 +859,7 @@ void __init tsc_init(void)
843 if (unsynchronized_tsc()) 859 if (unsynchronized_tsc())
844 mark_tsc_unstable("TSCs unsynchronized"); 860 mark_tsc_unstable("TSCs unsynchronized");
845 861
846 check_geode_tsc_reliable(); 862 check_system_tsc_reliable();
847 init_tsc_clocksource(); 863 init_tsc_clocksource();
848} 864}
849 865
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 1c0dfbca87c1..bf36328f6ef9 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
112 if (unsynchronized_tsc()) 112 if (unsynchronized_tsc())
113 return; 113 return;
114 114
115 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
116 printk(KERN_INFO
117 "Skipping synchronization checks as TSC is reliable.\n");
118 return;
119 }
120
115 printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", 121 printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
116 smp_processor_id(), cpu); 122 smp_processor_id(), cpu);
117 123
@@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void)
165{ 171{
166 int cpus = 2; 172 int cpus = 2;
167 173
168 if (unsynchronized_tsc()) 174 if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
169 return; 175 return;
170 176
171 /* 177 /*
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8b6c393ab9fd..23206ba16874 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -266,109 +266,6 @@ static void vmi_nop(void)
266{ 266{
267} 267}
268 268
269#ifdef CONFIG_DEBUG_PAGE_TYPE
270
271#ifdef CONFIG_X86_PAE
272#define MAX_BOOT_PTS (2048+4+1)
273#else
274#define MAX_BOOT_PTS (1024+1)
275#endif
276
277/*
278 * During boot, mem_map is not yet available in paging_init, so stash
279 * all the boot page allocations here.
280 */
281static struct {
282 u32 pfn;
283 int type;
284} boot_page_allocations[MAX_BOOT_PTS];
285static int num_boot_page_allocations;
286static int boot_allocations_applied;
287
288void vmi_apply_boot_page_allocations(void)
289{
290 int i;
291 BUG_ON(!mem_map);
292 for (i = 0; i < num_boot_page_allocations; i++) {
293 struct page *page = pfn_to_page(boot_page_allocations[i].pfn);
294 page->type = boot_page_allocations[i].type;
295 page->type = boot_page_allocations[i].type &
296 ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
297 }
298 boot_allocations_applied = 1;
299}
300
301static void record_page_type(u32 pfn, int type)
302{
303 BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS);
304 boot_page_allocations[num_boot_page_allocations].pfn = pfn;
305 boot_page_allocations[num_boot_page_allocations].type = type;
306 num_boot_page_allocations++;
307}
308
309static void check_zeroed_page(u32 pfn, int type, struct page *page)
310{
311 u32 *ptr;
312 int i;
313 int limit = PAGE_SIZE / sizeof(int);
314
315 if (page_address(page))
316 ptr = (u32 *)page_address(page);
317 else
318 ptr = (u32 *)__va(pfn << PAGE_SHIFT);
319 /*
320 * When cloning the root in non-PAE mode, only the userspace
321 * pdes need to be zeroed.
322 */
323 if (type & VMI_PAGE_CLONE)
324 limit = KERNEL_PGD_BOUNDARY;
325 for (i = 0; i < limit; i++)
326 BUG_ON(ptr[i]);
327}
328
329/*
330 * We stash the page type into struct page so we can verify the page
331 * types are used properly.
332 */
333static void vmi_set_page_type(u32 pfn, int type)
334{
335 /* PAE can have multiple roots per page - don't track */
336 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
337 return;
338
339 if (boot_allocations_applied) {
340 struct page *page = pfn_to_page(pfn);
341 if (type != VMI_PAGE_NORMAL)
342 BUG_ON(page->type);
343 else
344 BUG_ON(page->type == VMI_PAGE_NORMAL);
345 page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
346 if (type & VMI_PAGE_ZEROED)
347 check_zeroed_page(pfn, type, page);
348 } else {
349 record_page_type(pfn, type);
350 }
351}
352
353static void vmi_check_page_type(u32 pfn, int type)
354{
355 /* PAE can have multiple roots per page - skip checks */
356 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
357 return;
358
359 type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
360 if (boot_allocations_applied) {
361 struct page *page = pfn_to_page(pfn);
362 BUG_ON((page->type ^ type) & VMI_PAGE_PAE);
363 BUG_ON(type == VMI_PAGE_NORMAL && page->type);
364 BUG_ON((type & page->type) == 0);
365 }
366}
367#else
368#define vmi_set_page_type(p,t) do { } while (0)
369#define vmi_check_page_type(p,t) do { } while (0)
370#endif
371
372#ifdef CONFIG_HIGHPTE 269#ifdef CONFIG_HIGHPTE
373static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) 270static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
374{ 271{
@@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
395 292
396static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) 293static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
397{ 294{
398 vmi_set_page_type(pfn, VMI_PAGE_L1);
399 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 295 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
400} 296}
401 297
@@ -406,27 +302,22 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
406 * It is called only for swapper_pg_dir, which already has 302 * It is called only for swapper_pg_dir, which already has
407 * data on it. 303 * data on it.
408 */ 304 */
409 vmi_set_page_type(pfn, VMI_PAGE_L2);
410 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); 305 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
411} 306}
412 307
413static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) 308static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
414{ 309{
415 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
416 vmi_check_page_type(clonepfn, VMI_PAGE_L2);
417 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); 310 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
418} 311}
419 312
420static void vmi_release_pte(unsigned long pfn) 313static void vmi_release_pte(unsigned long pfn)
421{ 314{
422 vmi_ops.release_page(pfn, VMI_PAGE_L1); 315 vmi_ops.release_page(pfn, VMI_PAGE_L1);
423 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
424} 316}
425 317
426static void vmi_release_pmd(unsigned long pfn) 318static void vmi_release_pmd(unsigned long pfn)
427{ 319{
428 vmi_ops.release_page(pfn, VMI_PAGE_L2); 320 vmi_ops.release_page(pfn, VMI_PAGE_L2);
429 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
430} 321}
431 322
432/* 323/*
@@ -450,26 +341,22 @@ static void vmi_release_pmd(unsigned long pfn)
450 341
451static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 342static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
452{ 343{
453 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
454 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 344 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
455} 345}
456 346
457static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 347static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
458{ 348{
459 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
460 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); 349 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
461} 350}
462 351
463static void vmi_set_pte(pte_t *ptep, pte_t pte) 352static void vmi_set_pte(pte_t *ptep, pte_t pte)
464{ 353{
465 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ 354 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */
466 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
467 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); 355 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
468} 356}
469 357
470static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 358static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
471{ 359{
472 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
473 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 360 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
474} 361}
475 362
@@ -477,10 +364,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
477{ 364{
478#ifdef CONFIG_X86_PAE 365#ifdef CONFIG_X86_PAE
479 const pte_t pte = { .pte = pmdval.pmd }; 366 const pte_t pte = { .pte = pmdval.pmd };
480 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
481#else 367#else
482 const pte_t pte = { pmdval.pud.pgd.pgd }; 368 const pte_t pte = { pmdval.pud.pgd.pgd };
483 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
484#endif 369#endif
485 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); 370 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
486} 371}
@@ -502,7 +387,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
502 387
503static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 388static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
504{ 389{
505 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
506 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); 390 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
507} 391}
508 392
@@ -510,21 +394,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval)
510{ 394{
511 /* Um, eww */ 395 /* Um, eww */
512 const pte_t pte = { .pte = pudval.pgd.pgd }; 396 const pte_t pte = { .pte = pudval.pgd.pgd };
513 vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
514 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); 397 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
515} 398}
516 399
517static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 400static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
518{ 401{
519 const pte_t pte = { .pte = 0 }; 402 const pte_t pte = { .pte = 0 };
520 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
521 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 403 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
522} 404}
523 405
524static void vmi_pmd_clear(pmd_t *pmd) 406static void vmi_pmd_clear(pmd_t *pmd)
525{ 407{
526 const pte_t pte = { .pte = 0 }; 408 const pte_t pte = { .pte = 0 };
527 vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
528 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); 409 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
529} 410}
530#endif 411#endif
@@ -960,8 +841,6 @@ static inline int __init activate_vmi(void)
960 841
961void __init vmi_init(void) 842void __init vmi_init(void)
962{ 843{
963 unsigned long flags;
964
965 if (!vmi_rom) 844 if (!vmi_rom)
966 probe_vmi_rom(); 845 probe_vmi_rom();
967 else 846 else
@@ -973,13 +852,21 @@ void __init vmi_init(void)
973 852
974 reserve_top_address(-vmi_rom->virtual_top); 853 reserve_top_address(-vmi_rom->virtual_top);
975 854
976 local_irq_save(flags);
977 activate_vmi();
978
979#ifdef CONFIG_X86_IO_APIC 855#ifdef CONFIG_X86_IO_APIC
980 /* This is virtual hardware; timer routing is wired correctly */ 856 /* This is virtual hardware; timer routing is wired correctly */
981 no_timer_check = 1; 857 no_timer_check = 1;
982#endif 858#endif
859}
860
861void vmi_activate(void)
862{
863 unsigned long flags;
864
865 if (!vmi_rom)
866 return;
867
868 local_irq_save(flags);
869 activate_vmi();
983 local_irq_restore(flags & X86_EFLAGS_IF); 870 local_irq_restore(flags & X86_EFLAGS_IF);
984} 871}
985 872
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index a9b8560adbc2..82c67559dde7 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -44,6 +44,7 @@ SECTIONS
44 SCHED_TEXT 44 SCHED_TEXT
45 LOCK_TEXT 45 LOCK_TEXT
46 KPROBES_TEXT 46 KPROBES_TEXT
47 IRQENTRY_TEXT
47 *(.fixup) 48 *(.fixup)
48 *(.gnu.warning) 49 *(.gnu.warning)
49 _etext = .; /* End of text section */ 50 _etext = .; /* End of text section */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 46e05447405b..1a614c0e6bef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -35,6 +35,7 @@ SECTIONS
35 SCHED_TEXT 35 SCHED_TEXT
36 LOCK_TEXT 36 LOCK_TEXT
37 KPROBES_TEXT 37 KPROBES_TEXT
38 IRQENTRY_TEXT
38 *(.fixup) 39 *(.fixup)
39 *(.gnu.warning) 40 *(.gnu.warning)
40 _etext = .; /* End of text section */ 41 _etext = .; /* End of text section */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ebf2f12900f5..44153afc9067 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0. 17 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
18 */ 18 */
19 19
20/* Disable profiling for userspace code: */
21#define DISABLE_BRANCH_PROFILING
22
20#include <linux/time.h> 23#include <linux/time.h>
21#include <linux/init.h> 24#include <linux/init.h>
22#include <linux/kernel.h> 25#include <linux/kernel.h>
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..3624a364b7f3 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
17#include <asm/bigsmp/apic.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h> 18#include <asm/bigsmp/ipi.h>
19#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
20#include <asm/mach-default/mach_wakecpu.h>
20 21
21static int dmi_bigsmp; /* can be set by dmi scanners */ 22static int dmi_bigsmp; /* can be set by dmi scanners */
22 23
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
16#include <asm/mach-default/mach_apic.h> 16#include <asm/mach-default/mach_apic.h>
17#include <asm/mach-default/mach_ipi.h> 17#include <asm/mach-default/mach_ipi.h>
18#include <asm/mach-default/mach_mpparse.h> 18#include <asm/mach-default/mach_mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20/* should be called last. */ 21/* should be called last. */
21static int probe_default(void) 22static int probe_default(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..7b4e6d0d1690 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
16#include <asm/es7000/apic.h> 16#include <asm/es7000/apic.h>
17#include <asm/es7000/ipi.h> 17#include <asm/es7000/ipi.h>
18#include <asm/es7000/mpparse.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h> 19#include <asm/mach-default/mach_wakecpu.h>
20
21void __init es7000_update_genapic_to_cluster(void)
22{
23 genapic->target_cpus = target_cpus_cluster;
24 genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
25 genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
26 genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
27
28 genapic->init_apic_ldr = init_apic_ldr_cluster;
29
30 genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
31}
20 32
21static int probe_es7000(void) 33static int probe_es7000(void)
22{ 34{
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
15#include <asm/mpspec.h> 15#include <asm/mpspec.h>
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18#include <asm/setup.h>
18 19
19extern struct genapic apic_numaq; 20extern struct genapic apic_numaq;
20extern struct genapic apic_summit; 21extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
57 } 58 }
58 } 59 }
59 60
61 if (x86_quirks->update_genapic)
62 x86_quirks->update_genapic();
63
60 /* Parsed again by __setup for debug/verbose */ 64 /* Parsed again by __setup for debug/verbose */
61 return 0; 65 return 0;
62} 66}
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
72 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support 76 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
73 */ 77 */
74 78
75 if (!cmdline_apic && genapic == &apic_default) 79 if (!cmdline_apic && genapic == &apic_default) {
76 if (apic_bigsmp.probe()) { 80 if (apic_bigsmp.probe()) {
77 genapic = &apic_bigsmp; 81 genapic = &apic_bigsmp;
82 if (x86_quirks->update_genapic)
83 x86_quirks->update_genapic();
78 printk(KERN_INFO "Overriding APIC driver with %s\n", 84 printk(KERN_INFO "Overriding APIC driver with %s\n",
79 genapic->name); 85 genapic->name);
80 } 86 }
87 }
81#endif 88#endif
82} 89}
83 90
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
94 /* Not visible without early console */ 101 /* Not visible without early console */
95 if (!apic_probe[i]) 102 if (!apic_probe[i])
96 panic("Didn't find an APIC driver"); 103 panic("Didn't find an APIC driver");
104
105 if (x86_quirks->update_genapic)
106 x86_quirks->update_genapic();
97 } 107 }
98 printk(KERN_INFO "Using APIC driver %s\n", genapic->name); 108 printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
99} 109}
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
108 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { 118 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
109 if (!cmdline_apic) { 119 if (!cmdline_apic) {
110 genapic = apic_probe[i]; 120 genapic = apic_probe[i];
121 if (x86_quirks->update_genapic)
122 x86_quirks->update_genapic();
111 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 123 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
112 genapic->name); 124 genapic->name);
113 } 125 }
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
124 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 136 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
125 if (!cmdline_apic) { 137 if (!cmdline_apic) {
126 genapic = apic_probe[i]; 138 genapic = apic_probe[i];
139 if (x86_quirks->update_genapic)
140 x86_quirks->update_genapic();
127 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 141 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
128 genapic->name); 142 genapic->name);
129 } 143 }
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2c6d234e0009 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
16#include <asm/summit/apic.h> 16#include <asm/summit/apic.h>
17#include <asm/summit/ipi.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h> 18#include <asm/summit/mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20static int probe_summit(void) 21static int probe_summit(void)
21{ 22{
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 11obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 12mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 13obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 14
16obj-$(CONFIG_NUMA) += numa_$(BITS).o 15obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..57ec8c86a877 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
53 53
54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
55{ 55{
56#ifdef CONFIG_MMIOTRACE_HOOKS 56#ifdef CONFIG_MMIOTRACE
57 if (unlikely(is_kmmio_active())) 57 if (unlikely(is_kmmio_active()))
58 if (kmmio_handler(regs, addr) == 1) 58 if (kmmio_handler(regs, addr) == 1)
59 return -1; 59 return -1;
@@ -393,7 +393,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
393 if (pte && pte_present(*pte) && !pte_exec(*pte)) 393 if (pte && pte_present(*pte) && !pte_exec(*pte))
394 printk(KERN_CRIT "kernel tried to execute " 394 printk(KERN_CRIT "kernel tried to execute "
395 "NX-protected page - exploit attempt? " 395 "NX-protected page - exploit attempt? "
396 "(uid: %d)\n", current->uid); 396 "(uid: %d)\n", current_uid());
397 } 397 }
398#endif 398#endif
399 399
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
413 unsigned long error_code) 413 unsigned long error_code)
414{ 414{
415 unsigned long flags = oops_begin(); 415 unsigned long flags = oops_begin();
416 int sig = SIGKILL;
416 struct task_struct *tsk; 417 struct task_struct *tsk;
417 418
418 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
423 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
424 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
425 if (__die("Bad pagetable", regs, error_code)) 426 if (__die("Bad pagetable", regs, error_code))
426 regs = NULL; 427 sig = 0;
427 oops_end(flags, regs, SIGKILL); 428 oops_end(flags, regs, sig);
428} 429}
429#endif 430#endif
430 431
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
590 int fault; 591 int fault;
591#ifdef CONFIG_X86_64 592#ifdef CONFIG_X86_64
592 unsigned long flags; 593 unsigned long flags;
594 int sig;
593#endif 595#endif
594 596
595 tsk = current; 597 tsk = current;
@@ -849,11 +851,12 @@ no_context:
849 bust_spinlocks(0); 851 bust_spinlocks(0);
850 do_exit(SIGKILL); 852 do_exit(SIGKILL);
851#else 853#else
854 sig = SIGKILL;
852 if (__die("Oops", regs, error_code)) 855 if (__die("Oops", regs, error_code))
853 regs = NULL; 856 sig = 0;
854 /* Executive summary in case the body of the oops scrolled away */ 857 /* Executive summary in case the body of the oops scrolled away */
855 printk(KERN_EMERG "CR2: %016lx\n", address); 858 printk(KERN_EMERG "CR2: %016lx\n", address);
856 oops_end(flags, regs, SIGKILL); 859 oops_end(flags, regs, sig);
857#endif 860#endif
858 861
859/* 862/*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3ffed259883e..800e1d94c1b5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static unsigned long __meminitdata table_top;
67 67
68static int __initdata after_init_bootmem; 68static int __initdata after_init_bootmem;
69 69
70static __init void *alloc_low_page(unsigned long *phys) 70static __init void *alloc_low_page(void)
71{ 71{
72 unsigned long pfn = table_end++; 72 unsigned long pfn = table_end++;
73 void *adr; 73 void *adr;
@@ -77,7 +77,6 @@ static __init void *alloc_low_page(unsigned long *phys)
77 77
78 adr = __va(pfn * PAGE_SIZE); 78 adr = __va(pfn * PAGE_SIZE);
79 memset(adr, 0, PAGE_SIZE); 79 memset(adr, 0, PAGE_SIZE);
80 *phys = pfn * PAGE_SIZE;
81 return adr; 80 return adr;
82} 81}
83 82
@@ -92,12 +91,11 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
92 pmd_t *pmd_table; 91 pmd_t *pmd_table;
93 92
94#ifdef CONFIG_X86_PAE 93#ifdef CONFIG_X86_PAE
95 unsigned long phys;
96 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 94 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
97 if (after_init_bootmem) 95 if (after_init_bootmem)
98 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 96 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
99 else 97 else
100 pmd_table = (pmd_t *)alloc_low_page(&phys); 98 pmd_table = (pmd_t *)alloc_low_page();
101 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 99 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 100 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
103 pud = pud_offset(pgd, 0); 101 pud = pud_offset(pgd, 0);
@@ -128,10 +126,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
128 if (!page_table) 126 if (!page_table)
129 page_table = 127 page_table =
130 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 128 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
131 } else { 129 } else
132 unsigned long phys; 130 page_table = (pte_t *)alloc_low_page();
133 page_table = (pte_t *)alloc_low_page(&phys);
134 }
135 131
136 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); 132 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
137 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 133 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -971,8 +967,6 @@ void __init mem_init(void)
971 int codesize, reservedpages, datasize, initsize; 967 int codesize, reservedpages, datasize, initsize;
972 int tmp; 968 int tmp;
973 969
974 start_periodic_check_for_corruption();
975
976#ifdef CONFIG_FLATMEM 970#ifdef CONFIG_FLATMEM
977 BUG_ON(!mem_map); 971 BUG_ON(!mem_map);
978#endif 972#endif
@@ -1042,11 +1036,25 @@ void __init mem_init(void)
1042 (unsigned long)&_text, (unsigned long)&_etext, 1036 (unsigned long)&_text, (unsigned long)&_etext,
1043 ((unsigned long)&_etext - (unsigned long)&_text) >> 10); 1037 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
1044 1038
1039 /*
1040 * Check boundaries twice: Some fundamental inconsistencies can
1041 * be detected at build time already.
1042 */
1043#define __FIXADDR_TOP (-PAGE_SIZE)
1044#ifdef CONFIG_HIGHMEM
1045 BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
1046 BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
1047#endif
1048#define high_memory (-128UL << 20)
1049 BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
1050#undef high_memory
1051#undef __FIXADDR_TOP
1052
1045#ifdef CONFIG_HIGHMEM 1053#ifdef CONFIG_HIGHMEM
1046 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); 1054 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
1047 BUG_ON(VMALLOC_END > PKMAP_BASE); 1055 BUG_ON(VMALLOC_END > PKMAP_BASE);
1048#endif 1056#endif
1049 BUG_ON(VMALLOC_START > VMALLOC_END); 1057 BUG_ON(VMALLOC_START >= VMALLOC_END);
1050 BUG_ON((unsigned long)high_memory > VMALLOC_START); 1058 BUG_ON((unsigned long)high_memory > VMALLOC_START);
1051 1059
1052 if (boot_cpu_data.wp_works_ok < 0) 1060 if (boot_cpu_data.wp_works_ok < 0)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9db01db6e3cd..9f7a0d24d42a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -902,8 +902,6 @@ void __init mem_init(void)
902 long codesize, reservedpages, datasize, initsize; 902 long codesize, reservedpages, datasize, initsize;
903 unsigned long absent_pages; 903 unsigned long absent_pages;
904 904
905 start_periodic_check_for_corruption();
906
907 pci_iommu_alloc(); 905 pci_iommu_alloc();
908 906
909 /* clear_bss() already clear the empty_zero_page */ 907 /* clear_bss() already clear the empty_zero_page */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4c4307ff3e0..bd85d42819e1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
223 * Check if the request spans more than any BAR in the iomem resource 223 * Check if the request spans more than any BAR in the iomem resource
224 * tree. 224 * tree.
225 */ 225 */
226 WARN_ON(iomem_map_sanity_check(phys_addr, size)); 226 WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
227 KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
227 228
228 /* 229 /*
229 * Don't allow anybody to remap normal RAM that we're using.. 230 * Don't allow anybody to remap normal RAM that we're using..
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index eb1bf000d12e..85cbd3cd3723 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -596,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
596 free_memtype(addr, addr + size); 596 free_memtype(addr, addr + size);
597} 597}
598 598
599/*
600 * Internal interface to reserve a range of physical memory with prot.
601 * Reserved non RAM regions only and after successful reserve_memtype,
602 * this func also keeps identity mapping (if any) in sync with this new prot.
603 */
604static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
605{
606 int is_ram = 0;
607 int id_sz, ret;
608 unsigned long flags;
609 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
610
611 is_ram = pagerange_is_ram(paddr, paddr + size);
612
613 if (is_ram != 0) {
614 /*
615 * For mapping RAM pages, drivers need to call
616 * set_memory_[uc|wc|wb] directly, for reserve and free, before
617 * setting up the PTE.
618 */
619 WARN_ON_ONCE(1);
620 return 0;
621 }
622
623 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
624 if (ret)
625 return ret;
626
627 if (flags != want_flags) {
628 free_memtype(paddr, paddr + size);
629 printk(KERN_ERR
630 "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
631 current->comm, current->pid,
632 cattr_name(want_flags),
633 (unsigned long long)paddr,
634 (unsigned long long)(paddr + size),
635 cattr_name(flags));
636 return -EINVAL;
637 }
638
639 /* Need to keep identity mapping in sync */
640 if (paddr >= __pa(high_memory))
641 return 0;
642
643 id_sz = (__pa(high_memory) < paddr + size) ?
644 __pa(high_memory) - paddr :
645 size;
646
647 if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
648 free_memtype(paddr, paddr + size);
649 printk(KERN_ERR
650 "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
651 "for %Lx-%Lx\n",
652 current->comm, current->pid,
653 cattr_name(flags),
654 (unsigned long long)paddr,
655 (unsigned long long)(paddr + size));
656 return -EINVAL;
657 }
658 return 0;
659}
660
661/*
662 * Internal interface to free a range of physical memory.
663 * Frees non RAM regions only.
664 */
665static void free_pfn_range(u64 paddr, unsigned long size)
666{
667 int is_ram;
668
669 is_ram = pagerange_is_ram(paddr, paddr + size);
670 if (is_ram == 0)
671 free_memtype(paddr, paddr + size);
672}
673
674/*
675 * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
676 * copied through copy_page_range().
677 *
678 * If the vma has a linear pfn mapping for the entire range, we get the prot
679 * from pte and reserve the entire vma range with single reserve_pfn_range call.
680 * Otherwise, we reserve the entire vma range, my ging through the PTEs page
681 * by page to get physical address and protection.
682 */
683int track_pfn_vma_copy(struct vm_area_struct *vma)
684{
685 int retval = 0;
686 unsigned long i, j;
687 resource_size_t paddr;
688 unsigned long prot;
689 unsigned long vma_start = vma->vm_start;
690 unsigned long vma_end = vma->vm_end;
691 unsigned long vma_size = vma_end - vma_start;
692
693 if (!pat_enabled)
694 return 0;
695
696 if (is_linear_pfn_mapping(vma)) {
697 /*
698 * reserve the whole chunk covered by vma. We need the
699 * starting address and protection from pte.
700 */
701 if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
702 WARN_ON_ONCE(1);
703 return -EINVAL;
704 }
705 return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
706 }
707
708 /* reserve entire vma page by page, using pfn and prot from pte */
709 for (i = 0; i < vma_size; i += PAGE_SIZE) {
710 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
711 continue;
712
713 retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
714 if (retval)
715 goto cleanup_ret;
716 }
717 return 0;
718
719cleanup_ret:
720 /* Reserve error: Cleanup partial reservation and return error */
721 for (j = 0; j < i; j += PAGE_SIZE) {
722 if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
723 continue;
724
725 free_pfn_range(paddr, PAGE_SIZE);
726 }
727
728 return retval;
729}
730
731/*
732 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
733 * for physical range indicated by pfn and size.
734 *
735 * prot is passed in as a parameter for the new mapping. If the vma has a
736 * linear pfn mapping for the entire range reserve the entire vma range with
737 * single reserve_pfn_range call.
738 * Otherwise, we look t the pfn and size and reserve only the specified range
739 * page by page.
740 *
741 * Note that this function can be called with caller trying to map only a
742 * subrange/page inside the vma.
743 */
744int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
745 unsigned long pfn, unsigned long size)
746{
747 int retval = 0;
748 unsigned long i, j;
749 resource_size_t base_paddr;
750 resource_size_t paddr;
751 unsigned long vma_start = vma->vm_start;
752 unsigned long vma_end = vma->vm_end;
753 unsigned long vma_size = vma_end - vma_start;
754
755 if (!pat_enabled)
756 return 0;
757
758 if (is_linear_pfn_mapping(vma)) {
759 /* reserve the whole chunk starting from vm_pgoff */
760 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
761 return reserve_pfn_range(paddr, vma_size, prot);
762 }
763
764 /* reserve page by page using pfn and size */
765 base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
766 for (i = 0; i < size; i += PAGE_SIZE) {
767 paddr = base_paddr + i;
768 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
769 if (retval)
770 goto cleanup_ret;
771 }
772 return 0;
773
774cleanup_ret:
775 /* Reserve error: Cleanup partial reservation and return error */
776 for (j = 0; j < i; j += PAGE_SIZE) {
777 paddr = base_paddr + j;
778 free_pfn_range(paddr, PAGE_SIZE);
779 }
780
781 return retval;
782}
783
784/*
785 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
786 * untrack can be called for a specific region indicated by pfn and size or
787 * can be for the entire vma (in which case size can be zero).
788 */
789void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
790 unsigned long size)
791{
792 unsigned long i;
793 resource_size_t paddr;
794 unsigned long prot;
795 unsigned long vma_start = vma->vm_start;
796 unsigned long vma_end = vma->vm_end;
797 unsigned long vma_size = vma_end - vma_start;
798
799 if (!pat_enabled)
800 return;
801
802 if (is_linear_pfn_mapping(vma)) {
803 /* free the whole chunk starting from vm_pgoff */
804 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
805 free_pfn_range(paddr, vma_size);
806 return;
807 }
808
809 if (size != 0 && size != vma_size) {
810 /* free page by page, using pfn and size */
811 paddr = (resource_size_t)pfn << PAGE_SHIFT;
812 for (i = 0; i < size; i += PAGE_SIZE) {
813 paddr = paddr + i;
814 free_pfn_range(paddr, PAGE_SIZE);
815 }
816 } else {
817 /* free entire vma, page by page, using the pfn from pte */
818 for (i = 0; i < vma_size; i += PAGE_SIZE) {
819 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
820 continue;
821
822 free_pfn_range(paddr, PAGE_SIZE);
823 }
824 }
825}
826
827pgprot_t pgprot_writecombine(pgprot_t prot)
828{
829 if (pat_enabled)
830 return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
831 else
832 return pgprot_noncached(prot);
833}
834
599#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 835#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
600 836
601/* get Nth element of the linked list */ 837/* get Nth element of the linked list */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index b67732bbb85a..bb1a01f089e2 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -23,6 +23,12 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
23unsigned int pci_early_dump_regs; 23unsigned int pci_early_dump_regs;
24static int pci_bf_sort; 24static int pci_bf_sort;
25int pci_routeirq; 25int pci_routeirq;
26int noioapicquirk;
27#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS
28int noioapicreroute = 0;
29#else
30int noioapicreroute = 1;
31#endif
26int pcibios_last_bus = -1; 32int pcibios_last_bus = -1;
27unsigned long pirq_table_addr; 33unsigned long pirq_table_addr;
28struct pci_bus *pci_root_bus; 34struct pci_bus *pci_root_bus;
@@ -519,6 +525,17 @@ char * __devinit pcibios_setup(char *str)
519 } else if (!strcmp(str, "skip_isa_align")) { 525 } else if (!strcmp(str, "skip_isa_align")) {
520 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; 526 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
521 return NULL; 527 return NULL;
528 } else if (!strcmp(str, "noioapicquirk")) {
529 noioapicquirk = 1;
530 return NULL;
531 } else if (!strcmp(str, "ioapicreroute")) {
532 if (noioapicreroute != -1)
533 noioapicreroute = 0;
534 return NULL;
535 } else if (!strcmp(str, "noioapicreroute")) {
536 if (noioapicreroute != -1)
537 noioapicreroute = 1;
538 return NULL;
522 } 539 }
523 return str; 540 return str;
524} 541}
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9915293500fb..9a5af6c8fbe9 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
173 173
174#undef PCI_CONF2_ADDRESS 174#undef PCI_CONF2_ADDRESS
175 175
176static struct pci_raw_ops pci_direct_conf2 = { 176struct pci_raw_ops pci_direct_conf2 = {
177 .read = pci_conf2_read, 177 .read = pci_conf2_read,
178 .write = pci_conf2_write, 178 .write = pci_conf2_write,
179}; 179};
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
289 289
290 if (pci_check_type1()) { 290 if (pci_check_type1()) {
291 raw_pci_ops = &pci_direct_conf1; 291 raw_pci_ops = &pci_direct_conf1;
292 port_cf9_safe = true;
292 return 1; 293 return 1;
293 } 294 }
294 release_resource(region); 295 release_resource(region);
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
305 306
306 if (pci_check_type2()) { 307 if (pci_check_type2()) {
307 raw_pci_ops = &pci_direct_conf2; 308 raw_pci_ops = &pci_direct_conf2;
309 port_cf9_safe = true;
308 return 2; 310 return 2;
309 } 311 }
310 312
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 15b9cf6be729..1959018aac02 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
96extern struct pci_raw_ops *raw_pci_ext_ops; 96extern struct pci_raw_ops *raw_pci_ext_ops;
97 97
98extern struct pci_raw_ops pci_direct_conf1; 98extern struct pci_raw_ops pci_direct_conf1;
99extern bool port_cf9_safe;
99 100
100/* arch_initcall level */ 101/* arch_initcall level */
101extern int pci_direct_probe(void); 102extern int pci_direct_probe(void);
diff --git a/arch/x86/scripts/strip-symbols b/arch/x86/scripts/strip-symbols
new file mode 100644
index 000000000000..a2f1ccb827c7
--- /dev/null
+++ b/arch/x86/scripts/strip-symbols
@@ -0,0 +1 @@
__cpu_vendor_dev_X86_VENDOR_*
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
9 * Also alternative() doesn't work. 9 * Also alternative() doesn't work.
10 */ 10 */
11 11
12/* Disable profiling for userspace code: */
13#define DISABLE_BRANCH_PROFILING
14
12#include <linux/kernel.h> 15#include <linux/kernel.h>
13#include <linux/posix-timers.h> 16#include <linux/posix-timers.h>
14#include <linux/time.h> 17#include <linux/time.h>
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 513f330c5832..1241f118ab56 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -310,7 +310,7 @@ int __init sysenter_setup(void)
310} 310}
311 311
312/* Setup a VMA at program startup for the vsyscall page */ 312/* Setup a VMA at program startup for the vsyscall page */
313int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) 313int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
314{ 314{
315 struct mm_struct *mm = current->mm; 315 struct mm_struct *mm = current->mm;
316 unsigned long addr; 316 unsigned long addr;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 257ba4a10abf..9c98cc6ba978 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -98,7 +98,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
98 98
99/* Setup a VMA at program startup for the vsyscall page. 99/* Setup a VMA at program startup for the vsyscall page.
100 Not called for compat tasks */ 100 Not called for compat tasks */
101int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) 101int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
102{ 102{
103 struct mm_struct *mm = current->mm; 103 struct mm_struct *mm = current->mm;
104 unsigned long addr; 104 unsigned long addr;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5e4686d70f62..bea215230b20 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -28,6 +28,7 @@
28#include <linux/console.h> 28#include <linux/console.h>
29 29
30#include <xen/interface/xen.h> 30#include <xen/interface/xen.h>
31#include <xen/interface/version.h>
31#include <xen/interface/physdev.h> 32#include <xen/interface/physdev.h>
32#include <xen/interface/vcpu.h> 33#include <xen/interface/vcpu.h>
33#include <xen/features.h> 34#include <xen/features.h>
@@ -793,7 +794,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
793 794
794 ret = 0; 795 ret = 0;
795 796
796 switch(msr) { 797 switch (msr) {
797#ifdef CONFIG_X86_64 798#ifdef CONFIG_X86_64
798 unsigned which; 799 unsigned which;
799 u64 base; 800 u64 base;
@@ -1453,7 +1454,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1453 1454
1454 ident_pte = 0; 1455 ident_pte = 0;
1455 pfn = 0; 1456 pfn = 0;
1456 for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { 1457 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1457 pte_t *pte_page; 1458 pte_t *pte_page;
1458 1459
1459 /* Reuse or allocate a page of ptes */ 1460 /* Reuse or allocate a page of ptes */
@@ -1471,7 +1472,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1471 } 1472 }
1472 1473
1473 /* Install mappings */ 1474 /* Install mappings */
1474 for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { 1475 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1475 pte_t pte; 1476 pte_t pte;
1476 1477
1477 if (pfn > max_pfn_mapped) 1478 if (pfn > max_pfn_mapped)
@@ -1485,7 +1486,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1485 } 1486 }
1486 } 1487 }
1487 1488
1488 for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) 1489 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1489 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); 1490 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1490 1491
1491 set_page_prot(pmd, PAGE_KERNEL_RO); 1492 set_page_prot(pmd, PAGE_KERNEL_RO);
@@ -1499,7 +1500,7 @@ static void convert_pfn_mfn(void *v)
1499 1500
1500 /* All levels are converted the same way, so just treat them 1501 /* All levels are converted the same way, so just treat them
1501 as ptes. */ 1502 as ptes. */
1502 for(i = 0; i < PTRS_PER_PTE; i++) 1503 for (i = 0; i < PTRS_PER_PTE; i++)
1503 pte[i] = xen_make_pte(pte[i].pte); 1504 pte[i] = xen_make_pte(pte[i].pte);
1504} 1505}
1505 1506
@@ -1514,7 +1515,8 @@ static void convert_pfn_mfn(void *v)
1514 * of the physical mapping once some sort of allocator has been set 1515 * of the physical mapping once some sort of allocator has been set
1515 * up. 1516 * up.
1516 */ 1517 */
1517static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1518static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1519 unsigned long max_pfn)
1518{ 1520{
1519 pud_t *l3; 1521 pud_t *l3;
1520 pmd_t *l2; 1522 pmd_t *l2;
@@ -1577,7 +1579,8 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
1577#else /* !CONFIG_X86_64 */ 1579#else /* !CONFIG_X86_64 */
1578static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; 1580static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
1579 1581
1580static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1582static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1583 unsigned long max_pfn)
1581{ 1584{
1582 pmd_t *kernel_pmd; 1585 pmd_t *kernel_pmd;
1583 1586
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 636ef4caa52d..773d68d3e912 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -154,13 +154,13 @@ void xen_setup_mfn_list_list(void)
154{ 154{
155 unsigned pfn, idx; 155 unsigned pfn, idx;
156 156
157 for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { 157 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
158 unsigned topidx = p2m_top_index(pfn); 158 unsigned topidx = p2m_top_index(pfn);
159 159
160 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); 160 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
161 } 161 }
162 162
163 for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { 163 for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
164 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; 164 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
165 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); 165 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
166 } 166 }
@@ -179,7 +179,7 @@ void __init xen_build_dynamic_phys_to_machine(void)
179 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); 179 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
180 unsigned pfn; 180 unsigned pfn;
181 181
182 for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { 182 for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
183 unsigned topidx = p2m_top_index(pfn); 183 unsigned topidx = p2m_top_index(pfn);
184 184
185 p2m_top[topidx] = &mfn_list[pfn]; 185 p2m_top[topidx] = &mfn_list[pfn];
@@ -207,7 +207,7 @@ static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
207 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); 207 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
208 BUG_ON(p == NULL); 208 BUG_ON(p == NULL);
209 209
210 for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) 210 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
211 p[i] = INVALID_P2M_ENTRY; 211 p[i] = INVALID_P2M_ENTRY;
212 212
213 if (cmpxchg(pp, p2m_missing, p) != p2m_missing) 213 if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
@@ -407,7 +407,8 @@ out:
407 preempt_enable(); 407 preempt_enable();
408} 408}
409 409
410pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 410pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
411 unsigned long addr, pte_t *ptep)
411{ 412{
412 /* Just return the pte as-is. We preserve the bits on commit */ 413 /* Just return the pte as-is. We preserve the bits on commit */
413 return *ptep; 414 return *ptep;
@@ -878,7 +879,8 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
878 879
879 if (user_pgd) { 880 if (user_pgd) {
880 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); 881 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
881 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); 882 xen_do_pin(MMUEXT_PIN_L4_TABLE,
883 PFN_DOWN(__pa(user_pgd)));
882 } 884 }
883 } 885 }
884#else /* CONFIG_X86_32 */ 886#else /* CONFIG_X86_32 */
@@ -993,7 +995,8 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
993 pgd_t *user_pgd = xen_get_user_pgd(pgd); 995 pgd_t *user_pgd = xen_get_user_pgd(pgd);
994 996
995 if (user_pgd) { 997 if (user_pgd) {
996 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); 998 xen_do_pin(MMUEXT_UNPIN_TABLE,
999 PFN_DOWN(__pa(user_pgd)));
997 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); 1000 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
998 } 1001 }
999 } 1002 }
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 8ea8a0d0b0de..c738644b5435 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -154,7 +154,7 @@ void xen_mc_flush(void)
154 ret, smp_processor_id()); 154 ret, smp_processor_id());
155 dump_stack(); 155 dump_stack();
156 for (i = 0; i < b->mcidx; i++) { 156 for (i = 0; i < b->mcidx; i++) {
157 printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", 157 printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
158 i+1, b->mcidx, 158 i+1, b->mcidx,
159 b->debug[i].op, 159 b->debug[i].op,
160 b->debug[i].args[0], 160 b->debug[i].args[0],
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d67901083888..15c6c68db6a2 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -28,6 +28,9 @@
28/* These are code, but not functions. Defined in entry.S */ 28/* These are code, but not functions. Defined in entry.S */
29extern const char xen_hypervisor_callback[]; 29extern const char xen_hypervisor_callback[];
30extern const char xen_failsafe_callback[]; 30extern const char xen_failsafe_callback[];
31extern void xen_sysenter_target(void);
32extern void xen_syscall_target(void);
33extern void xen_syscall32_target(void);
31 34
32 35
33/** 36/**
@@ -110,7 +113,6 @@ static __cpuinit int register_callback(unsigned type, const void *func)
110 113
111void __cpuinit xen_enable_sysenter(void) 114void __cpuinit xen_enable_sysenter(void)
112{ 115{
113 extern void xen_sysenter_target(void);
114 int ret; 116 int ret;
115 unsigned sysenter_feature; 117 unsigned sysenter_feature;
116 118
@@ -132,8 +134,6 @@ void __cpuinit xen_enable_syscall(void)
132{ 134{
133#ifdef CONFIG_X86_64 135#ifdef CONFIG_X86_64
134 int ret; 136 int ret;
135 extern void xen_syscall_target(void);
136 extern void xen_syscall32_target(void);
137 137
138 ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); 138 ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
139 if (ret != 0) { 139 if (ret != 0) {
@@ -160,7 +160,8 @@ void __init xen_arch_setup(void)
160 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 160 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
161 161
162 if (!xen_feature(XENFEAT_auto_translated_physmap)) 162 if (!xen_feature(XENFEAT_auto_translated_physmap))
163 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); 163 HYPERVISOR_vm_assist(VMASST_CMD_enable,
164 VMASST_TYPE_pae_extended_cr3);
164 165
165 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || 166 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
166 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) 167 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))