Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 26
-rw-r--r--  arch/x86/Kconfig.cpu | 4
-rw-r--r--  arch/x86/Kconfig.debug | 4
-rw-r--r--  arch/x86/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/Makefile | 6
-rw-r--r--  arch/x86/boot/compressed/misc.c | 19
-rw-r--r--  arch/x86/boot/compressed/relocs.c | 87
-rw-r--r--  arch/x86/boot/header.S | 2
-rw-r--r--  arch/x86/boot/mkcpustr.c | 2
-rw-r--r--  arch/x86/boot/version.c | 4
-rw-r--r--  arch/x86/boot/video-vga.c | 9
-rw-r--r--  arch/x86/boot/video.c | 7
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 13
-rw-r--r--  arch/x86/ia32/ia32entry.S | 2
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 43
-rw-r--r--  arch/x86/include/asm/acpi.h | 26
-rw-r--r--  arch/x86/include/asm/alternative.h | 12
-rw-r--r--  arch/x86/include/asm/amd_iommu_proto.h | 5
-rw-r--r--  arch/x86/include/asm/asm-offsets.h | 1
-rw-r--r--  arch/x86/include/asm/atomic.h | 299
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 160
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 224
-rw-r--r--  arch/x86/include/asm/atomic_32.h | 415
-rw-r--r--  arch/x86/include/asm/atomic_64.h | 485
-rw-r--r--  arch/x86/include/asm/cpu_debug.h | 127
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 5
-rw-r--r--  arch/x86/include/asm/debugreg.h | 3
-rw-r--r--  arch/x86/include/asm/desc_defs.h | 4
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 2
-rw-r--r--  arch/x86/include/asm/elf.h | 16
-rw-r--r--  arch/x86/include/asm/fb.h | 4
-rw-r--r--  arch/x86/include/asm/fixmap.h | 16
-rw-r--r--  arch/x86/include/asm/geode.h | 219
-rw-r--r--  arch/x86/include/asm/hpet.h | 1
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 3
-rw-r--r--  arch/x86/include/asm/i387.h | 12
-rw-r--r--  arch/x86/include/asm/io.h | 155
-rw-r--r--  arch/x86/include/asm/io_32.h | 196
-rw-r--r--  arch/x86/include/asm/io_64.h | 181
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 2
-rw-r--r--  arch/x86/include/asm/kvm.h | 4
-rw-r--r--  arch/x86/include/asm/mce.h | 3
-rw-r--r--  arch/x86/include/asm/microcode.h | 2
-rw-r--r--  arch/x86/include/asm/mmzone_32.h | 2
-rw-r--r--  arch/x86/include/asm/mmzone_64.h | 6
-rw-r--r--  arch/x86/include/asm/msr-index.h | 2
-rw-r--r--  arch/x86/include/asm/msr.h | 19
-rw-r--r--  arch/x86/include/asm/nmi.h | 1
-rw-r--r--  arch/x86/include/asm/numa_64.h | 5
-rw-r--r--  arch/x86/include/asm/numaq.h | 4
-rw-r--r--  arch/x86/include/asm/olpc.h | 2
-rw-r--r--  arch/x86/include/asm/page_types.h | 1
-rw-r--r--  arch/x86/include/asm/paravirt.h | 14
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 14
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 21
-rw-r--r--  arch/x86/include/asm/percpu.h | 104
-rw-r--r--  arch/x86/include/asm/perf_event.h | 17
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 5
-rw-r--r--  arch/x86/include/asm/processor.h | 4
-rw-r--r--  arch/x86/include/asm/ptrace.h | 6
-rw-r--r--  arch/x86/include/asm/rwsem.h | 81
-rw-r--r--  arch/x86/include/asm/sigcontext.h | 4
-rw-r--r--  arch/x86/include/asm/smp.h | 9
-rw-r--r--  arch/x86/include/asm/spinlock.h | 62
-rw-r--r--  arch/x86/include/asm/spinlock_types.h | 10
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 24
-rw-r--r--  arch/x86/include/asm/swiotlb.h | 8
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 4
-rw-r--r--  arch/x86/include/asm/syscall.h | 2
-rw-r--r--  arch/x86/include/asm/syscalls.h | 34
-rw-r--r--  arch/x86/include/asm/system.h | 5
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/topology.h | 9
-rw-r--r--  arch/x86/include/asm/trampoline.h | 1
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 5
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 26
-rw-r--r--  arch/x86/include/asm/user.h | 58
-rw-r--r--  arch/x86/include/asm/uv/bios.h | 22
-rw-r--r--  arch/x86/include/asm/uv/uv.h | 1
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 2
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h | 143
-rw-r--r--  arch/x86/include/asm/x86_init.h | 2
-rw-r--r--  arch/x86/include/asm/xen/hypervisor.h | 27
-rw-r--r--  arch/x86/include/asm/xsave.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/acpi/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 58
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 2
-rw-r--r--  arch/x86/kernel/acpi/processor.c | 101
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 2
-rw-r--r--  arch/x86/kernel/alternative.c | 22
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 77
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 25
-rw-r--r--  arch/x86/kernel/aperture_64.c | 11
-rw-r--r--  arch/x86/kernel/apic/apic.c | 24
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 2
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 5
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 12
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 45
-rw-r--r--  arch/x86/kernel/apic/nmi.c | 8
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 29
-rw-r--r--  arch/x86/kernel/apic/probe_64.c | 13
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 5
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 5
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 132
-rw-r--r--  arch/x86/kernel/apm_32.c | 4
-rw-r--r--  arch/x86/kernel/bios_uv.c | 47
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c | 19
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 55
-rw-r--r--  arch/x86/kernel/cpu/common.c | 16
-rw-r--r--  arch/x86/kernel/cpu/cpu_debug.c | 688
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 49
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 19
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 35
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 24
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 3
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 401
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 22
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 5
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 20
-rw-r--r--  arch/x86/kernel/cpu/mtrr/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/centaur.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cyrix.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 11
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/state.c | 94
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 1859
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 416
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 971
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 157
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 11
-rw-r--r--  arch/x86/kernel/cpuid.c | 7
-rw-r--r--  arch/x86/kernel/ds.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack.c | 43
-rw-r--r--  arch/x86/kernel/dumpstack.h | 6
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 7
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 43
-rw-r--r--  arch/x86/kernel/e820.c | 21
-rw-r--r--  arch/x86/kernel/efi.c | 2
-rw-r--r--  arch/x86/kernel/entry_32.S | 69
-rw-r--r--  arch/x86/kernel/entry_64.S | 55
-rw-r--r--  arch/x86/kernel/ftrace.c | 36
-rw-r--r--  arch/x86/kernel/geode_32.c | 196
-rw-r--r--  arch/x86/kernel/head32.c | 2
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/hpet.c | 8
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 43
-rw-r--r--  arch/x86/kernel/i387.c | 71
-rw-r--r--  arch/x86/kernel/ioport.c | 28
-rw-r--r--  arch/x86/kernel/irq.c | 14
-rw-r--r--  arch/x86/kernel/kgdb.c | 234
-rw-r--r--  arch/x86/kernel/kprobes.c | 9
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 410
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 80
-rw-r--r--  arch/x86/kernel/microcode_core.c | 34
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 47
-rw-r--r--  arch/x86/kernel/mpparse.c | 10
-rw-r--r--  arch/x86/kernel/msr.c | 11
-rw-r--r--  arch/x86/kernel/olpc.c | 4
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 4
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 6
-rw-r--r--  arch/x86/kernel/pci-dma.c | 6
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 9
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c | 11
-rw-r--r--  arch/x86/kernel/process.c | 112
-rw-r--r--  arch/x86/kernel/process_32.c | 107
-rw-r--r--  arch/x86/kernel/process_64.c | 87
-rw-r--r--  arch/x86/kernel/ptrace.c | 200
-rw-r--r--  arch/x86/kernel/quirks.c | 13
-rw-r--r--  arch/x86/kernel/reboot.c | 17
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c | 2
-rw-r--r--  arch/x86/kernel/setup.c | 48
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 13
-rw-r--r--  arch/x86/kernel/signal.c | 12
-rw-r--r--  arch/x86/kernel/smpboot.c | 52
-rw-r--r--  arch/x86/kernel/stacktrace.c | 18
-rw-r--r--  arch/x86/kernel/sys_i386_32.c | 27
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 17
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 2
-rw-r--r--  arch/x86/kernel/trampoline.c | 20
-rw-r--r--  arch/x86/kernel/traps.c | 3
-rw-r--r--  arch/x86/kernel/tsc.c | 3
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 10
-rw-r--r--  arch/x86/kernel/uv_irq.c | 3
-rw-r--r--  arch/x86/kernel/uv_sysfs.c | 6
-rw-r--r--  arch/x86/kernel/uv_time.c | 13
-rw-r--r--  arch/x86/kernel/vm86_32.c | 11
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 4
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 9
-rw-r--r--  arch/x86/kernel/x86_init.c | 3
-rw-r--r--  arch/x86/kernel/xsave.c | 1
-rw-r--r--  arch/x86/kvm/i8254.c | 15
-rw-r--r--  arch/x86/kvm/lapic.c | 12
-rw-r--r--  arch/x86/kvm/mmu.c | 6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 22
-rw-r--r--  arch/x86/kvm/svm.c | 64
-rw-r--r--  arch/x86/kvm/x86.c | 25
-rw-r--r--  arch/x86/lib/Makefile | 11
-rw-r--r--  arch/x86/lib/cache-smp.c | 19
-rw-r--r--  arch/x86/lib/copy_user_64.S | 6
-rw-r--r--  arch/x86/lib/io_64.c | 25
-rw-r--r--  arch/x86/lib/memcpy_64.S | 23
-rw-r--r--  arch/x86/lib/memset_64.S | 18
-rw-r--r--  arch/x86/lib/msr-smp.c | 204
-rw-r--r--  arch/x86/lib/msr.c | 219
-rw-r--r--  arch/x86/lib/rwsem_64.S | 81
-rw-r--r--  arch/x86/mm/gup.c | 2
-rw-r--r--  arch/x86/mm/init.c | 7
-rw-r--r--  arch/x86/mm/init_32.c | 11
-rw-r--r--  arch/x86/mm/init_64.c | 19
-rw-r--r--  arch/x86/mm/ioremap.c | 41
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 19
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c | 2
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.c | 16
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h | 2
-rw-r--r--  arch/x86/mm/kmmio.c | 53
-rw-r--r--  arch/x86/mm/mmap.c | 4
-rw-r--r--  arch/x86/mm/mmio-mod.c | 71
-rw-r--r--  arch/x86/mm/numa_64.c | 235
-rw-r--r--  arch/x86/mm/pat.c | 3
-rw-r--r--  arch/x86/mm/pgtable.c | 31
-rw-r--r--  arch/x86/mm/srat_32.c | 2
-rw-r--r--  arch/x86/mm/srat_64.c | 8
-rw-r--r--  arch/x86/mm/tlb.c | 8
-rw-r--r--  arch/x86/oprofile/backtrace.c | 9
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 20
-rw-r--r--  arch/x86/oprofile/op_model_amd.c | 244
-rw-r--r--  arch/x86/oprofile/op_model_p4.c | 6
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c | 17
-rw-r--r--  arch/x86/oprofile/op_x86_model.h | 20
-rw-r--r--  arch/x86/pci/Makefile | 5
-rw-r--r--  arch/x86/pci/acpi.c | 124
-rw-r--r--  arch/x86/pci/amd_bus.c | 120
-rw-r--r--  arch/x86/pci/bus_numa.c | 102
-rw-r--r--  arch/x86/pci/bus_numa.h | 26
-rw-r--r--  arch/x86/pci/common.c | 23
-rw-r--r--  arch/x86/pci/early.c | 7
-rw-r--r--  arch/x86/pci/i386.c | 56
-rw-r--r--  arch/x86/pci/irq.c | 2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 355
-rw-r--r--  arch/x86/pci/mmconfig_32.c | 16
-rw-r--r--  arch/x86/pci/mmconfig_64.c | 88
-rw-r--r--  arch/x86/pci/numaq_32.c | 6
-rw-r--r--  arch/x86/tools/chkobjdump.awk | 16
-rw-r--r--  arch/x86/tools/gen-insn-attr-x86.awk | 20
-rw-r--r--  arch/x86/tools/test_get_len.c | 6
-rw-r--r--  arch/x86/xen/enlighten.c | 37
-rw-r--r--  arch/x86/xen/mmu.c | 2
-rw-r--r--  arch/x86/xen/smp.c | 42
-rw-r--r--  arch/x86/xen/spinlock.c | 16
-rw-r--r--  arch/x86/xen/suspend.c | 17
-rw-r--r--  arch/x86/xen/time.c | 31
-rw-r--r--  arch/x86/xen/xen-asm_64.S | 4
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
265 files changed, 6841 insertions, 7374 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 32a1918e1b88..0896008f7509 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -45,11 +45,15 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
+	select PERF_EVENTS
+	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 
@@ -986,12 +990,6 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
-config X86_CPU_DEBUG
-	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
-	---help---
-	  If you select this option, this will provide various x86 CPUs
-	  information through debugfs.
-
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1244,6 +1242,11 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ILLEGAL_POINTER_VALUE
+	hex
+	default 0 if X86_32
+	default 0xdead000000000000 if X86_64
+
 source "mm/Kconfig"
 
 config HIGHPTE
@@ -2012,18 +2015,9 @@ config SCx200HR_TIMER
 	  processor goes idle (as is done by the scheduler).  The
 	  other workaround is idle=poll boot option.
 
-config GEODE_MFGPT_TIMER
-	def_bool y
-	prompt "Geode Multi-Function General Purpose Timer (MFGPT) events"
-	depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS
-	---help---
-	  This driver provides a clock event source based on the MFGPT
-	  timer(s) in the CS5535 and CS5536 companion chip for the geode.
-	  MFGPTs have a better resolution and max interval than the
-	  generic PIT, and are suitable for use as high-res timers.
-
 config OLPC
 	bool "One Laptop Per Child support"
+	select GPIOLIB
 	default n
 	---help---
 	  Add support for detecting the unique features of the OLPC
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 08e442bc3ab9..a19829374e6a 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_32 && !M386
+	depends on X86_64 || !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
@@ -396,7 +396,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
 	def_bool y
-	depends on !M386 && !M486
+	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 731318e5ac1d..bc01e3ebfeb2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
 config X86_DECODER_SELFTEST
 	bool "x86 instruction decoder selftest"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && KPROBES
 	---help---
 	 Perform x86 instruction decoder selftests at build time.
 	 This option is useful for checking the sanity of x86 instruction
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 78b32be55e9e..0a43dc515e4c 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -135,9 +135,7 @@ drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
 # suspend and hibernation support
 drivers-$(CONFIG_PM) += arch/x86/power/
 
-ifeq ($(CONFIG_X86_32),y)
 drivers-$(CONFIG_FB) += arch/x86/video/
-endif
 
 ####
 # boot loader support. Several targets are kept for legacy purposes
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f8ed0658404c..fbb47daf2459 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,11 +4,12 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o piggy.o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+cflags-$(CONFIG_X86_32) := -march=i386
 cflags-$(CONFIG_X86_64) := -mcmodel=small
 KBUILD_CFLAGS += $(cflags-y)
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
@@ -48,10 +49,13 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
 $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,lzo)
 
 suffix-$(CONFIG_KERNEL_GZIP) := gz
 suffix-$(CONFIG_KERNEL_BZIP2) := bz2
 suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_LZO) := lzo
 
 quiet_cmd_mkpiggy = MKPIGGY $@
       cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 842b2a36174a..51e240779a44 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -19,11 +19,6 @@
 #define _ASM_X86_DESC_H 1
 #endif
 
-#ifdef CONFIG_X86_64
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-#endif
-
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
@@ -131,8 +126,8 @@ static void error(char *m);
 static struct boot_params *real_mode;		/* Pointer to real-mode data */
 static int quiet;
 
-static void *memset(void *s, int c, unsigned n);
-void *memcpy(void *dest, const void *src, unsigned n);
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, const void *src, size_t n);
 
 static void __putstr(int, const char *);
 #define putstr(__x)  __putstr(0, __x)
@@ -162,6 +157,10 @@ static int lines, cols;
 #include "../../../../lib/decompress_unlzma.c"
 #endif
 
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
 static void scroll(void)
 {
 	int i;
@@ -181,11 +180,9 @@ static void __putstr(int error, const char *s)
 		return;
 #endif
 
-#ifdef CONFIG_X86_32
 	if (real_mode->screen_info.orig_video_mode == 0 &&
 	    lines == 0 && cols == 0)
 		return;
-#endif
 
 	x = real_mode->screen_info.orig_x;
 	y = real_mode->screen_info.orig_y;
@@ -219,7 +216,7 @@ static void __putstr(int error, const char *s)
 	outb(0xff & (pos >> 1), vidport+1);
 }
 
-static void *memset(void *s, int c, unsigned n)
+void *memset(void *s, int c, size_t n)
 {
 	int i;
 	char *ss = s;
@@ -229,7 +226,7 @@ static void *memset(void *s, int c, unsigned n)
 	return s;
 }
 
-void *memcpy(void *dest, const void *src, unsigned n)
+void *memcpy(void *dest, const void *src, size_t n)
 {
 	int i;
 	const char *s = src;
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index bbeb0c3fbd90..89bbf4e4d05d 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -9,6 +9,9 @@
 #include <byteswap.h>
 #define USE_BSD
 #include <endian.h>
+#include <regex.h>
+
+static void die(char *fmt, ...);
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 static Elf32_Ehdr ehdr;
@@ -30,25 +33,47 @@ static struct section *secs;
  * the address for which it has been compiled. Don't warn user about
  * absolute relocations present w.r.t these symbols.
  */
-static const char* safe_abs_relocs[] = {
-	"xen_irq_disable_direct_reloc",
-	"xen_save_fl_direct_reloc",
-};
+static const char abs_sym_regex[] =
+	"^(xen_irq_disable_direct_reloc$|"
+	"xen_save_fl_direct_reloc$|"
+	"VDSO|"
+	"__crc_)";
+static regex_t abs_sym_regex_c;
+static int is_abs_reloc(const char *sym_name)
+{
+	return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0);
+}
 
-static int is_safe_abs_reloc(const char* sym_name)
+/*
+ * These symbols are known to be relative, even if the linker marks them
+ * as absolute (typically defined outside any section in the linker script.)
+ */
+static const char rel_sym_regex[] =
+	"^_end$";
+static regex_t rel_sym_regex_c;
+static int is_rel_reloc(const char *sym_name)
 {
-	int i;
+	return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0);
+}
 
-	for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
-		if (!strcmp(sym_name, safe_abs_relocs[i]))
-			/* Match found */
-			return 1;
-	}
-	if (strncmp(sym_name, "VDSO", 4) == 0)
-		return 1;
-	if (strncmp(sym_name, "__crc_", 6) == 0)
-		return 1;
-	return 0;
+static void regex_init(void)
+{
+	char errbuf[128];
+	int err;
+
+	err = regcomp(&abs_sym_regex_c, abs_sym_regex,
+		      REG_EXTENDED|REG_NOSUB);
+	if (err) {
+		regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf);
+		die("%s", errbuf);
+	}
+
+	err = regcomp(&rel_sym_regex_c, rel_sym_regex,
+		      REG_EXTENDED|REG_NOSUB);
+	if (err) {
+		regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf);
+		die("%s", errbuf);
+	}
 }
 
 static void die(char *fmt, ...)
@@ -131,7 +156,7 @@ static const char *rel_type(unsigned type)
 #undef REL_TYPE
 	};
 	const char *name = "unknown type rel type name";
-	if (type < ARRAY_SIZE(type_name)) {
+	if (type < ARRAY_SIZE(type_name) && type_name[type]) {
 		name = type_name[type];
 	}
 	return name;
@@ -448,7 +473,7 @@ static void print_absolute_relocs(void)
 			 * Before warning check if this absolute symbol
 			 * relocation is harmless.
 			 */
-			if (is_safe_abs_reloc(name))
+			if (is_abs_reloc(name) || is_rel_reloc(name))
 				continue;
 
 			if (!printed) {
@@ -501,21 +526,26 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
 			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
 			r_type = ELF32_R_TYPE(rel->r_info);
 			/* Don't visit relocations to absolute symbols */
-			if (sym->st_shndx == SHN_ABS) {
+			if (sym->st_shndx == SHN_ABS &&
+			    !is_rel_reloc(sym_name(sym_strtab, sym))) {
 				continue;
 			}
-			if (r_type == R_386_NONE || r_type == R_386_PC32) {
+			switch (r_type) {
+			case R_386_NONE:
+			case R_386_PC32:
 				/*
 				 * NONE can be ignored and and PC relative
 				 * relocations don't need to be adjusted.
 				 */
-			}
-			else if (r_type == R_386_32) {
+				break;
+			case R_386_32:
 				/* Visit relocations that need to be adjusted */
 				visit(rel, sym);
-			}
-			else {
-				die("Unsupported relocation type: %d\n", r_type);
+				break;
+			default:
+				die("Unsupported relocation type: %s (%d)\n",
+				    rel_type(r_type), r_type);
+				break;
 			}
 		}
 	}
@@ -571,16 +601,15 @@ static void emit_relocs(int as_text)
 	}
 	else {
 		unsigned char buf[4];
-		buf[0] = buf[1] = buf[2] = buf[3] = 0;
 		/* Print a stop */
-		printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+		fwrite("\0\0\0\0", 4, 1, stdout);
 		/* Now print each relocation */
 		for (i = 0; i < reloc_count; i++) {
 			buf[0] = (relocs[i] >>  0) & 0xff;
 			buf[1] = (relocs[i] >>  8) & 0xff;
 			buf[2] = (relocs[i] >> 16) & 0xff;
 			buf[3] = (relocs[i] >> 24) & 0xff;
-			printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+			fwrite(buf, 4, 1, stdout);
 		}
 	}
 }
@@ -598,6 +627,8 @@ int main(int argc, char **argv)
 	FILE *fp;
 	int i;
 
+	regex_init();
+
 	show_absolute_syms = 0;
 	show_absolute_relocs = 0;
 	as_text = 0;
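
The whitelist rewrite above is easy to exercise on its own. The following stand-alone sketch reuses the abs_sym_regex pattern from the patch with the same POSIX regcomp()/regexec() calls; the test symbols and the harness itself are illustrative, not part of the patch:

#include <regex.h>
#include <stdio.h>
#include <stdlib.h>

/* Same whitelist pattern as the new relocs.c code above. */
static const char abs_sym_regex[] =
	"^(xen_irq_disable_direct_reloc$|"
	"xen_save_fl_direct_reloc$|"
	"VDSO|"
	"__crc_)";

int main(void)
{
	regex_t re;
	const char *syms[] = { "VDSO32_PRELINK", "__crc_printk",
			       "xen_save_fl_direct_reloc", "_text" };
	size_t i;

	if (regcomp(&re, abs_sym_regex, REG_EXTENDED | REG_NOSUB))
		exit(1);
	for (i = 0; i < sizeof(syms) / sizeof(syms[0]); i++)
		/* regexec() returns 0 on a match, so 0 means whitelisted. */
		printf("%-28s %s\n", syms[i],
		       regexec(&re, syms[i], 0, NULL, 0) ? "warn" : "whitelisted");
	regfree(&re);
	return 0;
}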
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b31cc54b4641..93e689f4bd86 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -16,7 +16,7 @@
  */
 
 #include <asm/segment.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <asm/boot.h>
 #include <asm/e820.h>
 #include <asm/page_types.h>
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 8ef60f20b371..919257f526f2 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -22,7 +22,7 @@ int main(void)
 	int i, j;
 	const char *str;
 
-	printf("static const char x86_cap_strs[] = \n");
+	printf("static const char x86_cap_strs[] =\n");
 
 	for (i = 0; i < NCAPINTS; i++) {
 		for (j = 0; j < 32; j++) {
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index 2723d9b5ce43..2b15aa488ffb 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -13,8 +13,8 @@
  */
 
 #include "boot.h"
-#include <linux/utsrelease.h>
-#include <linux/compile.h>
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
 
 const char kernel_version[] =
 	UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 819caa1f2008..ed7aeff786b2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -42,22 +42,15 @@ static u8 vga_set_basic_mode(void)
 {
 	struct biosregs ireg, oreg;
 	u16 ax;
-	u8 rows;
 	u8 mode;
 
 	initregs(&ireg);
 
+	/* Query current mode */
 	ax = 0x0f00;
 	intcall(0x10, &ireg, &oreg);
 	mode = oreg.al;
 
-	set_fs(0);
-	rows = rdfs8(0x484);	/* rows minus one */
-
-	if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
-	    (rows == 0 || rows == 24))
-		return mode;
-
 	if (mode != 3 && mode != 7)
 		mode = 3;
 
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index f767164cd5df..43eda284d27f 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -298,11 +298,18 @@ static void restore_screen(void)
 	}
 
 	/* Restore cursor position */
+	if (saved.curx >= xs)
+		saved.curx = xs-1;
+	if (saved.cury >= ys)
+		saved.cury = ys-1;
+
 	initregs(&ireg);
 	ireg.ah = 0x02;		/* Set cursor position */
 	ireg.dh = saved.cury;
 	ireg.dl = saved.curx;
 	intcall(0x10, &ireg, NULL);
+
+	store_cursor_position();
 }
 
 void set_video(void)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073d2cf1..9046e4af66ce 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -297,7 +297,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	 * size limits imposed on them by creating programs with large
 	 * arrays in the data or bss.
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = rlimit(RLIMIT_DATA);
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
 	if (ex.a_data + ex.a_bss > rlim)
@@ -308,14 +308,15 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
-	clear_thread_flag(TIF_ABI_PENDING);
+
+	setup_new_exec(bprm);
+
+	regs->cs = __USER32_CS;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+		regs->r13 = regs->r14 = regs->r15 = 0;
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
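
The rlimit(RLIMIT_DATA) helper above replaces an open-coded read of current->signal->rlim. For comparison, the equivalent check in user space goes through the POSIX getrlimit() API; this small sketch mirrors the a.out loader's data+bss test (the 64 MiB figure is made up):

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;
	unsigned long need = 64 * 1024 * 1024;	/* hypothetical a_data + a_bss */

	if (getrlimit(RLIMIT_DATA, &rl) != 0)
		return 1;
	/* RLIM_INFINITY means "no limit", as in the kernel check above. */
	if (rl.rlim_cur != RLIM_INFINITY && need > rl.rlim_cur) {
		fprintf(stderr, "data + bss exceed RLIMIT_DATA\n");
		return 1;
	}
	printf("within RLIMIT_DATA\n");
	return 0;
}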
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 4eefdca9832b..53147ad85b96 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -696,7 +696,7 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall		/* streams2 */
 	.quad stub32_vfork		/* 190 */
 	.quad compat_sys_getrlimit
-	.quad sys32_mmap2
+	.quad sys_mmap_pgoff
 	.quad sys32_truncate64
 	.quad sys32_ftruncate64
 	.quad sys32_stat64		/* 195 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index df82c0e48ded..422572c77923 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -155,9 +155,6 @@ struct mmap_arg_struct {
 asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
 {
 	struct mmap_arg_struct a;
-	struct file *file = NULL;
-	unsigned long retval;
-	struct mm_struct *mm ;
 
 	if (copy_from_user(&a, arg, sizeof(a)))
 		return -EFAULT;
@@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		return -EINVAL;
 
-	if (!(a.flags & MAP_ANONYMOUS)) {
-		file = fget(a.fd);
-		if (!file)
-			return -EBADF;
-	}
-
-	mm = current->mm;
-	down_write(&mm->mmap_sem);
-	retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags,
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
 			       a.offset>>PAGE_SHIFT);
-	if (file)
-		fput(file);
-
-	up_write(&mm->mmap_sem);
-
-	return retval;
 }
 
 asmlinkage long sys32_mprotect(unsigned long start, size_t len,
@@ -483,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 	return ret;
 }
 
-asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
-			    unsigned long prot, unsigned long flags,
-			    unsigned long fd, unsigned long pgoff)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long error;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return -EBADF;
-	}
-
-	down_write(&mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&mm->mmap_sem);
-
-	if (file)
-		fput(file);
-	return error;
-}
-
 asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
 {
 	char *arch = "x86_64";
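
Both converted paths now funnel into sys_mmap_pgoff(), which takes the offset in pages rather than bytes; hence the a.offset>>PAGE_SHIFT above and the page-alignment check in sys32_mmap. A user-space sketch of that byte-offset/page-offset relationship (the values are arbitrary):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	unsigned long byte_off = 3 * page;	/* hypothetical file offset */
	unsigned long pgoff = byte_off / page;	/* what the pgoff interface carries */

	/* sys32_mmap rejects offsets that are not page aligned (-EINVAL). */
	if (byte_off % page)
		fprintf(stderr, "offset not page aligned\n");
	printf("byte offset %lu == page offset %lu\n", byte_off, pgoff);
	return 0;
}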
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 60d2b2db0bc5..56f462cf22d2 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -142,6 +142,32 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 	return max_cstate;
 }
 
+static inline bool arch_has_acpi_pdc(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	return (c->x86_vendor == X86_VENDOR_INTEL ||
+		c->x86_vendor == X86_VENDOR_CENTAUR);
+}
+
+static inline void arch_acpi_set_pdc_bits(u32 *buf)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	buf[2] |= ACPI_PDC_C_CAPABILITY_SMP;
+
+	if (cpu_has(c, X86_FEATURE_EST))
+		buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
+
+	if (cpu_has(c, X86_FEATURE_ACPI))
+		buf[2] |= ACPI_PDC_T_FFH;
+
+	/*
+	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+	 */
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+}
+
 #else /* !CONFIG_ACPI */
 
 #define acpi_lapic 0
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..f1e253ceba4b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
+extern int alternatives_text_reserved(void *start, void *end);
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
+static inline int alternatives_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif	/* CONFIG_SMP */
 
 /* alternative assembly primitive: */
@@ -125,11 +130,16 @@ static inline void alternatives_smp_switch(int smp) {}
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
 		: output : "i" (0), ## input)
 
+/* Like alternative_io, but for replacing a direct call with another one. */
+#define alternative_call(oldfunc, newfunc, feature, output, input...)	\
+	asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
+		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+
 /*
  * use this macro(s) if you need more than one output parameter
  * in alternative_io
  */
-#define ASM_OUTPUT2(a, b) a, b
+#define ASM_OUTPUT2(a...) a
 
 struct paravirt_patch_site;
 #ifdef CONFIG_PARAVIRT
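
ASM_OUTPUT2 becomes variadic so that a full constraint list, commas included, can travel through a single macro argument of alternative_call(). The preprocessor mechanics in isolation, as a user-space sketch (GNU named variadic macros, the same extension the kernel uses):

#include <stdio.h>

/* Same shape as the variadic ASM_OUTPUT2 above: one named variadic
 * parameter swallows a comma-separated list whole. */
#define ASM_OUTPUT2(a...) a

int main(void)
{
	int x = 1, y = 2;

	/* Expands to: printf("%d %d\n", x, y); */
	printf(ASM_OUTPUT2("%d %d\n", x, y));
	return 0;
}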
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 84786fb9a23b..d2544f1d705d 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -28,7 +28,10 @@ extern void amd_iommu_flush_all_domains(void);
 extern void amd_iommu_flush_all_devices(void);
 extern void amd_iommu_apply_erratum_63(u16 devid);
 extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
-
+extern int amd_iommu_init_devices(void);
+extern void amd_iommu_uninit_devices(void);
+extern void amd_iommu_init_notifier(void);
+extern void amd_iommu_init_api(void);
 #ifndef CONFIG_AMD_IOMMU_STATS
 
 static inline void amd_iommu_stats_init(void) { }
diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h
new file mode 100644
index 000000000000..d370ee36a182
--- /dev/null
+++ b/arch/x86/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 4e1b8873c474..8f8217b9bdac 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -1,5 +1,300 @@
+#ifndef _ASM_X86_ATOMIC_H
+#define _ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <asm/alternative.h>
+#include <asm/cmpxchg.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	return v->counter;
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "addl %1,%0"
+		     : "+m" (v->counter)
+		     : "ir" (i));
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "subl %1,%0"
+		     : "+m" (v->counter)
+		     : "ir" (i));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic_sub_and_test(int i, atomic_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
+		     : "+m" (v->counter), "=qm" (c)
+		     : "ir" (i) : "memory");
+	return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "incl %0"
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic_dec(atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "decl %0"
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "decl %0; sete %1"
+		     : "+m" (v->counter), "=qm" (c)
+		     : : "memory");
+	return c != 0;
+}
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic_inc_and_test(atomic_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "incl %0; sete %1"
+		     : "+m" (v->counter), "=qm" (c)
+		     : : "memory");
+	return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int atomic_add_negative(int i, atomic_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
+		     : "+m" (v->counter), "=qm" (c)
+		     : "ir" (i) : "memory");
+	return c;
+}
+
+/**
+ * atomic_add_return - add integer and return
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int __i;
+#ifdef CONFIG_M386
+	unsigned long flags;
+	if (unlikely(boot_cpu_data.x86 <= 3))
+		goto no_xadd;
+#endif
+	/* Modern 486+ processor */
+	__i = i;
+	asm volatile(LOCK_PREFIX "xaddl %0, %1"
+		     : "+r" (i), "+m" (v->counter)
+		     : : "memory");
+	return i + __i;
+
+#ifdef CONFIG_M386
+no_xadd: /* Legacy 386 processor */
+	raw_local_irq_save(flags);
+	__i = atomic_read(v);
+	atomic_set(v, i + __i);
+	raw_local_irq_restore(flags);
+	return i + __i;
+#endif
+}
+
+/**
+ * atomic_sub_return - subtract integer and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to subtract
+ *
+ * Atomically subtracts @i from @v and returns @v - @i
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	return atomic_add_return(-i, v);
+}
+
+#define atomic_inc_return(v)  (atomic_add_return(1, v))
+#define atomic_dec_return(v)  (atomic_sub_return(1, v))
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
+
+/**
+ * atomic_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+/**
+ * atomic_inc_short - increment of a short integer
+ * @v: pointer to type int
+ *
+ * Atomically adds 1 to @v
+ * Returns the new value of @u
+ */
+static inline short int atomic_inc_short(short int *v)
+{
+	asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
+	return *v;
+}
+
+#ifdef CONFIG_X86_64
+/**
+ * atomic_or_long - OR of two long integers
+ * @v1: pointer to type unsigned long
+ * @v2: pointer to type unsigned long
+ *
+ * Atomically ORs @v1 and @v2
+ * Returns the result of the OR
+ */
+static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
+{
+	asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
+}
+#endif
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr)				\
+	asm volatile(LOCK_PREFIX "andl %0,%1"			\
+		     : : "r" (~(mask)), "m" (*(addr)) : "memory")
+
+#define atomic_set_mask(mask, addr)				\
+	asm volatile(LOCK_PREFIX "orl %0,%1"			\
+		     : : "r" ((unsigned)(mask)), "m" (*(addr))	\
+		     : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
 #ifdef CONFIG_X86_32
-# include "atomic_32.h"
+# include "atomic64_32.h"
 #else
-# include "atomic_64.h"
+# include "atomic64_64.h"
 #endif
+
+#include <asm-generic/atomic-long.h>
+#endif /* _ASM_X86_ATOMIC_H */
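
atomic_add_unless() above is a classic compare-and-swap retry loop: read the counter, bail out if it already equals @u, otherwise attempt the cmpxchg and retry with the value the CPU actually saw. A user-space sketch of the same loop built on the GCC __atomic intrinsics (the function name and memory orders here are illustrative; the kernel version relies on LOCK-prefixed cmpxchg instead):

#include <stdio.h>

static int add_unless(int *v, int a, int u)
{
	int c = __atomic_load_n(v, __ATOMIC_RELAXED);

	for (;;) {
		if (c == u)
			return 0;	/* value was u: do not add */
		/* On failure, c is reloaded with the current value. */
		if (__atomic_compare_exchange_n(v, &c, c + a, 0,
						__ATOMIC_SEQ_CST,
						__ATOMIC_RELAXED))
			return 1;	/* added */
	}
}

int main(void)
{
	int v = 0;

	printf("%d v=%d\n", add_unless(&v, 1, 0), v);	/* 0 v=0 */
	v = 5;
	printf("%d v=%d\n", add_unless(&v, 1, 0), v);	/* 1 v=6 */
	return 0;
}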
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
new file mode 100644
index 000000000000..03027bf28de5
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -0,0 +1,160 @@
+#ifndef _ASM_X86_ATOMIC64_32_H
+#define _ASM_X86_ATOMIC64_32_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/processor.h>
+//#include <asm/cmpxchg.h>
+
+/* An 64bit atomic type */
+
+typedef struct {
+	u64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val)	{ (val) }
+
+extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
+
+/**
+ * atomic64_xchg - xchg atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically xchgs the value of @ptr to @new_val and returns
+ * the old value.
+ */
+extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+extern void atomic64_set(atomic64_t *ptr, u64 new_val);
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+static inline u64 atomic64_read(atomic64_t *ptr)
+{
+	u64 res;
+
+	/*
+	 * Note, we inline this atomic64_t primitive because
+	 * it only clobbers EAX/EDX and leaves the others
+	 * untouched. We also (somewhat subtly) rely on the
+	 * fact that cmpxchg8b returns the current 64-bit value
+	 * of the memory location we are touching:
+	 */
+	asm volatile(
+		"mov %%ebx, %%eax\n\t"
+		"mov %%ecx, %%edx\n\t"
+		LOCK_PREFIX "cmpxchg8b %1\n"
+			: "=&A" (res)
+			: "m" (*ptr)
+		);
+
+	return res;
+}
+
+extern u64 atomic64_read(atomic64_t *ptr);
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
+
+/*
+ * Other variants with different arithmetic operators:
+ */
+extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
+extern u64 atomic64_inc_return(atomic64_t *ptr);
+extern u64 atomic64_dec_return(atomic64_t *ptr);
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+extern void atomic64_add(u64 delta, atomic64_t *ptr);
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+extern void atomic64_sub(u64 delta, atomic64_t *ptr);
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+extern void atomic64_inc(atomic64_t *ptr);
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+extern void atomic64_dec(atomic64_t *ptr);
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+extern int atomic64_dec_and_test(atomic64_t *ptr);
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+extern int atomic64_inc_and_test(atomic64_t *ptr);
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
+
+#endif /* _ASM_X86_ATOMIC64_32_H */
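
The inline atomic64_read() above leans on the fact that cmpxchg8b always returns the current 64-bit value of the memory operand, which is how a 32-bit CPU gets an atomic 64-bit load without a 64-bit mov. The same trick in portable form, as a sketch with the GCC intrinsics (illustrative only; the kernel version is the inline asm above):

#include <stdint.h>
#include <stdio.h>

static uint64_t atomic64_read_sketch(uint64_t *p)
{
	uint64_t expected = 0;

	/* If *p != 0 the CAS fails and writes the current value into
	 * expected; if *p == 0 it "replaces" 0 with 0.  Either way we
	 * end up holding an atomic snapshot of *p. */
	__atomic_compare_exchange_n(p, &expected, 0, 0,
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	return expected;
}

int main(void)
{
	uint64_t v = 0x123456789abcdef0ULL;

	printf("%llx\n", (unsigned long long)atomic64_read_sketch(&v));
	return 0;
}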
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
new file mode 100644
index 000000000000..51c5b4056929
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -0,0 +1,224 @@
1#ifndef _ASM_X86_ATOMIC64_64_H
2#define _ASM_X86_ATOMIC64_64_H
3
4#include <linux/types.h>
5#include <asm/alternative.h>
6#include <asm/cmpxchg.h>
7
8/* The 64-bit atomic type */
9
10#define ATOMIC64_INIT(i) { (i) }
11
12/**
13 * atomic64_read - read atomic64 variable
14 * @v: pointer of type atomic64_t
15 *
16 * Atomically reads the value of @v.
17 * Doesn't imply a read memory barrier.
18 */
19static inline long atomic64_read(const atomic64_t *v)
20{
21 return v->counter;
22}
23
24/**
25 * atomic64_set - set atomic64 variable
26 * @v: pointer to type atomic64_t
27 * @i: required value
28 *
29 * Atomically sets the value of @v to @i.
30 */
31static inline void atomic64_set(atomic64_t *v, long i)
32{
33 v->counter = i;
34}
35
36/**
37 * atomic64_add - add integer to atomic64 variable
38 * @i: integer value to add
39 * @v: pointer to type atomic64_t
40 *
41 * Atomically adds @i to @v.
42 */
43static inline void atomic64_add(long i, atomic64_t *v)
44{
45 asm volatile(LOCK_PREFIX "addq %1,%0"
46 : "=m" (v->counter)
47 : "er" (i), "m" (v->counter));
48}
49
50/**
51 * atomic64_sub - subtract the atomic64 variable
52 * @i: integer value to subtract
53 * @v: pointer to type atomic64_t
54 *
55 * Atomically subtracts @i from @v.
56 */
57static inline void atomic64_sub(long i, atomic64_t *v)
58{
59 asm volatile(LOCK_PREFIX "subq %1,%0"
60 : "=m" (v->counter)
61 : "er" (i), "m" (v->counter));
62}
63
64/**
65 * atomic64_sub_and_test - subtract value from variable and test result
66 * @i: integer value to subtract
67 * @v: pointer to type atomic64_t
68 *
69 * Atomically subtracts @i from @v and returns
70 * true if the result is zero, or false for all
71 * other cases.
72 */
73static inline int atomic64_sub_and_test(long i, atomic64_t *v)
74{
75 unsigned char c;
76
77 asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
78 : "=m" (v->counter), "=qm" (c)
79 : "er" (i), "m" (v->counter) : "memory");
80 return c;
81}
82
83/**
84 * atomic64_inc - increment atomic64 variable
85 * @v: pointer to type atomic64_t
86 *
87 * Atomically increments @v by 1.
88 */
89static inline void atomic64_inc(atomic64_t *v)
90{
91 asm volatile(LOCK_PREFIX "incq %0"
92 : "=m" (v->counter)
93 : "m" (v->counter));
94}
95
96/**
97 * atomic64_dec - decrement atomic64 variable
98 * @v: pointer to type atomic64_t
99 *
100 * Atomically decrements @v by 1.
101 */
102static inline void atomic64_dec(atomic64_t *v)
103{
104 asm volatile(LOCK_PREFIX "decq %0"
105 : "=m" (v->counter)
106 : "m" (v->counter));
107}
108
109/**
110 * atomic64_dec_and_test - decrement and test
111 * @v: pointer to type atomic64_t
112 *
113 * Atomically decrements @v by 1 and
114 * returns true if the result is 0, or false for all other
115 * cases.
116 */
117static inline int atomic64_dec_and_test(atomic64_t *v)
118{
119 unsigned char c;
120
121 asm volatile(LOCK_PREFIX "decq %0; sete %1"
122 : "=m" (v->counter), "=qm" (c)
123 : "m" (v->counter) : "memory");
124 return c != 0;
125}
126
127/**
128 * atomic64_inc_and_test - increment and test
129 * @v: pointer to type atomic64_t
130 *
131 * Atomically increments @v by 1
132 * and returns true if the result is zero, or false for all
133 * other cases.
134 */
135static inline int atomic64_inc_and_test(atomic64_t *v)
136{
137 unsigned char c;
138
139 asm volatile(LOCK_PREFIX "incq %0; sete %1"
140 : "=m" (v->counter), "=qm" (c)
141 : "m" (v->counter) : "memory");
142 return c != 0;
143}
144
145/**
146 * atomic64_add_negative - add and test if negative
147 * @i: integer value to add
148 * @v: pointer to type atomic64_t
149 *
150 * Atomically adds @i to @v and returns true
151 * if the result is negative, or false when
152 * result is greater than or equal to zero.
153 */
154static inline int atomic64_add_negative(long i, atomic64_t *v)
155{
156 unsigned char c;
157
158 asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
159 : "=m" (v->counter), "=qm" (c)
160 : "er" (i), "m" (v->counter) : "memory");
161 return c;
162}
163
164/**
165 * atomic64_add_return - add and return
166 * @i: integer value to add
167 * @v: pointer to type atomic64_t
168 *
169 * Atomically adds @i to @v and returns @i + @v
170 */
171static inline long atomic64_add_return(long i, atomic64_t *v)
172{
173 long __i = i;
174 asm volatile(LOCK_PREFIX "xaddq %0, %1;"
175 : "+r" (i), "+m" (v->counter)
176 : : "memory");
177 return i + __i;
178}
179
180static inline long atomic64_sub_return(long i, atomic64_t *v)
181{
182 return atomic64_add_return(-i, v);
183}
184
185#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
186#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
187
188static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
189{
190 return cmpxchg(&v->counter, old, new);
191}
192
193static inline long atomic64_xchg(atomic64_t *v, long new)
194{
195 return xchg(&v->counter, new);
196}
197
198/**
199 * atomic64_add_unless - add unless the number is a given value
200 * @v: pointer of type atomic64_t
201 * @a: the amount to add to v...
202 * @u: ...unless v is equal to u.
203 *
204 * Atomically adds @a to @v, so long as it was not @u.
205 * Returns non-zero if @v was not @u, and zero otherwise.
206 */
207static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
208{
209 long c, old;
210 c = atomic64_read(v);
211 for (;;) {
212 if (unlikely(c == (u)))
213 break;
214 old = atomic64_cmpxchg((v), c, c + (a));
215 if (likely(old == c))
216 break;
217 c = old;
218 }
219 return c != (u);
220}
221
222#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
223
224#endif /* _ASM_X86_ATOMIC64_64_H */
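The cmpxchg() loop in atomic64_add_unless() above is the usual building block for conditional reference counting, with atomic64_inc_not_zero() as the common special case. A minimal usage sketch under that reading — struct obj and obj_tryget() are hypothetical names, not part of this patch:

    #include <asm/atomic.h>

    struct obj {
            atomic64_t refcount;
    };

    /* Take a reference only while the object is still live. */
    static int obj_tryget(struct obj *o)
    {
            /* fails once a concurrent release has dropped the count to 0 */
            return atomic64_inc_not_zero(&o->refcount);
    }

The loop retries only when another CPU changed the counter between the plain read and the cmpxchg, so the common uncontended case is a single locked instruction.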
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
deleted file mode 100644
index dc5a667ff791..000000000000
--- a/arch/x86/include/asm/atomic_32.h
+++ /dev/null
@@ -1,415 +0,0 @@
1#ifndef _ASM_X86_ATOMIC_32_H
2#define _ASM_X86_ATOMIC_32_H
3
4#include <linux/compiler.h>
5#include <linux/types.h>
6#include <asm/processor.h>
7#include <asm/cmpxchg.h>
8
9/*
10 * Atomic operations that C can't guarantee us. Useful for
11 * resource counting etc.
12 */
13
14#define ATOMIC_INIT(i) { (i) }
15
16/**
17 * atomic_read - read atomic variable
18 * @v: pointer of type atomic_t
19 *
20 * Atomically reads the value of @v.
21 */
22static inline int atomic_read(const atomic_t *v)
23{
24 return v->counter;
25}
26
27/**
28 * atomic_set - set atomic variable
29 * @v: pointer of type atomic_t
30 * @i: required value
31 *
32 * Atomically sets the value of @v to @i.
33 */
34static inline void atomic_set(atomic_t *v, int i)
35{
36 v->counter = i;
37}
38
39/**
40 * atomic_add - add integer to atomic variable
41 * @i: integer value to add
42 * @v: pointer of type atomic_t
43 *
44 * Atomically adds @i to @v.
45 */
46static inline void atomic_add(int i, atomic_t *v)
47{
48 asm volatile(LOCK_PREFIX "addl %1,%0"
49 : "+m" (v->counter)
50 : "ir" (i));
51}
52
53/**
54 * atomic_sub - subtract integer from atomic variable
55 * @i: integer value to subtract
56 * @v: pointer of type atomic_t
57 *
58 * Atomically subtracts @i from @v.
59 */
60static inline void atomic_sub(int i, atomic_t *v)
61{
62 asm volatile(LOCK_PREFIX "subl %1,%0"
63 : "+m" (v->counter)
64 : "ir" (i));
65}
66
67/**
68 * atomic_sub_and_test - subtract value from variable and test result
69 * @i: integer value to subtract
70 * @v: pointer of type atomic_t
71 *
72 * Atomically subtracts @i from @v and returns
73 * true if the result is zero, or false for all
74 * other cases.
75 */
76static inline int atomic_sub_and_test(int i, atomic_t *v)
77{
78 unsigned char c;
79
80 asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
81 : "+m" (v->counter), "=qm" (c)
82 : "ir" (i) : "memory");
83 return c;
84}
85
86/**
87 * atomic_inc - increment atomic variable
88 * @v: pointer of type atomic_t
89 *
90 * Atomically increments @v by 1.
91 */
92static inline void atomic_inc(atomic_t *v)
93{
94 asm volatile(LOCK_PREFIX "incl %0"
95 : "+m" (v->counter));
96}
97
98/**
99 * atomic_dec - decrement atomic variable
100 * @v: pointer of type atomic_t
101 *
102 * Atomically decrements @v by 1.
103 */
104static inline void atomic_dec(atomic_t *v)
105{
106 asm volatile(LOCK_PREFIX "decl %0"
107 : "+m" (v->counter));
108}
109
110/**
111 * atomic_dec_and_test - decrement and test
112 * @v: pointer of type atomic_t
113 *
114 * Atomically decrements @v by 1 and
115 * returns true if the result is 0, or false for all other
116 * cases.
117 */
118static inline int atomic_dec_and_test(atomic_t *v)
119{
120 unsigned char c;
121
122 asm volatile(LOCK_PREFIX "decl %0; sete %1"
123 : "+m" (v->counter), "=qm" (c)
124 : : "memory");
125 return c != 0;
126}
127
128/**
129 * atomic_inc_and_test - increment and test
130 * @v: pointer of type atomic_t
131 *
132 * Atomically increments @v by 1
133 * and returns true if the result is zero, or false for all
134 * other cases.
135 */
136static inline int atomic_inc_and_test(atomic_t *v)
137{
138 unsigned char c;
139
140 asm volatile(LOCK_PREFIX "incl %0; sete %1"
141 : "+m" (v->counter), "=qm" (c)
142 : : "memory");
143 return c != 0;
144}
145
146/**
147 * atomic_add_negative - add and test if negative
148 * @v: pointer of type atomic_t
149 * @i: integer value to add
150 *
151 * Atomically adds @i to @v and returns true
152 * if the result is negative, or false when
153 * result is greater than or equal to zero.
154 */
155static inline int atomic_add_negative(int i, atomic_t *v)
156{
157 unsigned char c;
158
159 asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
160 : "+m" (v->counter), "=qm" (c)
161 : "ir" (i) : "memory");
162 return c;
163}
164
165/**
166 * atomic_add_return - add integer and return
167 * @v: pointer of type atomic_t
168 * @i: integer value to add
169 *
170 * Atomically adds @i to @v and returns @i + @v
171 */
172static inline int atomic_add_return(int i, atomic_t *v)
173{
174 int __i;
175#ifdef CONFIG_M386
176 unsigned long flags;
177 if (unlikely(boot_cpu_data.x86 <= 3))
178 goto no_xadd;
179#endif
180 /* Modern 486+ processor */
181 __i = i;
182 asm volatile(LOCK_PREFIX "xaddl %0, %1"
183 : "+r" (i), "+m" (v->counter)
184 : : "memory");
185 return i + __i;
186
187#ifdef CONFIG_M386
188no_xadd: /* Legacy 386 processor */
189 local_irq_save(flags);
190 __i = atomic_read(v);
191 atomic_set(v, i + __i);
192 local_irq_restore(flags);
193 return i + __i;
194#endif
195}
196
197/**
198 * atomic_sub_return - subtract integer and return
199 * @v: pointer of type atomic_t
200 * @i: integer value to subtract
201 *
202 * Atomically subtracts @i from @v and returns @v - @i
203 */
204static inline int atomic_sub_return(int i, atomic_t *v)
205{
206 return atomic_add_return(-i, v);
207}
208
209static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
210{
211 return cmpxchg(&v->counter, old, new);
212}
213
214static inline int atomic_xchg(atomic_t *v, int new)
215{
216 return xchg(&v->counter, new);
217}
218
219/**
220 * atomic_add_unless - add unless the number is already a given value
221 * @v: pointer of type atomic_t
222 * @a: the amount to add to v...
223 * @u: ...unless v is equal to u.
224 *
225 * Atomically adds @a to @v, so long as @v was not already @u.
226 * Returns non-zero if @v was not @u, and zero otherwise.
227 */
228static inline int atomic_add_unless(atomic_t *v, int a, int u)
229{
230 int c, old;
231 c = atomic_read(v);
232 for (;;) {
233 if (unlikely(c == (u)))
234 break;
235 old = atomic_cmpxchg((v), c, c + (a));
236 if (likely(old == c))
237 break;
238 c = old;
239 }
240 return c != (u);
241}
242
243#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
244
245#define atomic_inc_return(v) (atomic_add_return(1, v))
246#define atomic_dec_return(v) (atomic_sub_return(1, v))
247
248/* These are x86-specific, used by some header files */
249#define atomic_clear_mask(mask, addr) \
250 asm volatile(LOCK_PREFIX "andl %0,%1" \
251 : : "r" (~(mask)), "m" (*(addr)) : "memory")
252
253#define atomic_set_mask(mask, addr) \
254 asm volatile(LOCK_PREFIX "orl %0,%1" \
255 : : "r" (mask), "m" (*(addr)) : "memory")
256
257/* Atomic operations are already serializing on x86 */
258#define smp_mb__before_atomic_dec() barrier()
259#define smp_mb__after_atomic_dec() barrier()
260#define smp_mb__before_atomic_inc() barrier()
261#define smp_mb__after_atomic_inc() barrier()
262
263/* A 64-bit atomic type */
264
265typedef struct {
266 u64 __aligned(8) counter;
267} atomic64_t;
268
269#define ATOMIC64_INIT(val) { (val) }
270
271extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
272
273/**
274 * atomic64_xchg - xchg atomic64 variable
275 * @ptr: pointer to type atomic64_t
276 * @new_val: value to assign
277 *
278 * Atomically xchgs the value of @ptr to @new_val and returns
279 * the old value.
280 */
281extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
282
283/**
284 * atomic64_set - set atomic64 variable
285 * @ptr: pointer to type atomic64_t
286 * @new_val: value to assign
287 *
288 * Atomically sets the value of @ptr to @new_val.
289 */
290extern void atomic64_set(atomic64_t *ptr, u64 new_val);
291
292/**
293 * atomic64_read - read atomic64 variable
294 * @ptr: pointer to type atomic64_t
295 *
296 * Atomically reads the value of @ptr and returns it.
297 */
298static inline u64 atomic64_read(atomic64_t *ptr)
299{
300 u64 res;
301
302 /*
303 * Note, we inline this atomic64_t primitive because
304 * it only clobbers EAX/EDX and leaves the others
305 * untouched. We also (somewhat subtly) rely on the
306 * fact that cmpxchg8b returns the current 64-bit value
307 * of the memory location we are touching:
308 */
309 asm volatile(
310 "mov %%ebx, %%eax\n\t"
311 "mov %%ecx, %%edx\n\t"
312 LOCK_PREFIX "cmpxchg8b %1\n"
313 : "=&A" (res)
314 : "m" (*ptr)
315 );
316
317 return res;
318}
319
320extern u64 atomic64_read(atomic64_t *ptr);
321
322/**
323 * atomic64_add_return - add and return
324 * @delta: integer value to add
325 * @ptr: pointer to type atomic64_t
326 *
327 * Atomically adds @delta to @ptr and returns @delta + *@ptr
328 */
329extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
330
331/*
332 * Other variants with different arithmetic operators:
333 */
334extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
335extern u64 atomic64_inc_return(atomic64_t *ptr);
336extern u64 atomic64_dec_return(atomic64_t *ptr);
337
338/**
339 * atomic64_add - add integer to atomic64 variable
340 * @delta: integer value to add
341 * @ptr: pointer to type atomic64_t
342 *
343 * Atomically adds @delta to @ptr.
344 */
345extern void atomic64_add(u64 delta, atomic64_t *ptr);
346
347/**
348 * atomic64_sub - subtract integer from atomic64 variable
349 * @delta: integer value to subtract
350 * @ptr: pointer to type atomic64_t
351 *
352 * Atomically subtracts @delta from @ptr.
353 */
354extern void atomic64_sub(u64 delta, atomic64_t *ptr);
355
356/**
357 * atomic64_sub_and_test - subtract value from variable and test result
358 * @delta: integer value to subtract
359 * @ptr: pointer to type atomic64_t
360 *
361 * Atomically subtracts @delta from @ptr and returns
362 * true if the result is zero, or false for all
363 * other cases.
364 */
365extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
366
367/**
368 * atomic64_inc - increment atomic64 variable
369 * @ptr: pointer to type atomic64_t
370 *
371 * Atomically increments @ptr by 1.
372 */
373extern void atomic64_inc(atomic64_t *ptr);
374
375/**
376 * atomic64_dec - decrement atomic64 variable
377 * @ptr: pointer to type atomic64_t
378 *
379 * Atomically decrements @ptr by 1.
380 */
381extern void atomic64_dec(atomic64_t *ptr);
382
383/**
384 * atomic64_dec_and_test - decrement and test
385 * @ptr: pointer to type atomic64_t
386 *
387 * Atomically decrements @ptr by 1 and
388 * returns true if the result is 0, or false for all other
389 * cases.
390 */
391extern int atomic64_dec_and_test(atomic64_t *ptr);
392
393/**
394 * atomic64_inc_and_test - increment and test
395 * @ptr: pointer to type atomic64_t
396 *
397 * Atomically increments @ptr by 1
398 * and returns true if the result is zero, or false for all
399 * other cases.
400 */
401extern int atomic64_inc_and_test(atomic64_t *ptr);
402
403/**
404 * atomic64_add_negative - add and test if negative
405 * @delta: integer value to add
406 * @ptr: pointer to type atomic64_t
407 *
408 * Atomically adds @delta to @ptr and returns true
409 * if the result is negative, or false when
410 * result is greater than or equal to zero.
411 */
412extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
413
414#include <asm-generic/atomic-long.h>
415#endif /* _ASM_X86_ATOMIC_32_H */
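One detail worth spelling out from the deleted 32-bit file: atomic64_read() uses a lock-prefixed cmpxchg8b because a plain 64-bit load is split into two 32-bit loads on i386, and a concurrent writer can land between them. An illustration of the torn read that the cmpxchg8b form rules out (sketch only, not kernel code):

    /* What a naive 64-bit read does on 32-bit x86: */
    u64 torn_read(const volatile u64 *p)
    {
            u32 lo = ((const volatile u32 *)p)[0];  /* writer may run here */
            u32 hi = ((const volatile u32 *)p)[1];
            return ((u64)hi << 32) | lo;    /* halves of two different values */
    }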
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
deleted file mode 100644
index d605dc268e79..000000000000
--- a/arch/x86/include/asm/atomic_64.h
+++ /dev/null
@@ -1,485 +0,0 @@
1#ifndef _ASM_X86_ATOMIC_64_H
2#define _ASM_X86_ATOMIC_64_H
3
4#include <linux/types.h>
5#include <asm/alternative.h>
6#include <asm/cmpxchg.h>
7
8/*
9 * Atomic operations that C can't guarantee us. Useful for
10 * resource counting etc.
11 */
12
13#define ATOMIC_INIT(i) { (i) }
14
15/**
16 * atomic_read - read atomic variable
17 * @v: pointer of type atomic_t
18 *
19 * Atomically reads the value of @v.
20 */
21static inline int atomic_read(const atomic_t *v)
22{
23 return v->counter;
24}
25
26/**
27 * atomic_set - set atomic variable
28 * @v: pointer of type atomic_t
29 * @i: required value
30 *
31 * Atomically sets the value of @v to @i.
32 */
33static inline void atomic_set(atomic_t *v, int i)
34{
35 v->counter = i;
36}
37
38/**
39 * atomic_add - add integer to atomic variable
40 * @i: integer value to add
41 * @v: pointer of type atomic_t
42 *
43 * Atomically adds @i to @v.
44 */
45static inline void atomic_add(int i, atomic_t *v)
46{
47 asm volatile(LOCK_PREFIX "addl %1,%0"
48 : "=m" (v->counter)
49 : "ir" (i), "m" (v->counter));
50}
51
52/**
53 * atomic_sub - subtract integer from atomic variable
54 * @i: integer value to subtract
55 * @v: pointer of type atomic_t
56 *
57 * Atomically subtracts @i from @v.
58 */
59static inline void atomic_sub(int i, atomic_t *v)
60{
61 asm volatile(LOCK_PREFIX "subl %1,%0"
62 : "=m" (v->counter)
63 : "ir" (i), "m" (v->counter));
64}
65
66/**
67 * atomic_sub_and_test - subtract value from variable and test result
68 * @i: integer value to subtract
69 * @v: pointer of type atomic_t
70 *
71 * Atomically subtracts @i from @v and returns
72 * true if the result is zero, or false for all
73 * other cases.
74 */
75static inline int atomic_sub_and_test(int i, atomic_t *v)
76{
77 unsigned char c;
78
79 asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
80 : "=m" (v->counter), "=qm" (c)
81 : "ir" (i), "m" (v->counter) : "memory");
82 return c;
83}
84
85/**
86 * atomic_inc - increment atomic variable
87 * @v: pointer of type atomic_t
88 *
89 * Atomically increments @v by 1.
90 */
91static inline void atomic_inc(atomic_t *v)
92{
93 asm volatile(LOCK_PREFIX "incl %0"
94 : "=m" (v->counter)
95 : "m" (v->counter));
96}
97
98/**
99 * atomic_dec - decrement atomic variable
100 * @v: pointer of type atomic_t
101 *
102 * Atomically decrements @v by 1.
103 */
104static inline void atomic_dec(atomic_t *v)
105{
106 asm volatile(LOCK_PREFIX "decl %0"
107 : "=m" (v->counter)
108 : "m" (v->counter));
109}
110
111/**
112 * atomic_dec_and_test - decrement and test
113 * @v: pointer of type atomic_t
114 *
115 * Atomically decrements @v by 1 and
116 * returns true if the result is 0, or false for all other
117 * cases.
118 */
119static inline int atomic_dec_and_test(atomic_t *v)
120{
121 unsigned char c;
122
123 asm volatile(LOCK_PREFIX "decl %0; sete %1"
124 : "=m" (v->counter), "=qm" (c)
125 : "m" (v->counter) : "memory");
126 return c != 0;
127}
128
129/**
130 * atomic_inc_and_test - increment and test
131 * @v: pointer of type atomic_t
132 *
133 * Atomically increments @v by 1
134 * and returns true if the result is zero, or false for all
135 * other cases.
136 */
137static inline int atomic_inc_and_test(atomic_t *v)
138{
139 unsigned char c;
140
141 asm volatile(LOCK_PREFIX "incl %0; sete %1"
142 : "=m" (v->counter), "=qm" (c)
143 : "m" (v->counter) : "memory");
144 return c != 0;
145}
146
147/**
148 * atomic_add_negative - add and test if negative
149 * @i: integer value to add
150 * @v: pointer of type atomic_t
151 *
152 * Atomically adds @i to @v and returns true
153 * if the result is negative, or false when
154 * result is greater than or equal to zero.
155 */
156static inline int atomic_add_negative(int i, atomic_t *v)
157{
158 unsigned char c;
159
160 asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
161 : "=m" (v->counter), "=qm" (c)
162 : "ir" (i), "m" (v->counter) : "memory");
163 return c;
164}
165
166/**
167 * atomic_add_return - add and return
168 * @i: integer value to add
169 * @v: pointer of type atomic_t
170 *
171 * Atomically adds @i to @v and returns @i + @v
172 */
173static inline int atomic_add_return(int i, atomic_t *v)
174{
175 int __i = i;
176 asm volatile(LOCK_PREFIX "xaddl %0, %1"
177 : "+r" (i), "+m" (v->counter)
178 : : "memory");
179 return i + __i;
180}
181
182static inline int atomic_sub_return(int i, atomic_t *v)
183{
184 return atomic_add_return(-i, v);
185}
186
187#define atomic_inc_return(v) (atomic_add_return(1, v))
188#define atomic_dec_return(v) (atomic_sub_return(1, v))
189
190/* The 64-bit atomic type */
191
192#define ATOMIC64_INIT(i) { (i) }
193
194/**
195 * atomic64_read - read atomic64 variable
196 * @v: pointer of type atomic64_t
197 *
198 * Atomically reads the value of @v.
199 * Doesn't imply a read memory barrier.
200 */
201static inline long atomic64_read(const atomic64_t *v)
202{
203 return v->counter;
204}
205
206/**
207 * atomic64_set - set atomic64 variable
208 * @v: pointer to type atomic64_t
209 * @i: required value
210 *
211 * Atomically sets the value of @v to @i.
212 */
213static inline void atomic64_set(atomic64_t *v, long i)
214{
215 v->counter = i;
216}
217
218/**
219 * atomic64_add - add integer to atomic64 variable
220 * @i: integer value to add
221 * @v: pointer to type atomic64_t
222 *
223 * Atomically adds @i to @v.
224 */
225static inline void atomic64_add(long i, atomic64_t *v)
226{
227 asm volatile(LOCK_PREFIX "addq %1,%0"
228 : "=m" (v->counter)
229 : "er" (i), "m" (v->counter));
230}
231
232/**
233 * atomic64_sub - subtract integer from atomic64 variable
234 * @i: integer value to subtract
235 * @v: pointer to type atomic64_t
236 *
237 * Atomically subtracts @i from @v.
238 */
239static inline void atomic64_sub(long i, atomic64_t *v)
240{
241 asm volatile(LOCK_PREFIX "subq %1,%0"
242 : "=m" (v->counter)
243 : "er" (i), "m" (v->counter));
244}
245
246/**
247 * atomic64_sub_and_test - subtract value from variable and test result
248 * @i: integer value to subtract
249 * @v: pointer to type atomic64_t
250 *
251 * Atomically subtracts @i from @v and returns
252 * true if the result is zero, or false for all
253 * other cases.
254 */
255static inline int atomic64_sub_and_test(long i, atomic64_t *v)
256{
257 unsigned char c;
258
259 asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
260 : "=m" (v->counter), "=qm" (c)
261 : "er" (i), "m" (v->counter) : "memory");
262 return c;
263}
264
265/**
266 * atomic64_inc - increment atomic64 variable
267 * @v: pointer to type atomic64_t
268 *
269 * Atomically increments @v by 1.
270 */
271static inline void atomic64_inc(atomic64_t *v)
272{
273 asm volatile(LOCK_PREFIX "incq %0"
274 : "=m" (v->counter)
275 : "m" (v->counter));
276}
277
278/**
279 * atomic64_dec - decrement atomic64 variable
280 * @v: pointer to type atomic64_t
281 *
282 * Atomically decrements @v by 1.
283 */
284static inline void atomic64_dec(atomic64_t *v)
285{
286 asm volatile(LOCK_PREFIX "decq %0"
287 : "=m" (v->counter)
288 : "m" (v->counter));
289}
290
291/**
292 * atomic64_dec_and_test - decrement and test
293 * @v: pointer to type atomic64_t
294 *
295 * Atomically decrements @v by 1 and
296 * returns true if the result is 0, or false for all other
297 * cases.
298 */
299static inline int atomic64_dec_and_test(atomic64_t *v)
300{
301 unsigned char c;
302
303 asm volatile(LOCK_PREFIX "decq %0; sete %1"
304 : "=m" (v->counter), "=qm" (c)
305 : "m" (v->counter) : "memory");
306 return c != 0;
307}
308
309/**
310 * atomic64_inc_and_test - increment and test
311 * @v: pointer to type atomic64_t
312 *
313 * Atomically increments @v by 1
314 * and returns true if the result is zero, or false for all
315 * other cases.
316 */
317static inline int atomic64_inc_and_test(atomic64_t *v)
318{
319 unsigned char c;
320
321 asm volatile(LOCK_PREFIX "incq %0; sete %1"
322 : "=m" (v->counter), "=qm" (c)
323 : "m" (v->counter) : "memory");
324 return c != 0;
325}
326
327/**
328 * atomic64_add_negative - add and test if negative
329 * @i: integer value to add
330 * @v: pointer to type atomic64_t
331 *
332 * Atomically adds @i to @v and returns true
333 * if the result is negative, or false when
334 * result is greater than or equal to zero.
335 */
336static inline int atomic64_add_negative(long i, atomic64_t *v)
337{
338 unsigned char c;
339
340 asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
341 : "=m" (v->counter), "=qm" (c)
342 : "er" (i), "m" (v->counter) : "memory");
343 return c;
344}
345
346/**
347 * atomic64_add_return - add and return
348 * @i: integer value to add
349 * @v: pointer to type atomic64_t
350 *
351 * Atomically adds @i to @v and returns @i + @v
352 */
353static inline long atomic64_add_return(long i, atomic64_t *v)
354{
355 long __i = i;
356 asm volatile(LOCK_PREFIX "xaddq %0, %1;"
357 : "+r" (i), "+m" (v->counter)
358 : : "memory");
359 return i + __i;
360}
361
362static inline long atomic64_sub_return(long i, atomic64_t *v)
363{
364 return atomic64_add_return(-i, v);
365}
366
367#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
368#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
369
370static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
371{
372 return cmpxchg(&v->counter, old, new);
373}
374
375static inline long atomic64_xchg(atomic64_t *v, long new)
376{
377 return xchg(&v->counter, new);
378}
379
380static inline long atomic_cmpxchg(atomic_t *v, int old, int new)
381{
382 return cmpxchg(&v->counter, old, new);
383}
384
385static inline long atomic_xchg(atomic_t *v, int new)
386{
387 return xchg(&v->counter, new);
388}
389
390/**
391 * atomic_add_unless - add unless the number is a given value
392 * @v: pointer of type atomic_t
393 * @a: the amount to add to v...
394 * @u: ...unless v is equal to u.
395 *
396 * Atomically adds @a to @v, so long as it was not @u.
397 * Returns non-zero if @v was not @u, and zero otherwise.
398 */
399static inline int atomic_add_unless(atomic_t *v, int a, int u)
400{
401 int c, old;
402 c = atomic_read(v);
403 for (;;) {
404 if (unlikely(c == (u)))
405 break;
406 old = atomic_cmpxchg((v), c, c + (a));
407 if (likely(old == c))
408 break;
409 c = old;
410 }
411 return c != (u);
412}
413
414#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
415
416/**
417 * atomic64_add_unless - add unless the number is a given value
418 * @v: pointer of type atomic64_t
419 * @a: the amount to add to v...
420 * @u: ...unless v is equal to u.
421 *
422 * Atomically adds @a to @v, so long as it was not @u.
423 * Returns non-zero if @v was not @u, and zero otherwise.
424 */
425static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
426{
427 long c, old;
428 c = atomic64_read(v);
429 for (;;) {
430 if (unlikely(c == (u)))
431 break;
432 old = atomic64_cmpxchg((v), c, c + (a));
433 if (likely(old == c))
434 break;
435 c = old;
436 }
437 return c != (u);
438}
439
440/**
441 * atomic_inc_short - increment of a short integer
442 * @v: pointer to type short int
443 *
444 * Atomically adds 1 to @v
445 * Returns the new value of @v
446 */
447static inline short int atomic_inc_short(short int *v)
448{
449 asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
450 return *v;
451}
452
453/**
454 * atomic_or_long - OR of two long integers
455 * @v1: pointer to type unsigned long
456 * @v2: pointer to type unsigned long
457 *
458 * Atomically ORs @v2 into the value at @v1
459 * (no return value; the result is stored through @v1)
460 */
461static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
462{
463 asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
464}
465
466#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
467
468/* These are x86-specific, used by some header files */
469#define atomic_clear_mask(mask, addr) \
470 asm volatile(LOCK_PREFIX "andl %0,%1" \
471 : : "r" (~(mask)), "m" (*(addr)) : "memory")
472
473#define atomic_set_mask(mask, addr) \
474 asm volatile(LOCK_PREFIX "orl %0,%1" \
475 : : "r" ((unsigned)(mask)), "m" (*(addr)) \
476 : "memory")
477
478/* Atomic operations are already serializing on x86 */
479#define smp_mb__before_atomic_dec() barrier()
480#define smp_mb__after_atomic_dec() barrier()
481#define smp_mb__before_atomic_inc() barrier()
482#define smp_mb__after_atomic_inc() barrier()
483
484#include <asm-generic/atomic-long.h>
485#endif /* _ASM_X86_ATOMIC_64_H */
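For readers tracing atomic_add_return()/atomic64_add_return() in the files above: xadd writes the old memory value back into the register operand while storing the sum, so after the instruction i holds the pre-add counter and i + __i is the post-add result. The equivalent semantics in plain C, atomicity aside (illustration only):

    /* what LOCK xadd computes, minus the atomicity */
    long xadd_semantics(long *mem, long delta)
    {
            long old = *mem;        /* xadd leaves the old value in the reg */
            *mem = old + delta;     /* ...and stores the sum to memory */
            return old + delta;     /* add_return: the new value */
    }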
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
deleted file mode 100644
index d96c1ee3a95c..000000000000
--- a/arch/x86/include/asm/cpu_debug.h
+++ /dev/null
@@ -1,127 +0,0 @@
1#ifndef _ASM_X86_CPU_DEBUG_H
2#define _ASM_X86_CPU_DEBUG_H
3
4/*
5 * CPU x86 architecture debug
6 *
7 * Copyright(C) 2009 Jaswinder Singh Rajput
8 */
9
10/* Register flags */
11enum cpu_debug_bit {
12/* Model Specific Registers (MSRs) */
13 CPU_MC_BIT, /* Machine Check */
14 CPU_MONITOR_BIT, /* Monitor */
15 CPU_TIME_BIT, /* Time */
16 CPU_PMC_BIT, /* Performance Monitor */
17 CPU_PLATFORM_BIT, /* Platform */
18 CPU_APIC_BIT, /* APIC */
19 CPU_POWERON_BIT, /* Power-on */
20 CPU_CONTROL_BIT, /* Control */
21 CPU_FEATURES_BIT, /* Features control */
22 CPU_LBRANCH_BIT, /* Last Branch */
23 CPU_BIOS_BIT, /* BIOS */
24 CPU_FREQ_BIT, /* Frequency */
25 CPU_MTTR_BIT, /* MTRR */
26 CPU_PERF_BIT, /* Performance */
27 CPU_CACHE_BIT, /* Cache */
28 CPU_SYSENTER_BIT, /* Sysenter */
29 CPU_THERM_BIT, /* Thermal */
30 CPU_MISC_BIT, /* Miscellaneous */
31 CPU_DEBUG_BIT, /* Debug */
32 CPU_PAT_BIT, /* PAT */
33 CPU_VMX_BIT, /* VMX */
34 CPU_CALL_BIT, /* System Call */
35 CPU_BASE_BIT, /* BASE Address */
36 CPU_VER_BIT, /* Version ID */
37 CPU_CONF_BIT, /* Configuration */
38 CPU_SMM_BIT, /* System mgmt mode */
39 CPU_SVM_BIT, /* Secure Virtual Machine */
40 CPU_OSVM_BIT, /* OS-Visible Workaround */
41/* Standard Registers */
42 CPU_TSS_BIT, /* Task Stack Segment */
43 CPU_CR_BIT, /* Control Registers */
44 CPU_DT_BIT, /* Descriptor Table */
45/* End of Registers flags */
46 CPU_REG_ALL_BIT, /* Select all Registers */
47};
48
49#define CPU_REG_ALL (~0) /* Select all Registers */
50
51#define CPU_MC (1 << CPU_MC_BIT)
52#define CPU_MONITOR (1 << CPU_MONITOR_BIT)
53#define CPU_TIME (1 << CPU_TIME_BIT)
54#define CPU_PMC (1 << CPU_PMC_BIT)
55#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT)
56#define CPU_APIC (1 << CPU_APIC_BIT)
57#define CPU_POWERON (1 << CPU_POWERON_BIT)
58#define CPU_CONTROL (1 << CPU_CONTROL_BIT)
59#define CPU_FEATURES (1 << CPU_FEATURES_BIT)
60#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT)
61#define CPU_BIOS (1 << CPU_BIOS_BIT)
62#define CPU_FREQ (1 << CPU_FREQ_BIT)
63#define CPU_MTRR (1 << CPU_MTTR_BIT)
64#define CPU_PERF (1 << CPU_PERF_BIT)
65#define CPU_CACHE (1 << CPU_CACHE_BIT)
66#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT)
67#define CPU_THERM (1 << CPU_THERM_BIT)
68#define CPU_MISC (1 << CPU_MISC_BIT)
69#define CPU_DEBUG (1 << CPU_DEBUG_BIT)
70#define CPU_PAT (1 << CPU_PAT_BIT)
71#define CPU_VMX (1 << CPU_VMX_BIT)
72#define CPU_CALL (1 << CPU_CALL_BIT)
73#define CPU_BASE (1 << CPU_BASE_BIT)
74#define CPU_VER (1 << CPU_VER_BIT)
75#define CPU_CONF (1 << CPU_CONF_BIT)
76#define CPU_SMM (1 << CPU_SMM_BIT)
77#define CPU_SVM (1 << CPU_SVM_BIT)
78#define CPU_OSVM (1 << CPU_OSVM_BIT)
79#define CPU_TSS (1 << CPU_TSS_BIT)
80#define CPU_CR (1 << CPU_CR_BIT)
81#define CPU_DT (1 << CPU_DT_BIT)
82
83/* Register file flags */
84enum cpu_file_bit {
85 CPU_INDEX_BIT, /* index */
86 CPU_VALUE_BIT, /* value */
87};
88
89#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
90
91#define MAX_CPU_FILES 512
92
93struct cpu_private {
94 unsigned cpu;
95 unsigned type;
96 unsigned reg;
97 unsigned file;
98};
99
100struct cpu_debug_base {
101 char *name; /* Register name */
102 unsigned flag; /* Register flag */
103 unsigned write; /* Register write flag */
104};
105
106/*
107 * Currently it looks similar to cpu_debug_base but once we add more files
108 * cpu_file_base will go in a different direction
109 */
110struct cpu_file_base {
111 char *name; /* Register file name */
112 unsigned flag; /* Register file flag */
113 unsigned write; /* Register write flag */
114};
115
116struct cpu_cpuX_base {
117 struct dentry *dentry; /* Register dentry */
118 int init; /* Register index file */
119};
120
121struct cpu_debug_range {
122 unsigned min; /* Register range min */
123 unsigned max; /* Register range max */
124 unsigned flag; /* Supported flags */
125};
126
127#endif /* _ASM_X86_CPU_DEBUG_H */
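The CPU_* register-group masks in the deleted header compose by OR, one bit per group, with CPU_REG_ALL selecting everything. A sketch of how a selection was meant to be tested — group_selected() is a hypothetical helper, and the header itself is gone after this patch:

    static int group_selected(unsigned sel, unsigned group_mask)
    {
            return (sel & group_mask) != 0;
    }

    /* e.g. group_selected(CPU_MC | CPU_APIC | CPU_MTRR, CPU_APIC) != 0 */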
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 613700f27a4a..0cd82d068613 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -153,6 +153,7 @@
153#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ 153#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
154#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ 154#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
155#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ 155#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
156#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
156 157
157/* 158/*
158 * Auxiliary flags: Linux defined - For features scattered in various 159 * Auxiliary flags: Linux defined - For features scattered in various
@@ -167,6 +168,10 @@
167#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ 168#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
168#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ 169#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */
169#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ 170#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */
171#define X86_FEATURE_NPT (8*32+5) /* AMD Nested Page Table support */
172#define X86_FEATURE_LBRV (8*32+6) /* AMD LBR Virtualization support */
173#define X86_FEATURE_SVML (8*32+7) /* "svm_lock" AMD SVM locking MSR */
174#define X86_FEATURE_NRIPS (8*32+8) /* "nrip_save" AMD SVM next_rip save */
170 175
171#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 176#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
172 177
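The new NODEID_MSR and SVM-related bits are queried like any other feature-word entry. A minimal sketch, assuming the usual test macros from this header (the wrapper function is illustrative):

    #include <asm/cpufeature.h>

    static int svm_has_npt(void)
    {
            /* word-8 bits are Linux-defined, filled in by CPU setup code */
            return boot_cpu_has(X86_FEATURE_NPT);
    }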
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76b531e..b81002f23614 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
14 which debugging register was responsible for the trap. The other bits 14 which debugging register was responsible for the trap. The other bits
15 are either reserved or not of interest to us. */ 15 are either reserved or not of interest to us. */
16 16
17/* Define reserved bits in DR6 which are always set to 1 */
18#define DR6_RESERVED (0xFFFF0FF0)
19
17#define DR_TRAP0 (0x1) /* db0 */ 20#define DR_TRAP0 (0x1) /* db0 */
18#define DR_TRAP1 (0x2) /* db1 */ 21#define DR_TRAP1 (0x2) /* db1 */
19#define DR_TRAP2 (0x4) /* db2 */ 22#define DR_TRAP2 (0x4) /* db2 */
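DR6's reserved bits read back as 1, which is why a handler masks with ~DR6_RESERVED before testing the trap bits. A hedged sketch, assuming the kernel's get_debugreg() helper:

    unsigned long dr6;

    get_debugreg(dr6, 6);
    dr6 &= ~DR6_RESERVED;           /* keep only meaningful status bits */

    if (dr6 & DR_TRAP0) {
            /* hardware breakpoint in DR0 fired */
    }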
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 9d6684849fd9..278441f39856 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -12,9 +12,9 @@
12#include <linux/types.h> 12#include <linux/types.h>
13 13
14/* 14/*
15 * FIXME: Acessing the desc_struct through its fields is more elegant, 15 * FIXME: Accessing the desc_struct through its fields is more elegant,
16 * and should be the one valid thing to do. However, a lot of open code 16 * and should be the one valid thing to do. However, a lot of open code
17 * still touches the a and b acessors, and doing this allow us to do it 17 * still touches the a and b accessors, and doing this allow us to do it
18 * incrementally. We keep the signature as a struct, rather than an union, 18 * incrementally. We keep the signature as a struct, rather than an union,
19 * so we can get rid of it transparently in the future -- glommer 19 * so we can get rid of it transparently in the future -- glommer
20 */ 20 */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 0f6c02f3b7d4..ac91eed21061 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -67,7 +67,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
67 if (!dev->dma_mask) 67 if (!dev->dma_mask)
68 return 0; 68 return 0;
69 69
70 return addr + size <= *dev->dma_mask; 70 return addr + size - 1 <= *dev->dma_mask;
71} 71}
72 72
73static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) 73static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
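The dma_capable() change is an off-by-one fix: a buffer may end exactly at the mask boundary, and addr + size then overshoots by one even though every byte is addressable. Worked numbers for a 32-bit DMA mask (illustration only):

    dma_addr_t addr = 0xfffff000;   /* buffer starts just below 4 GiB */
    size_t     size = 0x1000;       /* ...and ends exactly at 4 GiB   */
    u64        mask = DMA_BIT_MASK(32);     /* 0xffffffff */

    /* old test: addr + size     = 0x100000000 > mask  -> wrongly rejected */
    /* new test: addr + size - 1 = 0x0ffffffff <= mask -> accepted         */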
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 8a024babe5e6..f2ad2163109d 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t,
170} 170}
171 171
172#define ELF_PLAT_INIT(_r, load_addr) \ 172#define ELF_PLAT_INIT(_r, load_addr) \
173do { \ 173 elf_common_init(&current->thread, _r, 0)
174 elf_common_init(&current->thread, _r, 0); \
175 clear_thread_flag(TIF_IA32); \
176} while (0)
177 174
178#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ 175#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
179 elf_common_init(&current->thread, regs, __USER_DS) 176 elf_common_init(&current->thread, regs, __USER_DS)
@@ -181,14 +178,8 @@ do { \
181void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); 178void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
182#define compat_start_thread start_thread_ia32 179#define compat_start_thread start_thread_ia32
183 180
184#define COMPAT_SET_PERSONALITY(ex) \ 181void set_personality_ia32(void);
185do { \ 182#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32()
186 if (test_thread_flag(TIF_IA32)) \
187 clear_thread_flag(TIF_ABI_PENDING); \
188 else \
189 set_thread_flag(TIF_ABI_PENDING); \
190 current->personality |= force_personality32; \
191} while (0)
192 183
193#define COMPAT_ELF_PLATFORM ("i686") 184#define COMPAT_ELF_PLATFORM ("i686")
194 185
@@ -239,7 +230,6 @@ extern int force_personality32;
239#endif /* !CONFIG_X86_32 */ 230#endif /* !CONFIG_X86_32 */
240 231
241#define CORE_DUMP_USE_REGSET 232#define CORE_DUMP_USE_REGSET
242#define USE_ELF_CORE_DUMP
243#define ELF_EXEC_PAGESIZE 4096 233#define ELF_EXEC_PAGESIZE 4096
244 234
245/* This is the location that an ET_DYN program is loaded if exec'ed. Typical 235/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
index 53018464aea6..2519d0679d99 100644
--- a/arch/x86/include/asm/fb.h
+++ b/arch/x86/include/asm/fb.h
@@ -12,10 +12,6 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
12 pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; 12 pgprot_val(vma->vm_page_prot) |= _PAGE_PCD;
13} 13}
14 14
15#ifdef CONFIG_X86_32
16extern int fb_is_primary_device(struct fb_info *info); 15extern int fb_is_primary_device(struct fb_info *info);
17#else
18static inline int fb_is_primary_device(struct fb_info *info) { return 0; }
19#endif
20 16
21#endif /* _ASM_X86_FB_H */ 17#endif /* _ASM_X86_FB_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 14f9890eb495..635f03bb4995 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -118,14 +118,20 @@ enum fixed_addresses {
118 * 256 temporary boot-time mappings, used by early_ioremap(), 118 * 256 temporary boot-time mappings, used by early_ioremap(),
119 * before ioremap() is functional. 119 * before ioremap() is functional.
120 * 120 *
121 * We round it up to the next 256 pages boundary so that we 121 * If necessary we round it up to the next 256 pages boundary so
122 * can have a single pgd entry and a single pte table: 122 * that we can have a single pgd entry and a single pte table:
123 */ 123 */
124#define NR_FIX_BTMAPS 64 124#define NR_FIX_BTMAPS 64
125#define FIX_BTMAPS_SLOTS 4 125#define FIX_BTMAPS_SLOTS 4
126 FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - 126#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
127 (__end_of_permanent_fixed_addresses & 255), 127 FIX_BTMAP_END =
128 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, 128 (__end_of_permanent_fixed_addresses ^
129 (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
130 -PTRS_PER_PTE
131 ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
132 (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
133 : __end_of_permanent_fixed_addresses,
134 FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
129#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 135#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
130 FIX_OHCI1394_BASE, 136 FIX_OHCI1394_BASE,
131#endif 137#endif
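The new FIX_BTMAP_END expression rounds only when it must: the XOR against the range end, masked with -PTRS_PER_PTE, is non-zero exactly when [end, end + TOTAL_FIX_BTMAPS) straddles a pte-table boundary, and only then is the start aligned up to the next TOTAL_FIX_BTMAPS multiple. The same logic as a standalone sketch with hypothetical values (256 slots, 512 ptes per table):

    #define TOTAL 256       /* NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS */
    #define PTES  512       /* PTRS_PER_PTE */

    unsigned btmap_end(unsigned end_perm)
    {
            /* does [end_perm, end_perm + TOTAL) cross a PTES boundary? */
            if ((end_perm ^ (end_perm + TOTAL - 1)) & -PTES)
                    /* yes: align up to the next TOTAL multiple */
                    return end_perm + TOTAL - (end_perm & (TOTAL - 1));
            return end_perm;        /* no: range already fits in one table */
    }

    /* btmap_end(400) == 512 (would cross); btmap_end(256) == 256 (fits) */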
diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index ad3c2ed75481..7cd73552a4e8 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -12,160 +12,7 @@
12 12
13#include <asm/processor.h> 13#include <asm/processor.h>
14#include <linux/io.h> 14#include <linux/io.h>
15 15#include <linux/cs5535.h>
16/* Generic southbridge functions */
17
18#define GEODE_DEV_PMS 0
19#define GEODE_DEV_ACPI 1
20#define GEODE_DEV_GPIO 2
21#define GEODE_DEV_MFGPT 3
22
23extern int geode_get_dev_base(unsigned int dev);
24
25/* Useful macros */
26#define geode_pms_base() geode_get_dev_base(GEODE_DEV_PMS)
27#define geode_acpi_base() geode_get_dev_base(GEODE_DEV_ACPI)
28#define geode_gpio_base() geode_get_dev_base(GEODE_DEV_GPIO)
29#define geode_mfgpt_base() geode_get_dev_base(GEODE_DEV_MFGPT)
30
31/* MSRS */
32
33#define MSR_GLIU_P2D_RO0 0x10000029
34
35#define MSR_LX_GLD_MSR_CONFIG 0x48002001
36#define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data
37 * sheet has the wrong value */
38#define MSR_GLCP_SYS_RSTPLL 0x4C000014
39#define MSR_GLCP_DOTPLL 0x4C000015
40
41#define MSR_LBAR_SMB 0x5140000B
42#define MSR_LBAR_GPIO 0x5140000C
43#define MSR_LBAR_MFGPT 0x5140000D
44#define MSR_LBAR_ACPI 0x5140000E
45#define MSR_LBAR_PMS 0x5140000F
46
47#define MSR_DIVIL_SOFT_RESET 0x51400017
48
49#define MSR_PIC_YSEL_LOW 0x51400020
50#define MSR_PIC_YSEL_HIGH 0x51400021
51#define MSR_PIC_ZSEL_LOW 0x51400022
52#define MSR_PIC_ZSEL_HIGH 0x51400023
53#define MSR_PIC_IRQM_LPC 0x51400025
54
55#define MSR_MFGPT_IRQ 0x51400028
56#define MSR_MFGPT_NR 0x51400029
57#define MSR_MFGPT_SETUP 0x5140002B
58
59#define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */
60
61#define MSR_GX_GLD_MSR_CONFIG 0xC0002001
62#define MSR_GX_MSR_PADSEL 0xC0002011
63
64/* Resource Sizes */
65
66#define LBAR_GPIO_SIZE 0xFF
67#define LBAR_MFGPT_SIZE 0x40
68#define LBAR_ACPI_SIZE 0x40
69#define LBAR_PMS_SIZE 0x80
70
71/* ACPI registers (PMS block) */
72
73/*
74 * PM1_EN is only valid when VSA is enabled for 16 bit reads.
75 * When VSA is not enabled, *always* read both PM1_STS and PM1_EN
76 * with a 32 bit read at offset 0x0
77 */
78
79#define PM1_STS 0x00
80#define PM1_EN 0x02
81#define PM1_CNT 0x08
82#define PM2_CNT 0x0C
83#define PM_TMR 0x10
84#define PM_GPE0_STS 0x18
85#define PM_GPE0_EN 0x1C
86
87/* PMC registers (PMS block) */
88
89#define PM_SSD 0x00
90#define PM_SCXA 0x04
91#define PM_SCYA 0x08
92#define PM_OUT_SLPCTL 0x0C
93#define PM_SCLK 0x10
94#define PM_SED 0x14
95#define PM_SCXD 0x18
96#define PM_SCYD 0x1C
97#define PM_IN_SLPCTL 0x20
98#define PM_WKD 0x30
99#define PM_WKXD 0x34
100#define PM_RD 0x38
101#define PM_WKXA 0x3C
102#define PM_FSD 0x40
103#define PM_TSD 0x44
104#define PM_PSD 0x48
105#define PM_NWKD 0x4C
106#define PM_AWKD 0x50
107#define PM_SSC 0x54
108
109/* VSA2 magic values */
110
111#define VSA_VRC_INDEX 0xAC1C
112#define VSA_VRC_DATA 0xAC1E
113#define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */
114#define VSA_VR_SIGNATURE 0x0003
115#define VSA_VR_MEM_SIZE 0x0200
116#define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */
117#define GSW_VSA_SIG 0x534d /* General Software signature */
118/* GPIO */
119
120#define GPIO_OUTPUT_VAL 0x00
121#define GPIO_OUTPUT_ENABLE 0x04
122#define GPIO_OUTPUT_OPEN_DRAIN 0x08
123#define GPIO_OUTPUT_INVERT 0x0C
124#define GPIO_OUTPUT_AUX1 0x10
125#define GPIO_OUTPUT_AUX2 0x14
126#define GPIO_PULL_UP 0x18
127#define GPIO_PULL_DOWN 0x1C
128#define GPIO_INPUT_ENABLE 0x20
129#define GPIO_INPUT_INVERT 0x24
130#define GPIO_INPUT_FILTER 0x28
131#define GPIO_INPUT_EVENT_COUNT 0x2C
132#define GPIO_READ_BACK 0x30
133#define GPIO_INPUT_AUX1 0x34
134#define GPIO_EVENTS_ENABLE 0x38
135#define GPIO_LOCK_ENABLE 0x3C
136#define GPIO_POSITIVE_EDGE_EN 0x40
137#define GPIO_NEGATIVE_EDGE_EN 0x44
138#define GPIO_POSITIVE_EDGE_STS 0x48
139#define GPIO_NEGATIVE_EDGE_STS 0x4C
140
141#define GPIO_MAP_X 0xE0
142#define GPIO_MAP_Y 0xE4
143#define GPIO_MAP_Z 0xE8
144#define GPIO_MAP_W 0xEC
145
146static inline u32 geode_gpio(unsigned int nr)
147{
148 BUG_ON(nr > 28);
149 return 1 << nr;
150}
151
152extern void geode_gpio_set(u32, unsigned int);
153extern void geode_gpio_clear(u32, unsigned int);
154extern int geode_gpio_isset(u32, unsigned int);
155extern void geode_gpio_setup_event(unsigned int, int, int);
156extern void geode_gpio_set_irq(unsigned int, unsigned int);
157
158static inline void geode_gpio_event_irq(unsigned int gpio, int pair)
159{
160 geode_gpio_setup_event(gpio, pair, 0);
161}
162
163static inline void geode_gpio_event_pme(unsigned int gpio, int pair)
164{
165 geode_gpio_setup_event(gpio, pair, 1);
166}
167
168/* Specific geode tests */
169 16
170static inline int is_geode_gx(void) 17static inline int is_geode_gx(void)
171{ 18{
@@ -186,68 +33,4 @@ static inline int is_geode(void)
186 return (is_geode_gx() || is_geode_lx()); 33 return (is_geode_gx() || is_geode_lx());
187} 34}
188 35
189#ifdef CONFIG_MGEODE_LX
190extern int geode_has_vsa2(void);
191#else
192static inline int geode_has_vsa2(void)
193{
194 return 0;
195}
196#endif
197
198/* MFGPTs */
199
200#define MFGPT_MAX_TIMERS 8
201#define MFGPT_TIMER_ANY (-1)
202
203#define MFGPT_DOMAIN_WORKING 1
204#define MFGPT_DOMAIN_STANDBY 2
205#define MFGPT_DOMAIN_ANY (MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY)
206
207#define MFGPT_CMP1 0
208#define MFGPT_CMP2 1
209
210#define MFGPT_EVENT_IRQ 0
211#define MFGPT_EVENT_NMI 1
212#define MFGPT_EVENT_RESET 3
213
214#define MFGPT_REG_CMP1 0
215#define MFGPT_REG_CMP2 2
216#define MFGPT_REG_COUNTER 4
217#define MFGPT_REG_SETUP 6
218
219#define MFGPT_SETUP_CNTEN (1 << 15)
220#define MFGPT_SETUP_CMP2 (1 << 14)
221#define MFGPT_SETUP_CMP1 (1 << 13)
222#define MFGPT_SETUP_SETUP (1 << 12)
223#define MFGPT_SETUP_STOPEN (1 << 11)
224#define MFGPT_SETUP_EXTEN (1 << 10)
225#define MFGPT_SETUP_REVEN (1 << 5)
226#define MFGPT_SETUP_CLKSEL (1 << 4)
227
228static inline void geode_mfgpt_write(int timer, u16 reg, u16 value)
229{
230 u32 base = geode_get_dev_base(GEODE_DEV_MFGPT);
231 outw(value, base + reg + (timer * 8));
232}
233
234static inline u16 geode_mfgpt_read(int timer, u16 reg)
235{
236 u32 base = geode_get_dev_base(GEODE_DEV_MFGPT);
237 return inw(base + reg + (timer * 8));
238}
239
240extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable);
241extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable);
242extern int geode_mfgpt_alloc_timer(int timer, int domain);
243
244#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1)
245#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0)
246
247#ifdef CONFIG_GEODE_MFGPT_TIMER
248extern int __init mfgpt_timer_setup(void);
249#else
250static inline int mfgpt_timer_setup(void) { return 0; }
251#endif
252
253#endif /* _ASM_X86_GEODE_H */ 36#endif /* _ASM_X86_GEODE_H */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5d89fd2a3690..1d5c08a1bdfd 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -67,6 +67,7 @@ extern unsigned long hpet_address;
67extern unsigned long force_hpet_address; 67extern unsigned long force_hpet_address;
68extern u8 hpet_blockid; 68extern u8 hpet_blockid;
69extern int hpet_force_user; 69extern int hpet_force_user;
70extern u8 hpet_msi_disable;
70extern int is_hpet_enabled(void); 71extern int is_hpet_enabled(void);
71extern int hpet_enable(void); 72extern int hpet_enable(void);
72extern void hpet_disable(void); 73extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 08c48a81841f..eeac829a0f44 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -103,7 +103,8 @@ extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
103extern void send_cleanup_vector(struct irq_cfg *); 103extern void send_cleanup_vector(struct irq_cfg *);
104 104
105struct irq_desc; 105struct irq_desc;
106extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); 106extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *,
107 unsigned int *dest_id);
107extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); 108extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
108extern void setup_ioapic_dest(void); 109extern void setup_ioapic_dest(void);
109 110
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ebfb8a9e11f7..da2930924501 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -33,8 +33,16 @@ extern void init_thread_xstate(void);
33extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); 33extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
34 34
35extern user_regset_active_fn fpregs_active, xfpregs_active; 35extern user_regset_active_fn fpregs_active, xfpregs_active;
36extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; 36extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
37extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; 37 xstateregs_get;
38extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
39 xstateregs_set;
40
41/*
42 * xstateregs_active == fpregs_active. Please refer to the comment
43 * at the definition of fpregs_active.
44 */
45#define xstateregs_active fpregs_active
38 46
39extern struct _fpx_sw_bytes fx_sw_reserved; 47extern struct _fpx_sw_bytes fx_sw_reserved;
40#ifdef CONFIG_IA32_EMULATION 48#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 73739322b6d0..a1dcfa3ab17d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -1,8 +1,42 @@
1#ifndef _ASM_X86_IO_H 1#ifndef _ASM_X86_IO_H
2#define _ASM_X86_IO_H 2#define _ASM_X86_IO_H
3 3
4/*
5 * This file contains the definitions for the x86 IO instructions
6 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
7 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
8 * versions of the single-IO instructions (inb_p/inw_p/..).
9 *
10 * This file is not meant to be obfuscating: it's just complicated
11 * to (a) handle it all in a way that makes gcc able to optimize it
12 * as well as possible and (b) trying to avoid writing the same thing
13 * over and over again with slight variations and possibly making a
14 * mistake somewhere.
15 */
16
17/*
18 * Thanks to James van Artsdalen for a better timing-fix than
19 * the two short jumps: using outb's to a nonexistent port seems
20 * to guarantee better timings even on fast machines.
21 *
22 * On the other hand, I'd like to be sure of a non-existent port:
23 * I feel a bit unsafe about using 0x80 (should be safe, though)
24 *
25 * Linus
26 */
27
28 /*
29 * Bit simplified and optimized by Jan Hubicka
30 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
31 *
32 * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
33 * isa_read[wl] and isa_write[wl] fixed
34 * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
35 */
36
4#define ARCH_HAS_IOREMAP_WC 37#define ARCH_HAS_IOREMAP_WC
5 38
39#include <linux/string.h>
6#include <linux/compiler.h> 40#include <linux/compiler.h>
7#include <asm-generic/int-ll64.h> 41#include <asm-generic/int-ll64.h>
8#include <asm/page.h> 42#include <asm/page.h>
@@ -173,11 +207,126 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
173extern void iounmap(volatile void __iomem *addr); 207extern void iounmap(volatile void __iomem *addr);
174 208
175 209
176#ifdef CONFIG_X86_32 210#ifdef __KERNEL__
177# include "io_32.h" 211
212#include <asm-generic/iomap.h>
213
214#include <linux/vmalloc.h>
215
216/*
217 * Convert a virtual cached pointer to an uncached pointer
218 */
219#define xlate_dev_kmem_ptr(p) p
220
221static inline void
222memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
223{
224 memset((void __force *)addr, val, count);
225}
226
227static inline void
228memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
229{
230 memcpy(dst, (const void __force *)src, count);
231}
232
233static inline void
234memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
235{
236 memcpy((void __force *)dst, src, count);
237}
238
239/*
240 * ISA space is 'always mapped' on a typical x86 system, no need to
241 * explicitly ioremap() it. The fact that the ISA IO space is mapped
242 * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
243 * are physical addresses. The following constant pointer can be
244 * used as the IO-area pointer (it can be iounmapped as well, so the
245 * analogy with PCI is quite large):
246 */
247#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
248
249/*
250 * Cache management
251 *
252 * This is needed for two cases:
253 * 1. Out of order aware processors
254 * 2. Accidentally out of order processors (PPro errata #51)
255 */
256
257static inline void flush_write_buffers(void)
258{
259#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
260 asm volatile("lock; addl $0,0(%%esp)": : :"memory");
261#endif
262}
263
264#endif /* __KERNEL__ */
265
266extern void native_io_delay(void);
267
268extern int io_delay_type;
269extern void io_delay_init(void);
270
271#if defined(CONFIG_PARAVIRT)
272#include <asm/paravirt.h>
178#else 273#else
179# include "io_64.h" 274
275static inline void slow_down_io(void)
276{
277 native_io_delay();
278#ifdef REALLY_SLOW_IO
279 native_io_delay();
280 native_io_delay();
281 native_io_delay();
180#endif 282#endif
283}
284
285#endif
286
287#define BUILDIO(bwl, bw, type) \
288static inline void out##bwl(unsigned type value, int port) \
289{ \
290 asm volatile("out" #bwl " %" #bw "0, %w1" \
291 : : "a"(value), "Nd"(port)); \
292} \
293 \
294static inline unsigned type in##bwl(int port) \
295{ \
296 unsigned type value; \
297 asm volatile("in" #bwl " %w1, %" #bw "0" \
298 : "=a"(value) : "Nd"(port)); \
299 return value; \
300} \
301 \
302static inline void out##bwl##_p(unsigned type value, int port) \
303{ \
304 out##bwl(value, port); \
305 slow_down_io(); \
306} \
307 \
308static inline unsigned type in##bwl##_p(int port) \
309{ \
310 unsigned type value = in##bwl(port); \
311 slow_down_io(); \
312 return value; \
313} \
314 \
315static inline void outs##bwl(int port, const void *addr, unsigned long count) \
316{ \
317 asm volatile("rep; outs" #bwl \
318 : "+S"(addr), "+c"(count) : "d"(port)); \
319} \
320 \
321static inline void ins##bwl(int port, void *addr, unsigned long count) \
322{ \
323 asm volatile("rep; ins" #bwl \
324 : "+D"(addr), "+c"(count) : "d"(port)); \
325}
326
327BUILDIO(b, b, char)
328BUILDIO(w, w, short)
329BUILDIO(l, , int)
181 330
182extern void *xlate_dev_mem_ptr(unsigned long phys); 331extern void *xlate_dev_mem_ptr(unsigned long phys);
183extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); 332extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
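BUILDIO stamps out the whole port-I/O family for each access width; expanded by hand for the byte case, the first two definitions read (mechanical expansion, shown for illustration):

    static inline void outb(unsigned char value, int port)
    {
            asm volatile("outb %b0, %w1" : : "a"(value), "Nd"(port));
    }

    static inline unsigned char inb(int port)
    {
            unsigned char value;
            asm volatile("inb %w1, %b0" : "=a"(value) : "Nd"(port));
            return value;
    }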
diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h
deleted file mode 100644
index a299900f5920..000000000000
--- a/arch/x86/include/asm/io_32.h
+++ /dev/null
@@ -1,196 +0,0 @@
1#ifndef _ASM_X86_IO_32_H
2#define _ASM_X86_IO_32_H
3
4#include <linux/string.h>
5#include <linux/compiler.h>
6
7/*
8 * This file contains the definitions for the x86 IO instructions
9 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
10 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
11 * versions of the single-IO instructions (inb_p/inw_p/..).
12 *
13 * This file is not meant to be obfuscating: it's just complicated
14 * to (a) handle it all in a way that makes gcc able to optimize it
15 * as well as possible and (b) trying to avoid writing the same thing
16 * over and over again with slight variations and possibly making a
17 * mistake somewhere.
18 */
19
20/*
21 * Thanks to James van Artsdalen for a better timing-fix than
22 * the two short jumps: using outb's to a nonexistent port seems
23 * to guarantee better timings even on fast machines.
24 *
25 * On the other hand, I'd like to be sure of a non-existent port:
26 * I feel a bit unsafe about using 0x80 (should be safe, though)
27 *
28 * Linus
29 */
30
31 /*
32 * Bit simplified and optimized by Jan Hubicka
33 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
34 *
35 * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
36 * isa_read[wl] and isa_write[wl] fixed
37 * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
38 */
39
40#define XQUAD_PORTIO_BASE 0xfe400000
41#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
42
43#ifdef __KERNEL__
44
45#include <asm-generic/iomap.h>
46
47#include <linux/vmalloc.h>
48
49/*
50 * Convert a virtual cached pointer to an uncached pointer
51 */
52#define xlate_dev_kmem_ptr(p) p
53
54static inline void
55memset_io(volatile void __iomem *addr, unsigned char val, int count)
56{
57 memset((void __force *)addr, val, count);
58}
59
60static inline void
61memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
62{
63 __memcpy(dst, (const void __force *)src, count);
64}
65
66static inline void
67memcpy_toio(volatile void __iomem *dst, const void *src, int count)
68{
69 __memcpy((void __force *)dst, src, count);
70}
71
72/*
73 * ISA space is 'always mapped' on a typical x86 system, no need to
74 * explicitly ioremap() it. The fact that the ISA IO space is mapped
75 * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
76 * are physical addresses. The following constant pointer can be
77 * used as the IO-area pointer (it can be iounmapped as well, so the
78 * analogy with PCI is quite large):
79 */
80#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
81
82/*
83 * Cache management
84 *
85 * This is needed for two cases:
86 * 1. Out of order aware processors
87 * 2. Accidentally out of order processors (PPro errata #51)
88 */
89
90#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
91
92static inline void flush_write_buffers(void)
93{
94 asm volatile("lock; addl $0,0(%%esp)": : :"memory");
95}
96
97#else
98
99#define flush_write_buffers() do { } while (0)
100
101#endif
102
103#endif /* __KERNEL__ */
104
105extern void native_io_delay(void);
106
107extern int io_delay_type;
108extern void io_delay_init(void);
109
110#if defined(CONFIG_PARAVIRT)
111#include <asm/paravirt.h>
112#else
113
114static inline void slow_down_io(void)
115{
116 native_io_delay();
117#ifdef REALLY_SLOW_IO
118 native_io_delay();
119 native_io_delay();
120 native_io_delay();
121#endif
122}
123
124#endif
125
126#define __BUILDIO(bwl, bw, type) \
127static inline void out##bwl(unsigned type value, int port) \
128{ \
129 out##bwl##_local(value, port); \
130} \
131 \
132static inline unsigned type in##bwl(int port) \
133{ \
134 return in##bwl##_local(port); \
135}
136
137#define BUILDIO(bwl, bw, type) \
138static inline void out##bwl##_local(unsigned type value, int port) \
139{ \
140 asm volatile("out" #bwl " %" #bw "0, %w1" \
141 : : "a"(value), "Nd"(port)); \
142} \
143 \
144static inline unsigned type in##bwl##_local(int port) \
145{ \
146 unsigned type value; \
147 asm volatile("in" #bwl " %w1, %" #bw "0" \
148 : "=a"(value) : "Nd"(port)); \
149 return value; \
150} \
151 \
152static inline void out##bwl##_local_p(unsigned type value, int port) \
153{ \
154 out##bwl##_local(value, port); \
155 slow_down_io(); \
156} \
157 \
158static inline unsigned type in##bwl##_local_p(int port) \
159{ \
160 unsigned type value = in##bwl##_local(port); \
161 slow_down_io(); \
162 return value; \
163} \
164 \
165__BUILDIO(bwl, bw, type) \
166 \
167static inline void out##bwl##_p(unsigned type value, int port) \
168{ \
169 out##bwl(value, port); \
170 slow_down_io(); \
171} \
172 \
173static inline unsigned type in##bwl##_p(int port) \
174{ \
175 unsigned type value = in##bwl(port); \
176 slow_down_io(); \
177 return value; \
178} \
179 \
180static inline void outs##bwl(int port, const void *addr, unsigned long count) \
181{ \
182 asm volatile("rep; outs" #bwl \
183 : "+S"(addr), "+c"(count) : "d"(port)); \
184} \
185 \
186static inline void ins##bwl(int port, void *addr, unsigned long count) \
187{ \
188 asm volatile("rep; ins" #bwl \
189 : "+D"(addr), "+c"(count) : "d"(port)); \
190}
191
192BUILDIO(b, b, char)
193BUILDIO(w, w, short)
194BUILDIO(l, , int)
195
196#endif /* _ASM_X86_IO_32_H */
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
deleted file mode 100644
index 244067893af4..000000000000
--- a/arch/x86/include/asm/io_64.h
+++ /dev/null
@@ -1,181 +0,0 @@
1#ifndef _ASM_X86_IO_64_H
2#define _ASM_X86_IO_64_H
3
4
5/*
6 * This file contains the definitions for the x86 IO instructions
7 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
8 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
9 * versions of the single-IO instructions (inb_p/inw_p/..).
10 *
11 * This file is not meant to be obfuscating: it's just complicated
12 * to (a) handle it all in a way that makes gcc able to optimize it
13 * as well as possible and (b) to avoid writing the same thing
14 * over and over again with slight variations and possibly making a
15 * mistake somewhere.
16 */
17
18/*
19 * Thanks to James van Artsdalen for a better timing-fix than
20 * the two short jumps: using outb's to a nonexistent port seems
21 * to guarantee better timings even on fast machines.
22 *
23 * On the other hand, I'd like to be sure of a non-existent port:
24 * I feel a bit unsafe about using 0x80 (should be safe, though)
25 *
26 * Linus
27 */
28
29 /*
30 * A bit simplified and optimized by Jan Hubicka
31 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
32 *
33 * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
34 * isa_read[wl] and isa_write[wl] fixed
35 * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
36 */
37
38extern void native_io_delay(void);
39
40extern int io_delay_type;
41extern void io_delay_init(void);
42
43#if defined(CONFIG_PARAVIRT)
44#include <asm/paravirt.h>
45#else
46
47static inline void slow_down_io(void)
48{
49 native_io_delay();
50#ifdef REALLY_SLOW_IO
51 native_io_delay();
52 native_io_delay();
53 native_io_delay();
54#endif
55}
56#endif
57
58/*
59 * Talk about misusing macros..
60 */
61#define __OUT1(s, x) \
62static inline void out##s(unsigned x value, unsigned short port) {
63
64#define __OUT2(s, s1, s2) \
65asm volatile ("out" #s " %" s1 "0,%" s2 "1"
66
67#ifndef REALLY_SLOW_IO
68#define REALLY_SLOW_IO
69#define UNSET_REALLY_SLOW_IO
70#endif
71
72#define __OUT(s, s1, x) \
73 __OUT1(s, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
74 } \
75 __OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
76 slow_down_io(); \
77}
78
79#define __IN1(s) \
80static inline RETURN_TYPE in##s(unsigned short port) \
81{ \
82 RETURN_TYPE _v;
83
84#define __IN2(s, s1, s2) \
85 asm volatile ("in" #s " %" s2 "1,%" s1 "0"
86
87#define __IN(s, s1, i...) \
88 __IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \
89 return _v; \
90 } \
91 __IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \
92 slow_down_io(); \
93 return _v; }
94
95#ifdef UNSET_REALLY_SLOW_IO
96#undef REALLY_SLOW_IO
97#endif
98
99#define __INS(s) \
100static inline void ins##s(unsigned short port, void *addr, \
101 unsigned long count) \
102{ \
103 asm volatile ("rep ; ins" #s \
104 : "=D" (addr), "=c" (count) \
105 : "d" (port), "0" (addr), "1" (count)); \
106}
107
108#define __OUTS(s) \
109static inline void outs##s(unsigned short port, const void *addr, \
110 unsigned long count) \
111{ \
112 asm volatile ("rep ; outs" #s \
113 : "=S" (addr), "=c" (count) \
114 : "d" (port), "0" (addr), "1" (count)); \
115}
116
117#define RETURN_TYPE unsigned char
118__IN(b, "")
119#undef RETURN_TYPE
120#define RETURN_TYPE unsigned short
121__IN(w, "")
122#undef RETURN_TYPE
123#define RETURN_TYPE unsigned int
124__IN(l, "")
125#undef RETURN_TYPE
126
127__OUT(b, "b", char)
128__OUT(w, "w", short)
129__OUT(l, , int)
130
131__INS(b)
132__INS(w)
133__INS(l)
134
135__OUTS(b)
136__OUTS(w)
137__OUTS(l)
138
139#if defined(__KERNEL__) && defined(__x86_64__)
140
141#include <linux/vmalloc.h>
142
143#include <asm-generic/iomap.h>
144
145void __memcpy_fromio(void *, unsigned long, unsigned);
146void __memcpy_toio(unsigned long, const void *, unsigned);
147
148static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
149 unsigned len)
150{
151 __memcpy_fromio(to, (unsigned long)from, len);
152}
153
154static inline void memcpy_toio(volatile void __iomem *to, const void *from,
155 unsigned len)
156{
157 __memcpy_toio((unsigned long)to, from, len);
158}
159
160void memset_io(volatile void __iomem *a, int b, size_t c);
161
162/*
163 * ISA space is 'always mapped' on a typical x86 system; there is no
164 * need to explicitly ioremap() it. The fact that the ISA IO space is
165 * mapped to PAGE_OFFSET is pure coincidence - it does not mean ISA
166 * values are physical addresses. The following constant pointer can
167 * be used as the IO-area pointer (it can be iounmapped as well, so
168 * the analogy with PCI is quite close):
169 */
170#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
171
172#define flush_write_buffers()
173
174/*
175 * Convert a virtual cached pointer to an uncached pointer
176 */
177#define xlate_dev_kmem_ptr(p) p
178
179#endif /* __KERNEL__ */
180
181#endif /* _ASM_X86_IO_64_H */
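
Similarly, the RETURN_TYPE dance in the deleted io_64.h above expands __IN(b, "") to roughly the following pair (a sketch of the generated code, not a verbatim preprocessor dump):

    static inline unsigned char inb(unsigned short port)
    {
        unsigned char _v;
        asm volatile("inb %w1,%0" : "=a" (_v) : "Nd" (port));
        return _v;
    }

    static inline unsigned char inb_p(unsigned short port)
    {
        unsigned char _v;
        asm volatile("inb %w1,%0" : "=a" (_v) : "Nd" (port));
        slow_down_io();
        return _v;
    }
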
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6a635bd39867..4611f085cd43 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -113,7 +113,7 @@
113 */ 113 */
114#define LOCAL_PENDING_VECTOR 0xec 114#define LOCAL_PENDING_VECTOR 0xec
115 115
116#define UV_BAU_MESSAGE 0xec 116#define UV_BAU_MESSAGE 0xea
117 117
118/* 118/*
119 * Self IPI vector for machine checks 119 * Self IPI vector for machine checks
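
The hunk above moves UV_BAU_MESSAGE off 0xec, which it had silently shared with LOCAL_PENDING_VECTOR, to 0xea. A hypothetical build-time guard (not part of the patch) that would have caught the collision:

    #if UV_BAU_MESSAGE == LOCAL_PENDING_VECTOR
    # error "UV_BAU_MESSAGE collides with LOCAL_PENDING_VECTOR"
    #endif
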
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 950df434763f..f46b79f6c16c 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -254,6 +254,10 @@ struct kvm_reinject_control {
254 __u8 reserved[31]; 254 __u8 reserved[31];
255}; 255};
256 256
257/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
258#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
259#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
260
257/* for KVM_GET/SET_VCPU_EVENTS */ 261/* for KVM_GET/SET_VCPU_EVENTS */
258struct kvm_vcpu_events { 262struct kvm_vcpu_events {
259 struct { 263 struct {
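
The new validity bits gate which fields KVM_SET_VCPU_EVENTS consumes. A minimal userspace sketch, assuming an open vCPU file descriptor, the KVM_GET/SET_VCPU_EVENTS ioctl pair that operates on this struct, and the struct's nmi.pending field:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: inject a pending NMI without clobbering the SIPI vector. */
    static int set_pending_nmi(int vcpu_fd)
    {
        struct kvm_vcpu_events events;

        if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
            return -1;
        events.nmi.pending = 1;
        events.flags = KVM_VCPUEVENT_VALID_NMI_PENDING;
        return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }
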
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 858baa061cfc..6c3fdd631ed3 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -108,10 +108,11 @@ struct mce_log {
108#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) 108#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
109#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) 109#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
110 110
111extern struct atomic_notifier_head x86_mce_decoder_chain;
112 111
113#ifdef __KERNEL__ 112#ifdef __KERNEL__
114 113
114extern struct atomic_notifier_head x86_mce_decoder_chain;
115
115#include <linux/percpu.h> 116#include <linux/percpu.h>
116#include <linux/init.h> 117#include <linux/init.h>
117#include <asm/atomic.h> 118#include <asm/atomic.h>
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c24ca9a56458..ef51b501e22a 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -12,8 +12,6 @@ struct device;
12enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; 12enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
13 13
14struct microcode_ops { 14struct microcode_ops {
15 void (*init)(struct device *device);
16 void (*fini)(void);
17 enum ucode_state (*request_microcode_user) (int cpu, 15 enum ucode_state (*request_microcode_user) (int cpu,
18 const void __user *buf, size_t size); 16 const void __user *buf, size_t size);
19 17
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index ede6998bd92c..91df7c51806c 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {}
47/* 47/*
48 * generic node memory support, the following assumptions apply: 48 * generic node memory support, the following assumptions apply:
49 * 49 *
50 * 1) memory comes in 64Mb contigious chunks which are either present or not 50 * 1) memory comes in 64Mb contiguous chunks which are either present or not
51 * 2) we will not have more than 64Gb in total 51 * 2) we will not have more than 64Gb in total
52 * 52 *
53 * for now assume that 64Gb is max amount of RAM for whole system 53 * for now assume that 64Gb is max amount of RAM for whole system
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a29f48c2a322..288b96f815a6 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -39,11 +39,5 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
39#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) 39#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ 40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
41 NODE_DATA(nid)->node_spanned_pages) 41 NODE_DATA(nid)->node_spanned_pages)
42
43#ifdef CONFIG_NUMA_EMU
44#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
45#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
46#endif
47
48#endif 42#endif
49#endif /* _ASM_X86_MMZONE_64_H */ 43#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4ffe09b2ad75..1cd58cdbc03f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -12,6 +12,7 @@
12#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ 12#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
13#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ 13#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
14#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ 14#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
15#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
15 16
16/* EFER bits: */ 17/* EFER bits: */
17#define _EFER_SCE 0 /* SYSCALL/SYSRET */ 18#define _EFER_SCE 0 /* SYSCALL/SYSRET */
@@ -123,6 +124,7 @@
123#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 124#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
124#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff 125#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
125#define FAM10H_MMIO_CONF_BASE_SHIFT 20 126#define FAM10H_MMIO_CONF_BASE_SHIFT 20
127#define MSR_FAM10H_NODE_ID 0xc001100c
126 128
127/* K8 MSRs */ 129/* K8 MSRs */
128#define MSR_K8_TOP_MEM1 0xc001001a 130#define MSR_K8_TOP_MEM1 0xc001001a
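
MSR_TSC_AUX is the value RDTSCP hands back in ECX. A sketch of reading it via the native_read_tscp() helper declared in msr.h below, assuming the kernel has seeded the MSR with the CPU number (as the vgetcpu setup does):

    static inline unsigned int cpu_from_tsc_aux(void)   /* hypothetical helper */
    {
        unsigned int aux;

        native_read_tscp(&aux);   /* RDTSCP: returns MSR_TSC_AUX in ECX */
        return aux;
    }
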
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 5bef931f8b14..c5bc4c2d33f5 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -27,6 +27,18 @@ struct msr {
27 }; 27 };
28}; 28};
29 29
30struct msr_info {
31 u32 msr_no;
32 struct msr reg;
33 struct msr *msrs;
34 int err;
35};
36
37struct msr_regs_info {
38 u32 *regs;
39 int err;
40};
41
30static inline unsigned long long native_read_tscp(unsigned int *aux) 42static inline unsigned long long native_read_tscp(unsigned int *aux)
31{ 43{
32 unsigned long low, high; 44 unsigned long low, high;
@@ -240,9 +252,12 @@ do { \
240#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ 252#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \
241 (u32)((val) >> 32)) 253 (u32)((val) >> 32))
242 254
243#define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) 255#define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2))
256
257#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
244 258
245#define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) 259struct msr *msrs_alloc(void);
260void msrs_free(struct msr *msrs);
246 261
247#ifdef CONFIG_SMP 262#ifdef CONFIG_SMP
248int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); 263int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
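
msrs_alloc()/msrs_free() pair with the per-CPU read helpers: the array holds one struct msr per possible CPU, so results can be indexed by CPU number. A sketch using the rdmsr_on_cpu() declaration visible above (the MSR choice is illustrative):

    static void read_aperf_all(void)   /* hypothetical example */
    {
        struct msr *msrs = msrs_alloc();
        int cpu;

        if (!msrs)
            return;
        for_each_online_cpu(cpu)
            rdmsr_on_cpu(cpu, MSR_IA32_APERF, &msrs[cpu].l, &msrs[cpu].h);
        /* ... consume msrs[cpu].q per CPU ... */
        msrs_free(msrs);
    }
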
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1a33a7..93da9c3f3341 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void); 19extern int check_nmi_watchdog(void);
20extern int nmi_watchdog_enabled; 20extern int nmi_watchdog_enabled;
21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
22extern int avail_to_resrv_perfctr_nmi(unsigned int);
23extern int reserve_perfctr_nmi(unsigned int); 22extern int reserve_perfctr_nmi(unsigned int);
24extern void release_perfctr_nmi(unsigned int); 23extern void release_perfctr_nmi(unsigned int);
25extern int reserve_evntsel_nmi(unsigned int); 24extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index c4ae822e415f..823e070e7c26 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -36,6 +36,11 @@ extern void __cpuinit numa_set_node(int cpu, int node);
36extern void __cpuinit numa_clear_node(int cpu); 36extern void __cpuinit numa_clear_node(int cpu);
37extern void __cpuinit numa_add_cpu(int cpu); 37extern void __cpuinit numa_add_cpu(int cpu);
38extern void __cpuinit numa_remove_cpu(int cpu); 38extern void __cpuinit numa_remove_cpu(int cpu);
39
40#ifdef CONFIG_NUMA_EMU
41#define FAKE_NODE_MIN_SIZE ((u64)64 << 20)
42#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
43#endif /* CONFIG_NUMA_EMU */
39#else 44#else
40static inline void init_cpu_to_node(void) { } 45static inline void init_cpu_to_node(void) { }
41static inline void numa_set_node(int cpu, int node) { } 46static inline void numa_set_node(int cpu, int node) { }
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 9f0a5f5d29ec..13370b95ea94 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -33,6 +33,10 @@ extern int get_memcfg_numaq(void);
33 33
34extern void *xquad_portio; 34extern void *xquad_portio;
35 35
36#define XQUAD_PORTIO_BASE 0xfe400000
37#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
38#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
39
36/* 40/*
37 * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the 41 * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the
38 */ 42 */
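
The new XQUAD_PORT_ADDR() macro turns a (port, quad) pair into an address inside the mapped xquad_portio window: quad N's 256k slice starts at xquad_portio + N * 0x40000. A sketch in the style of the NUMAQ PCI code, reading the CONFIG_DATA port (0xcfc) of a remote quad:

    static unsigned int xquad_read_config_data(int quad)   /* hypothetical */
    {
        return readl(XQUAD_PORT_ADDR(0xcfc, quad));
    }
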
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 834a30295fab..3a57385d9fa7 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -120,7 +120,7 @@ extern int olpc_ec_mask_unset(uint8_t bits);
120 120
121/* GPIO assignments */ 121/* GPIO assignments */
122 122
123#define OLPC_GPIO_MIC_AC geode_gpio(1) 123#define OLPC_GPIO_MIC_AC 1
124#define OLPC_GPIO_DCON_IRQ geode_gpio(7) 124#define OLPC_GPIO_DCON_IRQ geode_gpio(7)
125#define OLPC_GPIO_THRM_ALRM geode_gpio(10) 125#define OLPC_GPIO_THRM_ALRM geode_gpio(10)
126#define OLPC_GPIO_SMB_CLK geode_gpio(14) 126#define OLPC_GPIO_SMB_CLK geode_gpio(14)
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 642fe34b36a2..a667f24c7254 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -40,7 +40,6 @@
40 40
41#ifndef __ASSEMBLY__ 41#ifndef __ASSEMBLY__
42 42
43extern int page_is_ram(unsigned long pagenr);
44extern int devmem_is_allowed(unsigned long pagenr); 43extern int devmem_is_allowed(unsigned long pagenr);
45 44
46extern unsigned long max_low_pfn_mapped; 45extern unsigned long max_low_pfn_mapped;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index efb38994859c..dd59a85a918f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
731 731
732#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) 732#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
733 733
734static inline int __raw_spin_is_locked(struct raw_spinlock *lock) 734static inline int arch_spin_is_locked(struct arch_spinlock *lock)
735{ 735{
736 return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); 736 return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
737} 737}
738 738
739static inline int __raw_spin_is_contended(struct raw_spinlock *lock) 739static inline int arch_spin_is_contended(struct arch_spinlock *lock)
740{ 740{
741 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); 741 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
742} 742}
743#define __raw_spin_is_contended __raw_spin_is_contended 743#define arch_spin_is_contended arch_spin_is_contended
744 744
745static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) 745static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
746{ 746{
747 PVOP_VCALL1(pv_lock_ops.spin_lock, lock); 747 PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
748} 748}
749 749
750static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock, 750static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
751 unsigned long flags) 751 unsigned long flags)
752{ 752{
753 PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); 753 PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
754} 754}
755 755
756static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) 756static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
757{ 757{
758 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); 758 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
759} 759}
760 760
761static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) 761static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
762{ 762{
763 PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); 763 PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
764} 764}
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9357473c8da0..b1e70d51e40c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -318,14 +318,14 @@ struct pv_mmu_ops {
318 phys_addr_t phys, pgprot_t flags); 318 phys_addr_t phys, pgprot_t flags);
319}; 319};
320 320
321struct raw_spinlock; 321struct arch_spinlock;
322struct pv_lock_ops { 322struct pv_lock_ops {
323 int (*spin_is_locked)(struct raw_spinlock *lock); 323 int (*spin_is_locked)(struct arch_spinlock *lock);
324 int (*spin_is_contended)(struct raw_spinlock *lock); 324 int (*spin_is_contended)(struct arch_spinlock *lock);
325 void (*spin_lock)(struct raw_spinlock *lock); 325 void (*spin_lock)(struct arch_spinlock *lock);
326 void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); 326 void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
327 int (*spin_trylock)(struct raw_spinlock *lock); 327 int (*spin_trylock)(struct arch_spinlock *lock);
328 void (*spin_unlock)(struct raw_spinlock *lock); 328 void (*spin_unlock)(struct arch_spinlock *lock);
329}; 329};
330 330
331/* This contains all the paravirt structures: we get a convenient 331/* This contains all the paravirt structures: we get a convenient
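
With the rename, a pv_lock_ops instance is populated with the arch_spinlock-typed ticket primitives (defined in spinlock.h further down in this series). A sketch close to what the native implementation in arch/x86/kernel/paravirt-spinlocks.c does:

    static void default_spin_lock_flags(struct arch_spinlock *lock,
                                        unsigned long flags)
    {
        arch_spin_lock(lock);   /* the flags variant just ignores the flags */
    }

    struct pv_lock_ops pv_lock_ops = {
        .spin_is_locked    = __ticket_spin_is_locked,
        .spin_is_contended = __ticket_spin_is_contended,
        .spin_lock         = __ticket_spin_lock,
        .spin_lock_flags   = default_spin_lock_flags,
        .spin_trylock      = __ticket_spin_trylock,
        .spin_unlock       = __ticket_spin_unlock,
    };
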
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b399988eee3a..05b58ccb2e82 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -29,6 +29,7 @@
29#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 29#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
30#define PCI_HAS_IO_ECS 0x40000 30#define PCI_HAS_IO_ECS 0x40000
31#define PCI_NOASSIGN_ROMS 0x80000 31#define PCI_NOASSIGN_ROMS 0x80000
32#define PCI_ROOT_NO_CRS 0x100000
32 33
33extern unsigned int pci_probe; 34extern unsigned int pci_probe;
34extern unsigned long pirq_table_addr; 35extern unsigned long pirq_table_addr;
@@ -118,11 +119,27 @@ extern int __init pcibios_init(void);
118 119
119/* pci-mmconfig.c */ 120/* pci-mmconfig.c */
120 121
122/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
123#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
124
125struct pci_mmcfg_region {
126 struct list_head list;
127 struct resource res;
128 u64 address;
129 char __iomem *virt;
130 u16 segment;
131 u8 start_bus;
132 u8 end_bus;
133 char name[PCI_MMCFG_RESOURCE_NAME_LEN];
134};
135
121extern int __init pci_mmcfg_arch_init(void); 136extern int __init pci_mmcfg_arch_init(void);
122extern void __init pci_mmcfg_arch_free(void); 137extern void __init pci_mmcfg_arch_free(void);
138extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
139
140extern struct list_head pci_mmcfg_list;
123 141
124extern struct acpi_mcfg_allocation *pci_mmcfg_config; 142#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
125extern int pci_mmcfg_config_num;
126 143
127/* 144/*
128 * AMD Fam10h CPUs are buggy, and cannot access MMIO config space 145 * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
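
Each pci_mmcfg_region covers a bus range starting at start_bus; a config-space register for (bus, devfn, reg) then sits at a fixed offset inside the mapping: 1 MiB per bus (PCI_MMCFG_BUS_OFFSET) and 4 KiB per devfn. A sketch, assuming pci_mmconfig_lookup() returns the covering region or NULL and that virt is pre-biased for the region's start_bus:

    static char __iomem *mmcfg_addr(int seg, int bus, int devfn, int reg)
    {
        struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);

        if (!cfg || !cfg->virt)
            return NULL;
        return cfg->virt + PCI_MMCFG_BUS_OFFSET(bus) + (devfn << 12) + reg;
    }
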
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index b65a36defeb7..0c44196b78ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -74,31 +74,31 @@ extern void __bad_percpu_size(void);
74 74
75#define percpu_to_op(op, var, val) \ 75#define percpu_to_op(op, var, val) \
76do { \ 76do { \
77 typedef typeof(var) T__; \ 77 typedef typeof(var) pto_T__; \
78 if (0) { \ 78 if (0) { \
79 T__ tmp__; \ 79 pto_T__ pto_tmp__; \
80 tmp__ = (val); \ 80 pto_tmp__ = (val); \
81 } \ 81 } \
82 switch (sizeof(var)) { \ 82 switch (sizeof(var)) { \
83 case 1: \ 83 case 1: \
84 asm(op "b %1,"__percpu_arg(0) \ 84 asm(op "b %1,"__percpu_arg(0) \
85 : "+m" (var) \ 85 : "+m" (var) \
86 : "qi" ((T__)(val))); \ 86 : "qi" ((pto_T__)(val))); \
87 break; \ 87 break; \
88 case 2: \ 88 case 2: \
89 asm(op "w %1,"__percpu_arg(0) \ 89 asm(op "w %1,"__percpu_arg(0) \
90 : "+m" (var) \ 90 : "+m" (var) \
91 : "ri" ((T__)(val))); \ 91 : "ri" ((pto_T__)(val))); \
92 break; \ 92 break; \
93 case 4: \ 93 case 4: \
94 asm(op "l %1,"__percpu_arg(0) \ 94 asm(op "l %1,"__percpu_arg(0) \
95 : "+m" (var) \ 95 : "+m" (var) \
96 : "ri" ((T__)(val))); \ 96 : "ri" ((pto_T__)(val))); \
97 break; \ 97 break; \
98 case 8: \ 98 case 8: \
99 asm(op "q %1,"__percpu_arg(0) \ 99 asm(op "q %1,"__percpu_arg(0) \
100 : "+m" (var) \ 100 : "+m" (var) \
101 : "re" ((T__)(val))); \ 101 : "re" ((pto_T__)(val))); \
102 break; \ 102 break; \
103 default: __bad_percpu_size(); \ 103 default: __bad_percpu_size(); \
104 } \ 104 } \
@@ -106,31 +106,31 @@ do { \
106 106
107#define percpu_from_op(op, var, constraint) \ 107#define percpu_from_op(op, var, constraint) \
108({ \ 108({ \
109 typeof(var) ret__; \ 109 typeof(var) pfo_ret__; \
110 switch (sizeof(var)) { \ 110 switch (sizeof(var)) { \
111 case 1: \ 111 case 1: \
112 asm(op "b "__percpu_arg(1)",%0" \ 112 asm(op "b "__percpu_arg(1)",%0" \
113 : "=q" (ret__) \ 113 : "=q" (pfo_ret__) \
114 : constraint); \ 114 : constraint); \
115 break; \ 115 break; \
116 case 2: \ 116 case 2: \
117 asm(op "w "__percpu_arg(1)",%0" \ 117 asm(op "w "__percpu_arg(1)",%0" \
118 : "=r" (ret__) \ 118 : "=r" (pfo_ret__) \
119 : constraint); \ 119 : constraint); \
120 break; \ 120 break; \
121 case 4: \ 121 case 4: \
122 asm(op "l "__percpu_arg(1)",%0" \ 122 asm(op "l "__percpu_arg(1)",%0" \
123 : "=r" (ret__) \ 123 : "=r" (pfo_ret__) \
124 : constraint); \ 124 : constraint); \
125 break; \ 125 break; \
126 case 8: \ 126 case 8: \
127 asm(op "q "__percpu_arg(1)",%0" \ 127 asm(op "q "__percpu_arg(1)",%0" \
128 : "=r" (ret__) \ 128 : "=r" (pfo_ret__) \
129 : constraint); \ 129 : constraint); \
130 break; \ 130 break; \
131 default: __bad_percpu_size(); \ 131 default: __bad_percpu_size(); \
132 } \ 132 } \
133 ret__; \ 133 pfo_ret__; \
134}) 134})
135 135
136/* 136/*
@@ -153,6 +153,84 @@ do { \
153#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) 153#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
154#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) 154#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
155 155
156#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
157#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
158#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
159
160#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
161#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
162#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
163#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
164#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
165#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
166#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
167#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
168#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
169#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
170#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
171#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
172#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
173#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
174#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
175
176#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
177#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
178#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
179#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
180#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
181#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
182#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
183#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
184#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
185#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
186#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
187#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
188#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
189#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
190#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
191#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
192#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
193#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
194
195#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
196#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
197#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
198#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
199#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
200#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
201#define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
202#define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
203#define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
204#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
205#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
206#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
207
208/*
209 * Per-cpu atomic 64-bit operations are only available on 64-bit kernels;
210 * 32-bit must fall back to the generic operations.
211 */
212#ifdef CONFIG_X86_64
213#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
214#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
215#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
216#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
217#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
218#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
219
220#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
221#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
222#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
223#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
224#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
225#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
226
227#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
228#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
229#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
230#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
231
232#endif
233
156/* This is not atomic against other CPUs -- CPU preemption needs to be off */ 234/* This is not atomic against other CPUs -- CPU preemption needs to be off */
157#define x86_test_and_clear_bit_percpu(bit, var) \ 235#define x86_test_and_clear_bit_percpu(bit, var) \
158({ \ 236({ \
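
These suffixed operations are the size-specialized backends: the generic this_cpu_*() wrappers in linux/percpu.h are assumed to dispatch on sizeof(var) and land on one of the _1/_2/_4 (on 64-bit also _8) variants, each a single instruction against the %fs/%gs-based per-cpu segment, so no preempt_disable()/enable() bracketing is needed. The same effect is visible through the older percpu_add() family defined alongside percpu_or() above:

    #include <linux/percpu.h>

    DEFINE_PER_CPU(unsigned int, irq_hits);   /* hypothetical counter */

    static void note_irq(void)
    {
        percpu_add(irq_hits, 1);   /* one "addl" via percpu_to_op() */
    }
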
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8d9f8548a870..befd172c82ad 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -19,6 +19,7 @@
19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
20 20
21#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) 21#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
22#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21)
22#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) 23#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
23#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) 24#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
24#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) 25#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
@@ -26,7 +27,14 @@
26/* 27/*
27 * Includes eventsel and unit mask as well: 28 * Includes eventsel and unit mask as well:
28 */ 29 */
29#define ARCH_PERFMON_EVENT_MASK 0xffff 30
31
32#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
33#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
34#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
35#define INTEL_ARCH_INV_MASK 0x00800000ULL
36#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
37#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
30 38
31/* 39/*
32 * filter mask to validate fixed counter events. 40 * filter mask to validate fixed counter events.
@@ -37,7 +45,12 @@
37 * The other filters are supported by fixed counters. 45 * The other filters are supported by fixed counters.
38 * The any-thread option is supported starting with v3. 46 * The any-thread option is supported starting with v3.
39 */ 47 */
40#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 48#define INTEL_ARCH_FIXED_MASK \
49 (INTEL_ARCH_CNT_MASK| \
50 INTEL_ARCH_INV_MASK| \
51 INTEL_ARCH_EDGE_MASK|\
52 INTEL_ARCH_UNIT_MASK|\
53 INTEL_ARCH_EVTSEL_MASK)
41 54
42#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 55#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
43#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 56#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
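
The INTEL_ARCH_* masks carve the eventsel register into its fields (event select, unit mask, edge, invert, counter mask). A sketch composing the architectural "unhalted core cycles" event from the constants above and keeping only the fields the fixed-counter filter compares:

    static unsigned long long core_cycles_config(void)   /* hypothetical */
    {
        unsigned long long config =
            ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL |
            ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK;

        return config & INTEL_ARCH_EVENT_MASK;   /* evtsel + unit mask only */
    }
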
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 0e8c2a0fd922..271de94c3810 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -23,6 +23,11 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
23#endif 23#endif
24 24
25/* 25/*
26 * Flags to use when allocating a user page table page.
27 */
28extern gfp_t __userpte_alloc_gfp;
29
30/*
26 * Allocate and free page tables. 31 * Allocate and free page tables.
27 */ 32 */
28extern pgd_t *pgd_alloc(struct mm_struct *); 33extern pgd_t *pgd_alloc(struct mm_struct *);
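
__userpte_alloc_gfp lets the GFP flags for user page-table pages be tuned (e.g. dropping __GFP_HIGHMEM from the default). A sketch of how the allocator in arch/x86/mm/pgtable.c might honour it:

    pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
    {
        struct page *pte;

        pte = alloc_pages(__userpte_alloc_gfp, 0);
        if (pte)
            pgtable_page_ctor(pte);
        return pte;
    }
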
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6f8ec1c37e0a..b753ea59703a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -181,7 +181,7 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
181 unsigned int *ecx, unsigned int *edx) 181 unsigned int *ecx, unsigned int *edx)
182{ 182{
183 /* ecx is often an input as well as an output. */ 183 /* ecx is often an input as well as an output. */
184 asm("cpuid" 184 asm volatile("cpuid"
185 : "=a" (*eax), 185 : "=a" (*eax),
186 "=b" (*ebx), 186 "=b" (*ebx),
187 "=c" (*ecx), 187 "=c" (*ecx),
@@ -450,6 +450,8 @@ struct thread_struct {
450 struct perf_event *ptrace_bps[HBP_NUM]; 450 struct perf_event *ptrace_bps[HBP_NUM];
451 /* Debug status used for traps, single steps, etc... */ 451 /* Debug status used for traps, single steps, etc... */
452 unsigned long debugreg6; 452 unsigned long debugreg6;
453 /* Keep track of the exact dr7 value set by the user */
454 unsigned long ptrace_dr7;
453 /* Fault info: */ 455 /* Fault info: */
454 unsigned long cr2; 456 unsigned long cr2;
455 unsigned long trap_no; 457 unsigned long trap_no;
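
The asm volatile in native_cpuid() keeps gcc from dropping or merging CPUID executions. A usage sketch; note that ecx must be pre-loaded, since it is an input as well as an output:

    static void show_family(void)   /* hypothetical */
    {
        unsigned int eax = 1, ebx = 0, ecx = 0, edx = 0;

        native_cpuid(&eax, &ebx, &ecx, &edx);
        pr_info("cpuid.1: family %u\n", (eax >> 8) & 0xf);
    }
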
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 3d11fd0f44c5..20102808b191 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
274 return 0; 274 return 0;
275} 275}
276 276
277/* Get Nth argument at function call */
278extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
279 unsigned int n);
280
281/* 277/*
282 * These are defined as per linux/ptrace.h, which see. 278 * These are defined as per linux/ptrace.h, which see.
283 */ 279 */
@@ -292,6 +288,8 @@ extern void user_enable_block_step(struct task_struct *);
292#define arch_has_block_step() (boot_cpu_data.x86 >= 6) 288#define arch_has_block_step() (boot_cpu_data.x86 >= 6)
293#endif 289#endif
294 290
291#define ARCH_HAS_USER_SINGLE_STEP_INFO
292
295struct user_desc; 293struct user_desc;
296extern int do_get_thread_area(struct task_struct *p, int idx, 294extern int do_get_thread_area(struct task_struct *p, int idx,
297 struct user_desc __user *info); 295 struct user_desc __user *info);
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ca7517d33776..606ede126972 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -41,6 +41,7 @@
41#include <linux/list.h> 41#include <linux/list.h>
42#include <linux/spinlock.h> 42#include <linux/spinlock.h>
43#include <linux/lockdep.h> 43#include <linux/lockdep.h>
44#include <asm/asm.h>
44 45
45struct rwsem_waiter; 46struct rwsem_waiter;
46 47
@@ -55,17 +56,28 @@ extern asmregparm struct rw_semaphore *
55 56
56/* 57/*
57 * the semaphore definition 58 * the semaphore definition
59 *
60 * The bias values and the counter type limit the number of
61 * potential readers/writers to 32767 for 32 bits and 2147483647
62 * for 64 bits.
58 */ 63 */
59 64
60#define RWSEM_UNLOCKED_VALUE 0x00000000 65#ifdef CONFIG_X86_64
61#define RWSEM_ACTIVE_BIAS 0x00000001 66# define RWSEM_ACTIVE_MASK 0xffffffffL
62#define RWSEM_ACTIVE_MASK 0x0000ffff 67#else
63#define RWSEM_WAITING_BIAS (-0x00010000) 68# define RWSEM_ACTIVE_MASK 0x0000ffffL
69#endif
70
71#define RWSEM_UNLOCKED_VALUE 0x00000000L
72#define RWSEM_ACTIVE_BIAS 0x00000001L
73#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
64#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 74#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
65#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 75#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
66 76
77typedef signed long rwsem_count_t;
78
67struct rw_semaphore { 79struct rw_semaphore {
68 signed long count; 80 rwsem_count_t count;
69 spinlock_t wait_lock; 81 spinlock_t wait_lock;
70 struct list_head wait_list; 82 struct list_head wait_list;
71#ifdef CONFIG_DEBUG_LOCK_ALLOC 83#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -105,7 +117,7 @@ do { \
105static inline void __down_read(struct rw_semaphore *sem) 117static inline void __down_read(struct rw_semaphore *sem)
106{ 118{
107 asm volatile("# beginning down_read\n\t" 119 asm volatile("# beginning down_read\n\t"
108 LOCK_PREFIX " incl (%%eax)\n\t" 120 LOCK_PREFIX _ASM_INC "(%1)\n\t"
109 /* adds 0x00000001, returns the old value */ 121 /* adds 0x00000001, returns the old value */
110 " jns 1f\n" 122 " jns 1f\n"
111 " call call_rwsem_down_read_failed\n" 123 " call call_rwsem_down_read_failed\n"
@@ -121,14 +133,14 @@ static inline void __down_read(struct rw_semaphore *sem)
121 */ 133 */
122static inline int __down_read_trylock(struct rw_semaphore *sem) 134static inline int __down_read_trylock(struct rw_semaphore *sem)
123{ 135{
124 __s32 result, tmp; 136 rwsem_count_t result, tmp;
125 asm volatile("# beginning __down_read_trylock\n\t" 137 asm volatile("# beginning __down_read_trylock\n\t"
126 " movl %0,%1\n\t" 138 " mov %0,%1\n\t"
127 "1:\n\t" 139 "1:\n\t"
128 " movl %1,%2\n\t" 140 " mov %1,%2\n\t"
129 " addl %3,%2\n\t" 141 " add %3,%2\n\t"
130 " jle 2f\n\t" 142 " jle 2f\n\t"
131 LOCK_PREFIX " cmpxchgl %2,%0\n\t" 143 LOCK_PREFIX " cmpxchg %2,%0\n\t"
132 " jnz 1b\n\t" 144 " jnz 1b\n\t"
133 "2:\n\t" 145 "2:\n\t"
134 "# ending __down_read_trylock\n\t" 146 "# ending __down_read_trylock\n\t"
@@ -143,13 +155,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
143 */ 155 */
144static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) 156static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
145{ 157{
146 int tmp; 158 rwsem_count_t tmp;
147 159
148 tmp = RWSEM_ACTIVE_WRITE_BIAS; 160 tmp = RWSEM_ACTIVE_WRITE_BIAS;
149 asm volatile("# beginning down_write\n\t" 161 asm volatile("# beginning down_write\n\t"
150 LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" 162 LOCK_PREFIX " xadd %1,(%2)\n\t"
151 /* subtract 0x0000ffff, returns the old value */ 163 /* subtract 0x0000ffff, returns the old value */
152 " testl %%edx,%%edx\n\t" 164 " test %1,%1\n\t"
153 /* was the count 0 before? */ 165 /* was the count 0 before? */
154 " jz 1f\n" 166 " jz 1f\n"
155 " call call_rwsem_down_write_failed\n" 167 " call call_rwsem_down_write_failed\n"
@@ -170,9 +182,9 @@ static inline void __down_write(struct rw_semaphore *sem)
170 */ 182 */
171static inline int __down_write_trylock(struct rw_semaphore *sem) 183static inline int __down_write_trylock(struct rw_semaphore *sem)
172{ 184{
173 signed long ret = cmpxchg(&sem->count, 185 rwsem_count_t ret = cmpxchg(&sem->count,
174 RWSEM_UNLOCKED_VALUE, 186 RWSEM_UNLOCKED_VALUE,
175 RWSEM_ACTIVE_WRITE_BIAS); 187 RWSEM_ACTIVE_WRITE_BIAS);
176 if (ret == RWSEM_UNLOCKED_VALUE) 188 if (ret == RWSEM_UNLOCKED_VALUE)
177 return 1; 189 return 1;
178 return 0; 190 return 0;
@@ -183,9 +195,9 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
183 */ 195 */
184static inline void __up_read(struct rw_semaphore *sem) 196static inline void __up_read(struct rw_semaphore *sem)
185{ 197{
186 __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; 198 rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS;
187 asm volatile("# beginning __up_read\n\t" 199 asm volatile("# beginning __up_read\n\t"
188 LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" 200 LOCK_PREFIX " xadd %1,(%2)\n\t"
189 /* subtracts 1, returns the old value */ 201 /* subtracts 1, returns the old value */
190 " jns 1f\n\t" 202 " jns 1f\n\t"
191 " call call_rwsem_wake\n" 203 " call call_rwsem_wake\n"
@@ -201,18 +213,18 @@ static inline void __up_read(struct rw_semaphore *sem)
201 */ 213 */
202static inline void __up_write(struct rw_semaphore *sem) 214static inline void __up_write(struct rw_semaphore *sem)
203{ 215{
216 rwsem_count_t tmp;
204 asm volatile("# beginning __up_write\n\t" 217 asm volatile("# beginning __up_write\n\t"
205 " movl %2,%%edx\n\t" 218 LOCK_PREFIX " xadd %1,(%2)\n\t"
206 LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t"
207 /* tries to transition 219 /* tries to transition
208 0xffff0001 -> 0x00000000 */ 220 0xffff0001 -> 0x00000000 */
209 " jz 1f\n" 221 " jz 1f\n"
210 " call call_rwsem_wake\n" 222 " call call_rwsem_wake\n"
211 "1:\n\t" 223 "1:\n\t"
212 "# ending __up_write\n" 224 "# ending __up_write\n"
213 : "+m" (sem->count) 225 : "+m" (sem->count), "=d" (tmp)
214 : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) 226 : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS)
215 : "memory", "cc", "edx"); 227 : "memory", "cc");
216} 228}
217 229
218/* 230/*
@@ -221,33 +233,38 @@ static inline void __up_write(struct rw_semaphore *sem)
221static inline void __downgrade_write(struct rw_semaphore *sem) 233static inline void __downgrade_write(struct rw_semaphore *sem)
222{ 234{
223 asm volatile("# beginning __downgrade_write\n\t" 235 asm volatile("# beginning __downgrade_write\n\t"
224 LOCK_PREFIX " addl %2,(%%eax)\n\t" 236 LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t"
225 /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ 237 /*
238 * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
239 * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
240 */
226 " jns 1f\n\t" 241 " jns 1f\n\t"
227 " call call_rwsem_downgrade_wake\n" 242 " call call_rwsem_downgrade_wake\n"
228 "1:\n\t" 243 "1:\n\t"
229 "# ending __downgrade_write\n" 244 "# ending __downgrade_write\n"
230 : "+m" (sem->count) 245 : "+m" (sem->count)
231 : "a" (sem), "i" (-RWSEM_WAITING_BIAS) 246 : "a" (sem), "er" (-RWSEM_WAITING_BIAS)
232 : "memory", "cc"); 247 : "memory", "cc");
233} 248}
234 249
235/* 250/*
236 * implement atomic add functionality 251 * implement atomic add functionality
237 */ 252 */
238static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) 253static inline void rwsem_atomic_add(rwsem_count_t delta,
254 struct rw_semaphore *sem)
239{ 255{
240 asm volatile(LOCK_PREFIX "addl %1,%0" 256 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
241 : "+m" (sem->count) 257 : "+m" (sem->count)
242 : "ir" (delta)); 258 : "er" (delta));
243} 259}
244 260
245/* 261/*
246 * implement exchange and add functionality 262 * implement exchange and add functionality
247 */ 263 */
248static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) 264static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
265 struct rw_semaphore *sem)
249{ 266{
250 int tmp = delta; 267 rwsem_count_t tmp = delta;
251 268
252 asm volatile(LOCK_PREFIX "xadd %0,%1" 269 asm volatile(LOCK_PREFIX "xadd %0,%1"
253 : "+r" (tmp), "+m" (sem->count) 270 : "+r" (tmp), "+m" (sem->count)
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 72e5a4491661..04459d25e66e 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -124,7 +124,7 @@ struct sigcontext {
124 * fpstate is really (struct _fpstate *) or (struct _xstate *) 124 * fpstate is really (struct _fpstate *) or (struct _xstate *)
125 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved 125 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
126 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end 126 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
127 * of extended memory layout. See comments at the defintion of 127 * of extended memory layout. See comments at the definition of
128 * (struct _fpx_sw_bytes) 128 * (struct _fpx_sw_bytes)
129 */ 129 */
130 void __user *fpstate; /* zero when no FPU/extended context */ 130 void __user *fpstate; /* zero when no FPU/extended context */
@@ -219,7 +219,7 @@ struct sigcontext {
219 * fpstate is really (struct _fpstate *) or (struct _xstate *) 219 * fpstate is really (struct _fpstate *) or (struct _xstate *)
220 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved 220 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
221 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end 221 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
222 * of extended memory layout. See comments at the defintion of 222 * of extended memory layout. See comments at the definition of
223 * (struct _fpx_sw_bytes) 223 * (struct _fpx_sw_bytes)
224 */ 224 */
225 void __user *fpstate; /* zero when no FPU/extended context */ 225 void __user *fpstate; /* zero when no FPU/extended context */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1e796782cd7b..4cfc90824068 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -135,6 +135,8 @@ int native_cpu_disable(void);
135void native_cpu_die(unsigned int cpu); 135void native_cpu_die(unsigned int cpu);
136void native_play_dead(void); 136void native_play_dead(void);
137void play_dead_common(void); 137void play_dead_common(void);
138void wbinvd_on_cpu(int cpu);
139int wbinvd_on_all_cpus(void);
138 140
139void native_send_call_func_ipi(const struct cpumask *mask); 141void native_send_call_func_ipi(const struct cpumask *mask);
140void native_send_call_func_single_ipi(int cpu); 142void native_send_call_func_single_ipi(int cpu);
@@ -147,6 +149,13 @@ static inline int num_booting_cpus(void)
147{ 149{
148 return cpumask_weight(cpu_callout_mask); 150 return cpumask_weight(cpu_callout_mask);
149} 151}
152#else /* !CONFIG_SMP */
153#define wbinvd_on_cpu(cpu) wbinvd()
154static inline int wbinvd_on_all_cpus(void)
155{
156 wbinvd();
157 return 0;
158}
150#endif /* CONFIG_SMP */ 159#endif /* CONFIG_SMP */
151 160
152extern unsigned disabled_cpus __cpuinitdata; 161extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 4e77853321db..3089f70c0c52 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -58,7 +58,7 @@
58#if (NR_CPUS < 256) 58#if (NR_CPUS < 256)
59#define TICKET_SHIFT 8 59#define TICKET_SHIFT 8
60 60
61static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) 61static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
62{ 62{
63 short inc = 0x0100; 63 short inc = 0x0100;
64 64
@@ -77,7 +77,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
77 : "memory", "cc"); 77 : "memory", "cc");
78} 78}
79 79
80static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) 80static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
81{ 81{
82 int tmp, new; 82 int tmp, new;
83 83
@@ -96,7 +96,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
96 return tmp; 96 return tmp;
97} 97}
98 98
99static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) 99static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
100{ 100{
101 asm volatile(UNLOCK_LOCK_PREFIX "incb %0" 101 asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
102 : "+m" (lock->slock) 102 : "+m" (lock->slock)
@@ -106,7 +106,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
106#else 106#else
107#define TICKET_SHIFT 16 107#define TICKET_SHIFT 16
108 108
109static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) 109static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
110{ 110{
111 int inc = 0x00010000; 111 int inc = 0x00010000;
112 int tmp; 112 int tmp;
@@ -127,7 +127,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
127 : "memory", "cc"); 127 : "memory", "cc");
128} 128}
129 129
130static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) 130static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
131{ 131{
132 int tmp; 132 int tmp;
133 int new; 133 int new;
@@ -149,7 +149,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
149 return tmp; 149 return tmp;
150} 150}
151 151
152static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) 152static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
153{ 153{
154 asm volatile(UNLOCK_LOCK_PREFIX "incw %0" 154 asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
155 : "+m" (lock->slock) 155 : "+m" (lock->slock)
@@ -158,14 +158,14 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
158} 158}
159#endif 159#endif
160 160
161static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) 161static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
162{ 162{
163 int tmp = ACCESS_ONCE(lock->slock); 163 int tmp = ACCESS_ONCE(lock->slock);
164 164
165 return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); 165 return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
166} 166}
167 167
168static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) 168static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
169{ 169{
170 int tmp = ACCESS_ONCE(lock->slock); 170 int tmp = ACCESS_ONCE(lock->slock);
171 171
@@ -174,43 +174,43 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
174 174
175#ifndef CONFIG_PARAVIRT_SPINLOCKS 175#ifndef CONFIG_PARAVIRT_SPINLOCKS
176 176
177static inline int __raw_spin_is_locked(raw_spinlock_t *lock) 177static inline int arch_spin_is_locked(arch_spinlock_t *lock)
178{ 178{
179 return __ticket_spin_is_locked(lock); 179 return __ticket_spin_is_locked(lock);
180} 180}
181 181
182static inline int __raw_spin_is_contended(raw_spinlock_t *lock) 182static inline int arch_spin_is_contended(arch_spinlock_t *lock)
183{ 183{
184 return __ticket_spin_is_contended(lock); 184 return __ticket_spin_is_contended(lock);
185} 185}
186#define __raw_spin_is_contended __raw_spin_is_contended 186#define arch_spin_is_contended arch_spin_is_contended
187 187
188static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) 188static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
189{ 189{
190 __ticket_spin_lock(lock); 190 __ticket_spin_lock(lock);
191} 191}
192 192
193static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) 193static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
194{ 194{
195 return __ticket_spin_trylock(lock); 195 return __ticket_spin_trylock(lock);
196} 196}
197 197
198static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) 198static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
199{ 199{
200 __ticket_spin_unlock(lock); 200 __ticket_spin_unlock(lock);
201} 201}
202 202
203static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, 203static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
204 unsigned long flags) 204 unsigned long flags)
205{ 205{
206 __raw_spin_lock(lock); 206 arch_spin_lock(lock);
207} 207}
208 208
209#endif /* CONFIG_PARAVIRT_SPINLOCKS */ 209#endif /* CONFIG_PARAVIRT_SPINLOCKS */
210 210
211static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) 211static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
212{ 212{
213 while (__raw_spin_is_locked(lock)) 213 while (arch_spin_is_locked(lock))
214 cpu_relax(); 214 cpu_relax();
215} 215}
216 216
@@ -232,7 +232,7 @@ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
232 * read_can_lock - would read_trylock() succeed? 232 * read_can_lock - would read_trylock() succeed?
233 * @lock: the rwlock in question. 233 * @lock: the rwlock in question.
234 */ 234 */
235static inline int __raw_read_can_lock(raw_rwlock_t *lock) 235static inline int arch_read_can_lock(arch_rwlock_t *lock)
236{ 236{
237 return (int)(lock)->lock > 0; 237 return (int)(lock)->lock > 0;
238} 238}
@@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(raw_rwlock_t *lock)
241 * write_can_lock - would write_trylock() succeed? 241 * write_can_lock - would write_trylock() succeed?
242 * @lock: the rwlock in question. 242 * @lock: the rwlock in question.
243 */ 243 */
244static inline int __raw_write_can_lock(raw_rwlock_t *lock) 244static inline int arch_write_can_lock(arch_rwlock_t *lock)
245{ 245{
246 return (lock)->lock == RW_LOCK_BIAS; 246 return (lock)->lock == RW_LOCK_BIAS;
247} 247}
248 248
249static inline void __raw_read_lock(raw_rwlock_t *rw) 249static inline void arch_read_lock(arch_rwlock_t *rw)
250{ 250{
251 asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" 251 asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
252 "jns 1f\n" 252 "jns 1f\n"
@@ -255,7 +255,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
255 ::LOCK_PTR_REG (rw) : "memory"); 255 ::LOCK_PTR_REG (rw) : "memory");
256} 256}
257 257
258static inline void __raw_write_lock(raw_rwlock_t *rw) 258static inline void arch_write_lock(arch_rwlock_t *rw)
259{ 259{
260 asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" 260 asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
261 "jz 1f\n" 261 "jz 1f\n"
@@ -264,7 +264,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
264 ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); 264 ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
265} 265}
266 266
267static inline int __raw_read_trylock(raw_rwlock_t *lock) 267static inline int arch_read_trylock(arch_rwlock_t *lock)
268{ 268{
269 atomic_t *count = (atomic_t *)lock; 269 atomic_t *count = (atomic_t *)lock;
270 270
@@ -274,7 +274,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
274 return 0; 274 return 0;
275} 275}
276 276
277static inline int __raw_write_trylock(raw_rwlock_t *lock) 277static inline int arch_write_trylock(arch_rwlock_t *lock)
278{ 278{
279 atomic_t *count = (atomic_t *)lock; 279 atomic_t *count = (atomic_t *)lock;
280 280
@@ -284,23 +284,23 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
284 return 0; 284 return 0;
285} 285}
286 286
287static inline void __raw_read_unlock(raw_rwlock_t *rw) 287static inline void arch_read_unlock(arch_rwlock_t *rw)
288{ 288{
289 asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); 289 asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
290} 290}
291 291
292static inline void __raw_write_unlock(raw_rwlock_t *rw) 292static inline void arch_write_unlock(arch_rwlock_t *rw)
293{ 293{
294 asm volatile(LOCK_PREFIX "addl %1, %0" 294 asm volatile(LOCK_PREFIX "addl %1, %0"
295 : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); 295 : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
296} 296}
297 297
298#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) 298#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
299#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) 299#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
300 300
301#define _raw_spin_relax(lock) cpu_relax() 301#define arch_spin_relax(lock) cpu_relax()
302#define _raw_read_relax(lock) cpu_relax() 302#define arch_read_relax(lock) cpu_relax()
303#define _raw_write_relax(lock) cpu_relax() 303#define arch_write_relax(lock) cpu_relax()
304 304
305/* The {read|write|spin}_lock() on x86 are full memory barriers. */ 305/* The {read|write|spin}_lock() on x86 are full memory barriers. */
306static inline void smp_mb__after_lock(void) { } 306static inline void smp_mb__after_lock(void) { }
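
For the NR_CPUS < 256 case the slock word packs two one-byte ticket counters, which is what the XOR in __ticket_spin_is_locked() compares. A sketch of the same test in plain C:

    static inline int ticket_is_locked(unsigned int slock)   /* hypothetical */
    {
        unsigned int next  = (slock >> 8) & 0xff;   /* next ticket to hand out */
        unsigned int owner = slock & 0xff;          /* ticket being served */

        return next != owner;   /* held iff the two halves differ */
    }
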
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 845f81c87091..dcb48b2edc11 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -5,16 +5,16 @@
5# error "please don't include this file directly" 5# error "please don't include this file directly"
6#endif 6#endif
7 7
8typedef struct raw_spinlock { 8typedef struct arch_spinlock {
9 unsigned int slock; 9 unsigned int slock;
10} raw_spinlock_t; 10} arch_spinlock_t;
11 11
12#define __RAW_SPIN_LOCK_UNLOCKED { 0 } 12#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
13 13
14typedef struct { 14typedef struct {
15 unsigned int lock; 15 unsigned int lock;
16} raw_rwlock_t; 16} arch_rwlock_t;
17 17
18#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } 18#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
19 19
20#endif /* _ASM_X86_SPINLOCK_TYPES_H */ 20#endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index cf86a5e73815..4dab78edbad9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,7 +3,28 @@
3 3
4extern int kstack_depth_to_print; 4extern int kstack_depth_to_print;
5 5
6int x86_is_stack_id(int id, char *name); 6struct thread_info;
7struct stacktrace_ops;
8
9typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
10 unsigned long *stack,
11 unsigned long bp,
12 const struct stacktrace_ops *ops,
13 void *data,
14 unsigned long *end,
15 int *graph);
16
17extern unsigned long
18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end, int *graph);
22
23extern unsigned long
24print_context_stack_bp(struct thread_info *tinfo,
25 unsigned long *stack, unsigned long bp,
26 const struct stacktrace_ops *ops, void *data,
27 unsigned long *end, int *graph);
7 28
8/* Generic stack tracer with callbacks */ 29/* Generic stack tracer with callbacks */
9 30
@@ -14,6 +35,7 @@ struct stacktrace_ops {
14 void (*address)(void *data, unsigned long address, int reliable); 35 void (*address)(void *data, unsigned long address, int reliable);
15 /* On negative return stop dumping */ 36 /* On negative return stop dumping */
16 int (*stack)(void *data, char *name); 37 int (*stack)(void *data, char *name);
38 walk_stack_t walk_stack;
17}; 39};
18 40
19void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 41void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
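
walk_stack lets each stacktrace_ops choose its frame walker explicitly; print_context_stack is the frame-pointer-based one declared above. A minimal (incomplete) sketch of an ops instance after this change; fields not shown in this hunk are left out:

    static void example_address(void *data, unsigned long addr, int reliable)
    {
        printk(" [<%lx>] %s\n", addr, reliable ? "" : "(unreliable)");
    }

    static int example_stack(void *data, char *name)
    {
        return 0;   /* keep dumping */
    }

    static const struct stacktrace_ops example_ops = {   /* hypothetical */
        .address    = example_address,
        .stack      = example_stack,
        .walk_stack = print_context_stack,
    };
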
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 87ffcb12a1b8..8085277e1b8b 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -5,13 +5,17 @@
 
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
-extern int pci_swiotlb_init(void);
+extern int __init pci_swiotlb_detect(void);
+extern void __init pci_swiotlb_init(void);
 #else
 #define swiotlb 0
-static inline int pci_swiotlb_init(void)
+static inline int pci_swiotlb_detect(void)
 {
 	return 0;
 }
+static inline void pci_swiotlb_init(void)
+{
+}
 #endif
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
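
The split above separates deciding whether swiotlb is needed from actually
allocating the bounce buffers. A hedged sketch of the intended two-phase
use (example_iommu_setup is a hypothetical caller, paraphrased rather than
quoted from the patch):

	void __init example_iommu_setup(void)
	{
		if (pci_swiotlb_detect())	/* decide whether bounce buffers are needed */
			pci_swiotlb_init();	/* then allocate and install them */
	}
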
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 9af9decb38c3..d5f69045c100 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -30,7 +30,6 @@ struct mmap_arg_struct;
 asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
 asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
 
-asmlinkage long sys32_pipe(int __user *);
 struct sigaction32;
 struct old_sigaction32;
 asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
@@ -57,9 +56,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
-			    unsigned long, unsigned long, unsigned long);
-
 struct oldold_utsname;
 struct old_utsname;
 asmlinkage long sys32_olduname(struct oldold_utsname __user *);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8d33bc5462d1..c4a348f7bd43 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,6 +16,8 @@
 #include <linux/sched.h>
 #include <linux/err.h>
 
+extern const unsigned long sys_call_table[];
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 372b76edd63f..8868b9420b0e 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,16 +18,24 @@
 /* Common in X86_32 and X86_64 */
 /* kernel/ioport.c */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+long sys_iopl(unsigned int, struct pt_regs *);
 
 /* kernel/process.c */
 int sys_fork(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
+long sys_execve(char __user *, char __user * __user *,
+		char __user * __user *, struct pt_regs *);
+long sys_clone(unsigned long, unsigned long, void __user *,
+	       void __user *, struct pt_regs *);
 
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
 /* kernel/signal.c */
 long sys_rt_sigreturn(struct pt_regs *);
+long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+		     struct pt_regs *);
+
 
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
@@ -35,18 +43,11 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
 
 /* X86_32 only */
 #ifdef CONFIG_X86_32
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
-
-/* kernel/process_32.c */
-int sys_clone(struct pt_regs *);
-int sys_execve(struct pt_regs *);
 
 /* kernel/signal.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
			     struct old_sigaction __user *);
-int sys_sigaltstack(struct pt_regs *);
 unsigned long sys_sigreturn(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
@@ -55,8 +56,6 @@ struct sel_arg_struct;
 struct oldold_utsname;
 struct old_utsname;
 
-asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
-			  unsigned long, unsigned long, unsigned long);
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
 asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
@@ -64,28 +63,15 @@ asmlinkage int sys_uname(struct old_utsname __user *);
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
-int sys_vm86old(struct pt_regs *);
-int sys_vm86(struct pt_regs *);
+int sys_vm86old(struct vm86_struct __user *, struct pt_regs *);
+int sys_vm86(unsigned long, unsigned long, struct pt_regs *);
 
 #else /* CONFIG_X86_32 */
 
 /* X86_64 only */
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
 /* kernel/process_64.c */
-asmlinkage long sys_clone(unsigned long, unsigned long,
-			  void __user *, void __user *,
-			  struct pt_regs *);
-asmlinkage long sys_execve(char __user *, char __user * __user *,
-			   char __user * __user *,
-			   struct pt_regs *);
 long sys_arch_prctl(int, unsigned long);
 
-/* kernel/signal.c */
-asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
-				struct pt_regs *);
-
 /* kernel/sys_x86_64.c */
 struct new_utsname;
 
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 022a84386de8..e04740f7a0bb 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -11,9 +11,9 @@
 #include <linux/irqflags.h>
 
 /* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
 # define AT_VECTOR_SIZE_ARCH 2
-#else
+#else /* else it's non-compat x86-64 */
 # define AT_VECTOR_SIZE_ARCH 1
 #endif
 
@@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss);
+extern void show_regs_common(void);
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 375c917c37d2..e0d28901e969 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,7 +87,6 @@ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
-#define TIF_ABI_PENDING		19
 #define TIF_MEMDIE		20
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -112,7 +111,6 @@ struct thread_info {
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
-#define _TIF_ABI_PENDING	(1 << TIF_ABI_PENDING)
 #define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 40e37b10c6c0..c5087d796587 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -35,11 +35,16 @@
 # endif
 #endif
 
-/* Node not present */
-#define NUMA_NO_NODE	(-1)
+/*
+ * to preserve the visibility of NUMA_NO_NODE definition,
+ * moved there from here.  May be used independent of
+ * CONFIG_NUMA.
+ */
+#include <linux/numa.h>
 
 #ifdef CONFIG_NUMA
 #include <linux/cpumask.h>
+
 #include <asm/mpspec.h>
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 90f06c25221d..cb507bb05d79 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -16,7 +16,6 @@ extern unsigned long initial_code;
 extern unsigned long initial_gs;
 
 #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
-#define TRAMPOLINE_BASE 0x6000
 
 extern unsigned long setup_trampoline(void);
 extern void __init reserve_trampoline_memory(void);
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 0c9825e97f36..088d09fb1615 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -205,14 +205,13 @@ static inline unsigned long __must_check copy_from_user(void *to,
					 unsigned long n)
 {
	int sz = __compiletime_object_size(to);
-	int ret = -EFAULT;
 
	if (likely(sz == -1 || sz >= n))
-		ret = _copy_from_user(to, from, n);
+		n = _copy_from_user(to, from, n);
	else
		copy_from_user_overflow();
 
-	return ret;
+	return n;
 }
 
 long __must_check strncpy_from_user(char *dst, const char __user *src,
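
The fix above matters to callers: copy_from_user() returns the number of
bytes NOT copied (0 on success), never a negative errno, so returning the
stale -EFAULT broke the contract. A minimal sketch of correct caller-side
usage (example_read, kbuf, ubuf and count are illustrative names):

	static long example_read(void *kbuf, const void __user *ubuf, size_t count)
	{
		if (copy_from_user(kbuf, ubuf, count))
			return -EFAULT;	/* nonzero return = bytes left uncopied */
		return 0;
	}
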
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 46324c6a4f6e..316708d5af92 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,6 +8,8 @@
 #include <linux/errno.h>
 #include <linux/prefetch.h>
 #include <linux/lockdep.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
 #include <asm/page.h>
 
 /*
@@ -16,7 +18,24 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len);
+copy_user_generic_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
+copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+
+static __always_inline __must_check unsigned long
+copy_user_generic(void *to, const void *from, unsigned len)
+{
+	unsigned ret;
+
+	alternative_call(copy_user_generic_unrolled,
+			 copy_user_generic_string,
+			 X86_FEATURE_REP_GOOD,
+			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
+				     "=d" (len)),
+			 "1" (to), "2" (from), "3" (len)
+			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+	return ret;
+}
 
 __must_check unsigned long
 _copy_to_user(void __user *to, const void *from, unsigned len);
@@ -30,16 +49,15 @@ static inline unsigned long __must_check copy_from_user(void *to,
					 unsigned long n)
 {
	int sz = __compiletime_object_size(to);
-	int ret = -EFAULT;
 
	might_fault();
	if (likely(sz == -1 || sz >= n))
-		ret = _copy_from_user(to, from, n);
+		n = _copy_from_user(to, from, n);
 #ifdef CONFIG_DEBUG_VM
	else
		WARN(1, "Buffer overflow detected!\n");
 #endif
-	return ret;
+	return n;
 }
 
 static __always_inline __must_check
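
The alternative_call() above selects one of two copy implementations by
patching the call target at boot, so there is no per-call branch. A hedged
sketch of the roughly equivalent logic, minus the boot-time patching
(copy_user_generic_sketch is a hypothetical name; boot_cpu_has() is the
usual feature test, used here only for illustration):

	static unsigned long copy_user_generic_sketch(void *to, const void *from,
						      unsigned len)
	{
		if (boot_cpu_has(X86_FEATURE_REP_GOOD))
			return copy_user_generic_string(to, from, len);	/* "rep movs" path */
		return copy_user_generic_unrolled(to, from, len);	/* unrolled-loop path */
	}
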
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index 999873b22e7f..24532c7da3d6 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -1,5 +1,63 @@
+#ifndef _ASM_X86_USER_H
+#define _ASM_X86_USER_H
+
 #ifdef CONFIG_X86_32
 # include "user_32.h"
 #else
 # include "user_64.h"
 #endif
+
+#include <asm/types.h>
+
+struct user_ymmh_regs {
+	/* 16 * 16 bytes for each YMMH-reg */
+	__u32 ymmh_space[64];
+};
+
+struct user_xsave_hdr {
+	__u64 xstate_bv;
+	__u64 reserved1[2];
+	__u64 reserved2[5];
+};
+
+/*
+ * The structure layout of user_xstateregs, used for exporting the
+ * extended register state through ptrace and core-dump (NT_X86_XSTATE note)
+ * interfaces, will be the same as the memory layout of xsave used by the
+ * processor (except for the bytes 464..511, which can be used by software)
+ * and hence the size of this structure varies depending on the features
+ * supported by the processor and OS. The size of the structure that users
+ * need to use can be obtained by doing:
+ *     cpuid_count(0xd, 0, &eax, &ptrace_xstateregs_struct_size, &ecx, &edx);
+ * i.e., cpuid.(eax=0xd,ecx=0).ebx will be the size that users (debuggers,
+ * etc.) need to use.
+ *
+ * For now, only the first 8 bytes of the software-usable bytes[464..471]
+ * will be used and will be set to the OS-enabled xstate mask (which is the
+ * same as the 64-bit mask returned by xgetbv's xCR0). Users (analyzing a
+ * core dump remotely, etc.) can use this mask as well as the mask saved in
+ * the xstate_hdr bytes and interpret what states the processor/OS supports
+ * and what states are in modified/initialized conditions for the
+ * particular process/thread.
+ *
+ * Also when the user modifies certain state FP/SSE/etc through the
+ * ptrace interface, they must ensure that the xsave_hdr.xstate_bv
+ * bytes[512..519] of the memory layout are updated correspondingly.
+ * i.e., for example when FP state is modified to a non-init state,
+ * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to
+ * a non-init state, xsave_hdr.xstate_bv's bit 1 must be set to '1', etc.
+ */
+#define USER_XSTATE_FX_SW_WORDS 6
+#define USER_XSTATE_XCR0_WORD	0
+
+struct user_xstateregs {
+	struct {
+		__u64 fpx_space[58];
+		__u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS];
+	} i387;
+	struct user_xsave_hdr xsave_hdr;
+	struct user_ymmh_regs ymmh;
+	/* further processor state extensions go here */
+};
+
+#endif /* _ASM_X86_USER_H */
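
A sketch of the size query the comment above describes (xstateregs_size
is a hypothetical helper name; cpuid_count() is the kernel's usual CPUID
wrapper from <asm/processor.h>):

	static unsigned int xstateregs_size(void)
	{
		unsigned int eax, ebx, ecx, edx;

		cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
		return ebx;	/* bytes needed for the currently enabled xstate features */
	}
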
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 7ed17ff502b9..71605c7d5c5c 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -18,8 +18,8 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
- * Copyright (c) Russ Anderson
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
  */
 
 #include <linux/rtc.h>
@@ -36,7 +36,8 @@ enum uv_bios_cmd {
	UV_BIOS_WATCHLIST_ALLOC,
	UV_BIOS_WATCHLIST_FREE,
	UV_BIOS_MEMPROTECT,
-	UV_BIOS_GET_PARTITION_ADDR
+	UV_BIOS_GET_PARTITION_ADDR,
+	UV_BIOS_SET_LEGACY_VGA_TARGET
 };
 
 /*
@@ -76,15 +77,6 @@ union partition_info_u {
	};
 };
 
-union uv_watchlist_u {
-	u64 val;
-	struct {
-		u64	blade	: 16,
-			size	: 32,
-			filler	: 16;
-	};
-};
-
 enum uv_memprotect {
	UV_MEMPROT_RESTRICT_ACCESS,
	UV_MEMPROT_ALLOW_AMO,
@@ -98,13 +90,14 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
 
-extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
 extern s64 uv_bios_freq_base(u64, u64 *);
-extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int,
+extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
				      unsigned long *);
 extern int uv_bios_mq_watchlist_free(int, int);
 extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
 extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
+extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
 
 extern void uv_bios_init(void);
 
@@ -113,6 +106,7 @@ extern int uv_type;
 extern long sn_partition_id;
 extern long sn_coherency_id;
 extern long sn_region_size;
+extern long system_serial_number;
 #define partition_coherence_id()	(sn_coherency_id)
 
 extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index c0a01b5d985b..3bb9491b7659 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -11,6 +11,7 @@ struct mm_struct;
 extern enum uv_system_type get_uv_system_type(void);
 extern int is_uv_system(void);
 extern void uv_cpu_init(void);
+extern void uv_nmi_init(void);
 extern void uv_system_init(void);
 extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
						 struct mm_struct *mm,
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 80e2984f521c..b414d2b401f6 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -55,7 +55,7 @@
 #define DESC_STATUS_SOURCE_TIMEOUT	3
 
 /*
- * source side threshholds at which message retries print a warning
+ * source side thresholds at which message retries print a warning
 */
 #define SOURCE_TIMEOUT_LIMIT		20
 #define DESTINATION_TIMEOUT_LIMIT	20
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d1414af98559..14cc74ba5d23 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -31,20 +31,20 @@
  * contiguous (although various IO spaces may punch holes in
  * it)..
  *
  *	N	- Number of bits in the node portion of a socket physical
  *		  address.
  *
  *	NASID   - network ID of a router, Mbrick or Cbrick. Nasid values of
  *		  routers always have low bit of 1, C/MBricks have low bit
  *		  equal to 0. Most addressing macros that target UV hub chips
  *		  right shift the NASID by 1 to exclude the always-zero bit.
  *		  NASIDs contain up to 15 bits.
  *
  *	GNODE   - NASID right shifted by 1 bit. Most mmrs contain gnodes instead
  *		  of nasids.
  *
  *	PNODE   - the low N bits of the GNODE. The PNODE is the most useful variant
  *		  of the nasid for socket usage.
  *
  *
  *  NumaLink Global Physical Address Format:
@@ -71,12 +71,12 @@
  *
  *
  * APICID format
  *	NOTE!!!!!! This is the current format of the APICID. However, code
  *	should assume that this will change in the future. Use functions
  *	in this file for all APICID bit manipulations and conversion.
  *
  *	1111110000000000
  *	5432109876543210
  *	pppppppppplc0cch
  *	sssssssssss
  *
@@ -89,9 +89,9 @@
  * Note: Processor only supports 12 bits in the APICID register. The ACPI
  *       tables hold all 16 bits. Software needs to be aware of this.
  *
  *	Unless otherwise specified, all references to APICID refer to
  *	the FULL value contained in ACPI tables, not the subset in the
  *	processor APICID register.
  */
 
 
@@ -151,16 +151,16 @@ struct uv_hub_info_s {
 };
 
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define uv_hub_info		(&__get_cpu_var(__uv_hub_info))
 #define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
 
 /*
  * Local & Global MMR space macros.
  *	Note: macros are intended to be used ONLY by inline functions
  *	in this file - not by other kernel code.
  *		n -  NASID (full 15-bit global nasid)
  *		g -  GNODE (full 15-bit global nasid, right shifted 1)
  *		p -  PNODE (local part of nsids, right shifted 1)
  */
 #define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
 #define UV_PNODE_TO_GNODE(p)		((p) |uv_hub_info->gnode_extra)
@@ -172,6 +172,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define UV_LOCAL_MMR_SIZE		(64UL * 1024 * 1024)
 #define UV_GLOBAL_MMR32_SIZE		(64UL * 1024 * 1024)
 
+#define UV_GLOBAL_GRU_MMR_BASE		0x4000000
+
 #define UV_GLOBAL_MMR32_PNODE_SHIFT	15
 #define UV_GLOBAL_MMR64_PNODE_SHIFT	26
 
@@ -213,8 +215,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 /*
  * Macros for converting between kernel virtual addresses, socket local physical
  * addresses, and UV global physical addresses.
  *	Note: use the standard __pa() & __va() macros for converting
  *	      between socket virtual and socket physical addresses.
  */
 
 /* socket phys RAM --> UV global physical address */
@@ -232,6 +234,26 @@ static inline unsigned long uv_gpa(void *v)
	return uv_soc_phys_ram_to_gpa(__pa(v));
 }
 
+/* Top two bits indicate the requested address is in MMR space. */
+static inline int
+uv_gpa_in_mmr_space(unsigned long gpa)
+{
+	return (gpa >> 62) == 0x3UL;
+}
+
+/* UV global physical address --> socket phys RAM */
+static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
+{
+	unsigned long paddr = gpa & uv_hub_info->gpa_mask;
+	unsigned long remap_base = uv_hub_info->lowmem_remap_base;
+	unsigned long remap_top = uv_hub_info->lowmem_remap_top;
+
+	if (paddr >= remap_base && paddr < remap_base + remap_top)
+		paddr -= remap_base;
+	return paddr;
+}
+
+
 /* gnode -> pnode */
 static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
 {
@@ -265,21 +287,18 @@ static inline int uv_apicid_to_pnode(int apicid)
  * Access global MMRs using the low memory MMR32 space. This region supports
  * faster MMR access but not all MMRs are accessible in this space.
  */
-static inline unsigned long *uv_global_mmr32_address(int pnode,
-						     unsigned long offset)
+static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset)
 {
	return __va(UV_GLOBAL_MMR32_BASE |
		    UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset);
 }
 
-static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
-					 unsigned long val)
+static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val)
 {
	writeq(val, uv_global_mmr32_address(pnode, offset));
 }
 
-static inline unsigned long uv_read_global_mmr32(int pnode,
-						 unsigned long offset)
+static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset)
 {
	return readq(uv_global_mmr32_address(pnode, offset));
 }
@@ -288,26 +307,43 @@ static inline unsigned long uv_read_global_mmr32(int pnode,
  * Access Global MMR space using the MMR space located at the top of physical
  * memory.
  */
-static inline unsigned long *uv_global_mmr64_address(int pnode,
-						     unsigned long offset)
+static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset)
 {
	return __va(UV_GLOBAL_MMR64_BASE |
		    UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset);
 }
 
-static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
-					 unsigned long val)
+static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val)
 {
	writeq(val, uv_global_mmr64_address(pnode, offset));
 }
 
-static inline unsigned long uv_read_global_mmr64(int pnode,
-						 unsigned long offset)
+static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset)
 {
	return readq(uv_global_mmr64_address(pnode, offset));
 }
 
 /*
+ * Global MMR space addresses when referenced by the GRU. (GRU does
+ * NOT use socket addressing).
+ */
+static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset)
+{
+	return UV_GLOBAL_GRU_MMR_BASE | offset |
+		((unsigned long)pnode << uv_hub_info->m_val);
+}
+
+static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val)
+{
+	writeb(val, uv_global_mmr64_address(pnode, offset));
+}
+
+static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset)
+{
+	return readb(uv_global_mmr64_address(pnode, offset));
+}
+
+/*
  * Access hub local MMRs. Faster than using global space but only local MMRs
  * are accessible.
  */
@@ -426,14 +462,28 @@ static inline void uv_set_scir_bits(unsigned char value)
	}
 }
 
+static inline unsigned long uv_scir_offset(int apicid)
+{
+	return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
+}
+
 static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
 {
	if (uv_cpu_hub_info(cpu)->scir.state != value) {
+		uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
+				     uv_cpu_hub_info(cpu)->scir.offset, value);
		uv_cpu_hub_info(cpu)->scir.state = value;
-		uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
	}
 }
 
+static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
+{
+	return (1UL << UVH_IPI_INT_SEND_SHFT) |
+			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
+			(mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
+			(vector << UVH_IPI_INT_VECTOR_SHFT);
+}
+
 static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
 {
	unsigned long val;
@@ -442,12 +492,21 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
	if (vector == NMI_VECTOR)
		dmode = dest_NMI;
 
-	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
-			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
-			(dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
-			(vector << UVH_IPI_INT_VECTOR_SHFT);
+	val = uv_hub_ipi_value(apicid, vector, dmode);
	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
+/*
+ * Get the minimum revision number of the hub chips within the partition.
+ *   1 - initial rev 1.0 silicon
+ *   2 - rev 2.0 production silicon
+ */
+static inline int uv_get_min_hub_revision_id(void)
+{
+	extern int uv_min_hub_revision_id;
+
+	return uv_min_hub_revision_id;
+}
+
 #endif /* CONFIG_X86_64 */
 #endif /* _ASM_X86_UV_UV_HUB_H */
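
An illustrative check of the address translation added above (a sketch,
not patch code; uv_gpa_roundtrip_check is a hypothetical helper, and
uv_soc_phys_ram_to_gpa() appears earlier in this header). It assumes
paddr is socket RAM covered by these helpers:

	static inline void uv_gpa_roundtrip_check(unsigned long paddr)
	{
		unsigned long gpa = uv_soc_phys_ram_to_gpa(paddr);

		/* the new reverse mapping should undo the forward one */
		WARN_ON(uv_gpa_to_soc_phys_ram(gpa) != paddr);
	}
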
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index ea0e8ea15e15..60cc35269083 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -126,6 +126,7 @@ struct x86_cpuinit_ops {
  * @get_wallclock:		get time from HW clock like RTC etc.
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
+ * @nmi_init			enable NMI on cpus
  */
 struct x86_platform_ops {
	unsigned long (*calibrate_tsc)(void);
@@ -133,6 +134,7 @@ struct x86_platform_ops {
	int (*set_wallclock)(unsigned long nowtime);
	void (*iommu_shutdown)(void);
	bool (*is_untracked_pat_range)(u64 start, u64 end);
+	void (*nmi_init)(void);
 };
 
 extern struct x86_init_ops x86_init;
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index d5b7e90c0edf..396ff4cc8ed4 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -37,31 +37,4 @@
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
-enum xen_domain_type {
-	XEN_NATIVE,		/* running on bare hardware    */
-	XEN_PV_DOMAIN,		/* running in a PV domain      */
-	XEN_HVM_DOMAIN,		/* running in a Xen hvm domain */
-};
-
-#ifdef CONFIG_XEN
-extern enum xen_domain_type xen_domain_type;
-#else
-#define xen_domain_type		XEN_NATIVE
-#endif
-
-#define xen_domain()		(xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain()		(xen_domain() &&			\
-				 xen_domain_type == XEN_PV_DOMAIN)
-#define xen_hvm_domain()	(xen_domain() &&			\
-				 xen_domain_type == XEN_HVM_DOMAIN)
-
-#ifdef CONFIG_XEN_DOM0
-#include <xen/interface/xen.h>
-
-#define xen_initial_domain()	(xen_pv_domain() && \
-				 xen_start_info->flags & SIF_INITDOMAIN)
-#else  /* !CONFIG_XEN_DOM0 */
-#define xen_initial_domain()	(0)
-#endif	/* CONFIG_XEN_DOM0 */
-
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 727acc152344..ddc04ccad03b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -27,9 +27,11 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern struct xsave_struct *init_xstate_buf;
+extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void xsave_cntxt_init(void);
 extern void xsave_init(void);
+extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 extern int init_fpu(struct task_struct *child);
 extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
			    void __user *fpstate,
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4f2e66e29ecc..d87f09bc5a52 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -89,7 +89,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 
 obj-$(CONFIG_K8_NB)		+= k8.o
-obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
 obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index fd5ca97a2ad5..6f35260bb3ef 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_rm.o wakeup_$(BITS).o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y				+= cstate.o processor.o
+obj-y				+= cstate.o
 endif
 
 $(obj)/wakeup_rm.o:    $(obj)/realmode/wakeup.bin
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 87eee07da21f..f95703098f8d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -49,6 +49,7 @@ EXPORT_SYMBOL(acpi_disabled);
 
 #ifdef CONFIG_X86_64
 # include <asm/proto.h>
+# include <asm/numa_64.h>
 #endif				/* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
@@ -482,6 +483,25 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 
+static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+{
+#ifdef CONFIG_ACPI_NUMA
+	int nid;
+
+	nid = acpi_get_node(handle);
+	if (nid == -1 || !node_online(nid))
+		return;
+#ifdef CONFIG_X86_64
+	apicid_to_node[physid] = nid;
+	numa_set_node(cpu, nid);
+#else /* CONFIG_X86_32 */
+	apicid_2_node[physid] = nid;
+	cpu_to_node_map[cpu] = nid;
+#endif
+
+#endif
+}
+
 static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -540,6 +560,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
	}
 
	cpu = cpumask_first(new_map);
+	acpi_map_cpu2node(handle, cpu, physid);
 
	*pcpu = cpu;
	retval = 0;
@@ -1123,7 +1144,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
	if (!acpi_sci_override_gsi)
		acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0);
 
-	/* Fill in identity legacy mapings where no override */
+	/* Fill in identity legacy mappings where no override */
	mp_config_acpi_legacy_irqs();
 
	count =
@@ -1185,9 +1206,6 @@ static void __init acpi_process_madt(void)
	if (!error) {
		acpi_lapic = 1;
 
-#ifdef CONFIG_X86_BIGSMP
-		generic_bigsmp_probe();
-#endif
		/*
		 * Parse MADT IO-APIC entries
		 */
@@ -1197,8 +1215,6 @@ static void __init acpi_process_madt(void)
			acpi_ioapic = 1;
 
			smp_found_config = 1;
-			if (apic->setup_apic_routing)
-				apic->setup_apic_routing();
		}
	}
	if (error == -EINVAL) {
@@ -1349,14 +1365,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
	},
	{
		.callback = force_acpi_ht,
-		.ident = "ASUS P2B-DS",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
-			DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
-		},
-	},
-	{
-		.callback = force_acpi_ht,
		.ident = "ASUS CUR-DLS",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
@@ -1529,16 +1537,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
 *	if acpi_blacklisted() acpi_disabled = 1;
 *	acpi_irq_model=...
 *	...
- *
- * return value: (currently ignored)
- *	0: success
- *	!0: failure
 */
 
-int __init acpi_boot_table_init(void)
+void __init acpi_boot_table_init(void)
 {
-	int error;
-
	dmi_check_system(acpi_dmi_table);
 
	/*
@@ -1546,15 +1548,14 @@ int __init acpi_boot_table_init(void)
	 * One exception: acpi=ht continues far enough to enumerate LAPICs
	 */
	if (acpi_disabled && !acpi_ht)
-		return 1;
+		return;
 
	/*
	 * Initialize the ACPI boot-time table parser.
	 */
-	error = acpi_table_init();
-	if (error) {
+	if (acpi_table_init()) {
		disable_acpi();
-		return error;
+		return;
	}
 
	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
@@ -1562,18 +1563,15 @@ int __init acpi_boot_table_init(void)
	/*
	 * blacklist may disable ACPI entirely
	 */
-	error = acpi_blacklisted();
-	if (error) {
+	if (acpi_blacklisted()) {
		if (acpi_force) {
			printk(KERN_WARNING PREFIX "acpi=force override\n");
		} else {
			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
			disable_acpi();
-			return error;
+			return;
		}
	}
-
-	return 0;
 }
 
 int __init early_acpi_boot_init(void)
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 59cdfa4686b2..2e837f5080fe 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
	 * P4, Core and beyond CPUs
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
-	    (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
+	    (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
		flags->bm_control = 0;
 }
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
deleted file mode 100644
index d85d1b2432ba..000000000000
--- a/arch/x86/kernel/acpi/processor.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2005 Intel Corporation
- *	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *	- Added _PDC for platforms with Intel CPUs
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-
-#include <acpi/processor.h>
-#include <asm/acpi.h>
-
-static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
-{
-	struct acpi_object_list *obj_list;
-	union acpi_object *obj;
-	u32 *buf;
-
-	/* allocate and initialize pdc. It will be used later. */
-	obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
-	if (!obj_list) {
-		printk(KERN_ERR "Memory allocation error\n");
-		return;
-	}
-
-	obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
-	if (!obj) {
-		printk(KERN_ERR "Memory allocation error\n");
-		kfree(obj_list);
-		return;
-	}
-
-	buf = kmalloc(12, GFP_KERNEL);
-	if (!buf) {
-		printk(KERN_ERR "Memory allocation error\n");
-		kfree(obj);
-		kfree(obj_list);
-		return;
-	}
-
-	buf[0] = ACPI_PDC_REVISION_ID;
-	buf[1] = 1;
-	buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
-
-	/*
-	 * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so
-	 * that OSPM is capable of native ACPI throttling software
-	 * coordination using BIOS supplied _TSD info.
-	 */
-	buf[2] |= ACPI_PDC_SMP_T_SWCOORD;
-	if (cpu_has(c, X86_FEATURE_EST))
-		buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
-
-	if (cpu_has(c, X86_FEATURE_ACPI))
-		buf[2] |= ACPI_PDC_T_FFH;
-
-	/*
-	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
-	 */
-	if (!cpu_has(c, X86_FEATURE_MWAIT))
-		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
-
-	obj->type = ACPI_TYPE_BUFFER;
-	obj->buffer.length = 12;
-	obj->buffer.pointer = (u8 *) buf;
-	obj_list->count = 1;
-	obj_list->pointer = obj;
-	pr->pdc = obj_list;
-
-	return;
-}
-
-
-/* Initialize _PDC data based on the CPU vendor */
-void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
-{
-	struct cpuinfo_x86 *c = &cpu_data(pr->id);
-
-	pr->pdc = NULL;
-	if (c->x86_vendor == X86_VENDOR_INTEL ||
-	    c->x86_vendor == X86_VENDOR_CENTAUR)
-		init_intel_pdc(pr, c);
-
-	return;
-}
-
-EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
-
-void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
-{
-	if (pr->pdc) {
-		kfree(pr->pdc->pointer->buffer.pointer);
-		kfree(pr->pdc->pointer);
-		kfree(pr->pdc);
-		pr->pdc = NULL;
-	}
-}
-
-EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 82e508677b91..f9961034e557 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
	if (strncmp(str, "old_ordering", 12) == 0)
		acpi_old_suspend_ordering();
+	if (strncmp(str, "sci_force_enable", 16) == 0)
+		acpi_set_sci_en_on_resume();
	str = strchr(str, ',');
	if (str != NULL)
		str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..e6ea0342c8f8 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -205,7 +205,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
					 struct alt_instr *end)
 {
	struct alt_instr *a;
-	char insnbuf[MAX_PATCH_LEN];
+	u8 insnbuf[MAX_PATCH_LEN];
 
	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
	for (a = start; a < end; a++) {
@@ -223,6 +223,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
		}
 #endif
		memcpy(insnbuf, a->replacement, a->replacementlen);
+		if (*insnbuf == 0xe8 && a->replacementlen == 5)
+			*(s32 *)(insnbuf + 1) += a->replacement - a->instr;
		add_nops(insnbuf + a->replacementlen,
			 a->instrlen - a->replacementlen);
		text_poke_early(instr, insnbuf, a->instrlen);
@@ -390,6 +392,24 @@ void alternatives_smp_switch(int smp)
	mutex_unlock(&smp_alt);
 }
 
+/* Return 1 if the address range is reserved for smp-alternatives */
+int alternatives_text_reserved(void *start, void *end)
+{
+	struct smp_alt_module *mod;
+	u8 **ptr;
+	u8 *text_start = start;
+	u8 *text_end = end;
+
+	list_for_each_entry(mod, &smp_alt_modules, next) {
+		if (mod->text > text_end || mod->text_end < text_start)
+			continue;
+		for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
+			if (text_start <= *ptr && text_end >= *ptr)
+				return 1;
+	}
+
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_PARAVIRT
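
Why the 0xe8 fixup above is needed, as a worked derivation (illustrative
arithmetic, not patch code): a 5-byte "call rel32" encodes its target as
target = ip + 5 + rel32, where ip is the address of the instruction
itself. Copying the instruction from a->replacement to a->instr changes
ip, so the displacement must absorb the distance between the two sites:

	/*
	 *	target = repl  + 5 + rel_old
	 *	       = instr + 5 + (rel_old + (repl - instr))
	 *
	 * hence rel_new = rel_old + (a->replacement - a->instr),
	 * which is exactly the "+=" in the hunk above.
	 */
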
394 414
395#ifdef CONFIG_PARAVIRT 415#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 32fb09102a13..adb0ba025702 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -19,7 +19,7 @@
 
 #include <linux/pci.h>
 #include <linux/gfp.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
@@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev)
 {
	kfree(dev->archdata.iommu);
 }
+
+void __init amd_iommu_uninit_devices(void)
+{
+	struct pci_dev *pdev = NULL;
+
+	for_each_pci_dev(pdev) {
+
+		if (!check_device(&pdev->dev))
+			continue;
+
+		iommu_uninit_device(&pdev->dev);
+	}
+}
+
+int __init amd_iommu_init_devices(void)
+{
+	struct pci_dev *pdev = NULL;
+	int ret = 0;
+
+	for_each_pci_dev(pdev) {
+
+		if (!check_device(&pdev->dev))
+			continue;
+
+		ret = iommu_init_device(&pdev->dev);
+		if (ret)
+			goto out_free;
+	}
+
+	return 0;
+
+out_free:
+
+	amd_iommu_uninit_devices();
+
+	return ret;
+}
 #ifdef CONFIG_AMD_IOMMU_STATS
 
 /*
@@ -943,7 +980,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 {
	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
	struct amd_iommu *iommu;
-	int i;
+	unsigned long i;
 
 #ifdef CONFIG_IOMMU_STRESS
	populate = false;
@@ -1125,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 
	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 
-	iommu_area_free(range->bitmap, address, pages);
+	bitmap_clear(range->bitmap, address, pages);
 
 }
 
@@ -1452,11 +1489,14 @@ static void __detach_device(struct device *dev)
 {
	struct iommu_dev_data *dev_data = get_dev_data(dev);
	struct iommu_dev_data *alias_data;
+	struct protection_domain *domain;
	unsigned long flags;
 
	BUG_ON(!dev_data->domain);
 
-	spin_lock_irqsave(&dev_data->domain->lock, flags);
+	domain = dev_data->domain;
+
+	spin_lock_irqsave(&domain->lock, flags);
 
	if (dev_data->alias != dev) {
		alias_data = get_dev_data(dev_data->alias);
@@ -1467,13 +1507,15 @@ static void __detach_device(struct device *dev)
	if (atomic_dec_and_test(&dev_data->bind))
		do_detach(dev);
 
-	spin_unlock_irqrestore(&dev_data->domain->lock, flags);
+	spin_unlock_irqrestore(&domain->lock, flags);
 
	/*
	 * If we run in passthrough mode the device must be assigned to the
-	 * passthrough domain if it is detached from any other domain
+	 * passthrough domain if it is detached from any other domain.
+	 * Make sure we can deassign from the pt_domain itself.
	 */
-	if (iommu_pass_through && dev_data->domain == NULL)
+	if (iommu_pass_through &&
+	    (dev_data->domain == NULL && domain != pt_domain))
		__attach_device(dev, pt_domain);
 }
 
@@ -1587,6 +1629,11 @@ static struct notifier_block device_nb = {
	.notifier_call = device_change_notifier,
 };
 
+void amd_iommu_init_notifier(void)
+{
+	bus_register_notifier(&pci_bus_type, &device_nb);
+}
+
 /*****************************************************************************
 *
 * The next functions belong to the dma_ops mapping/unmapping code.
@@ -1783,7 +1830,7 @@ retry:
		goto out;
 
	/*
-	 * aperture was sucessfully enlarged by 128 MB, try
+	 * aperture was successfully enlarged by 128 MB, try
	 * allocation again
	 */
	goto retry;
@@ -2145,8 +2192,6 @@ static void prealloc_protection_domains(void)
		if (!check_device(&dev->dev))
			continue;
 
-		iommu_init_device(&dev->dev);
-
		/* Is there already any domain for it? */
		if (domain_for_device(&dev->dev))
			continue;
@@ -2178,6 +2223,12 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 /*
 * The function which clues the AMD IOMMU driver into dma_ops.
 */
+
+void __init amd_iommu_init_api(void)
+{
+	register_iommu(&amd_iommu_ops);
+}
+
 int __init amd_iommu_init_dma_ops(void)
 {
	struct amd_iommu *iommu;
@@ -2213,10 +2264,6 @@ int __init amd_iommu_init_dma_ops(void)
	/* Make the driver finally visible to the drivers */
	dma_ops = &amd_iommu_dma_ops;
 
-	register_iommu(&amd_iommu_ops);
-
-	bus_register_notifier(&pci_bus_type, &device_nb);
-
	amd_iommu_stats_init();
 
	return 0;
@@ -2490,7 +2537,7 @@ int __init amd_iommu_init_passthrough(void)
	struct pci_dev *dev = NULL;
	u16 devid;
 
-	/* allocate passthroug domain */
+	/* allocate passthrough domain */
	pt_domain = protection_domain_alloc();
	if (!pt_domain)
		return -ENOMEM;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 7ffc39965233..9dc91b431470 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -138,6 +138,11 @@ int amd_iommus_present;
138bool amd_iommu_np_cache __read_mostly; 138bool amd_iommu_np_cache __read_mostly;
139 139
140/* 140/*
141 * Set to true if ACPI table parsing and hardware initialization went properly
142 */
143static bool amd_iommu_initialized;
144
145/*
141 * List of protection domains - used during resume 146 * List of protection domains - used during resume
142 */ 147 */
143LIST_HEAD(amd_iommu_pd_list); 148LIST_HEAD(amd_iommu_pd_list);
@@ -929,6 +934,8 @@ static int __init init_iommu_all(struct acpi_table_header *table)
929 } 934 }
930 WARN_ON(p != end); 935 WARN_ON(p != end);
931 936
937 amd_iommu_initialized = true;
938
932 return 0; 939 return 0;
933} 940}
934 941
@@ -1263,6 +1270,9 @@ static int __init amd_iommu_init(void)
1263 if (acpi_table_parse("IVRS", init_iommu_all) != 0) 1270 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
1264 goto free; 1271 goto free;
1265 1272
1273 if (!amd_iommu_initialized)
1274 goto free;
1275
1266 if (acpi_table_parse("IVRS", init_memory_definitions) != 0) 1276 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
1267 goto free; 1277 goto free;
1268 1278
@@ -1274,13 +1284,22 @@ static int __init amd_iommu_init(void)
1274 if (ret) 1284 if (ret)
1275 goto free; 1285 goto free;
1276 1286
1287 ret = amd_iommu_init_devices();
1288 if (ret)
1289 goto free;
1290
1277 if (iommu_pass_through) 1291 if (iommu_pass_through)
1278 ret = amd_iommu_init_passthrough(); 1292 ret = amd_iommu_init_passthrough();
1279 else 1293 else
1280 ret = amd_iommu_init_dma_ops(); 1294 ret = amd_iommu_init_dma_ops();
1295
1281 if (ret) 1296 if (ret)
1282 goto free; 1297 goto free;
1283 1298
1299 amd_iommu_init_api();
1300
1301 amd_iommu_init_notifier();
1302
1284 enable_iommus(); 1303 enable_iommus();
1285 1304
1286 if (iommu_pass_through) 1305 if (iommu_pass_through)
@@ -1296,6 +1315,9 @@ out:
1296 return ret; 1315 return ret;
1297 1316
1298free: 1317free:
1318
1319 amd_iommu_uninit_devices();
1320
1299 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1321 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1300 get_order(MAX_DOMAIN_ID/8)); 1322 get_order(MAX_DOMAIN_ID/8));
1301 1323
@@ -1336,6 +1358,9 @@ void __init amd_iommu_detect(void)
1336 iommu_detected = 1; 1358 iommu_detected = 1;
1337 amd_iommu_detected = 1; 1359 amd_iommu_detected = 1;
1338 x86_init.iommu.iommu_init = amd_iommu_init; 1360 x86_init.iommu.iommu_init = amd_iommu_init;
1361
1362 /* Make sure ACS will be enabled */
1363 pci_request_acs();
1339 } 1364 }
1340} 1365}
1341 1366
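
Taken together, the amd_iommu_init() hunks give the following setup order; this is a condensed sketch of the function body after the patch (error paths abbreviated, names as in the hunks above), not additional code:

	ret = amd_iommu_init_devices();		/* new: per-device data, before domain setup */
	if (ret)
		goto free;

	if (iommu_pass_through)
		ret = amd_iommu_init_passthrough();
	else
		ret = amd_iommu_init_dma_ops();
	if (ret)
		goto free;

	amd_iommu_init_api();			/* register_iommu(&amd_iommu_ops) */
	amd_iommu_init_notifier();		/* bus_register_notifier(&pci_bus_type, &device_nb) */

	enable_iommus();

free:						/* reached only on error */
	amd_iommu_uninit_devices();		/* undo amd_iommu_init_devices() */

Moving register_iommu() and bus_register_notifier() out of amd_iommu_init_dma_ops() into the two new helpers means the passthrough path now gets them as well.
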
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index e0dfb6856aa2..3704997e8b25 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -280,7 +280,8 @@ void __init early_gart_iommu_check(void)
280 * or BIOS forget to put that in reserved. 280 * or BIOS forget to put that in reserved.
281 * try to update e820 to make that region as reserved. 281 * try to update e820 to make that region as reserved.
282 */ 282 */
283 int i, fix, slot; 283 u32 agp_aper_base = 0, agp_aper_order = 0;
284 int i, fix, slot, valid_agp = 0;
284 u32 ctl; 285 u32 ctl;
285 u32 aper_size = 0, aper_order = 0, last_aper_order = 0; 286 u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
286 u64 aper_base = 0, last_aper_base = 0; 287 u64 aper_base = 0, last_aper_base = 0;
@@ -290,6 +291,8 @@ void __init early_gart_iommu_check(void)
290 return; 291 return;
291 292
292 /* This is mostly duplicate of iommu_hole_init */ 293 /* This is mostly duplicate of iommu_hole_init */
294 agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
295
293 fix = 0; 296 fix = 0;
294 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 297 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
295 int bus; 298 int bus;
@@ -342,10 +345,10 @@ void __init early_gart_iommu_check(void)
342 } 345 }
343 } 346 }
344 347
345 if (!fix) 348 if (valid_agp)
346 return; 349 return;
347 350
348 /* different nodes have different setting, disable them all at first*/ 351 /* disable them all at first */
349 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 352 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
350 int bus; 353 int bus;
351 int dev_base, dev_limit; 354 int dev_base, dev_limit;
@@ -458,8 +461,6 @@ out:
458 461
459 if (aper_alloc) { 462 if (aper_alloc) {
460 /* Got the aperture from the AGP bridge */ 463 /* Got the aperture from the AGP bridge */
461 } else if (!valid_agp) {
462 /* Do nothing */
463 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || 464 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
464 force_iommu || 465 force_iommu ||
465 valid_agp || 466 valid_agp ||
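
The early_gart_iommu_check() hunks amount to probing the AGP bridge up front and bailing out when a valid AGP aperture exists, instead of keying the early return on `fix`. A condensed sketch (an interpretation of the hunks above, not additional code from the patch):

	u32 agp_aper_base = 0, agp_aper_order = 0;
	int valid_agp = 0;

	/* probe the AGP bridge before scanning the northbridges */
	agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);

	/* ... northbridge scan sets 'fix' as before ... */

	if (valid_agp)
		return;		/* a usable AGP aperture exists, leave the GARTs alone */

	/* otherwise disable all GART apertures, as the following loop does */
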
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index efb2b9cd132c..6e29b2a77aa8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U;
61 61
62/* 62/*
63 * The highest APIC ID seen during enumeration. 63 * The highest APIC ID seen during enumeration.
64 *
65 * On AMD, this determines the messaging protocol we can use: if all APIC IDs
66 * are in the 0 ... 7 range, then we can use logical addressing which
67 * has some performance advantages (better broadcasting).
68 *
69 * If there's an APIC ID above 8, we use physical addressing.
70 */ 64 */
71unsigned int max_physical_apicid; 65unsigned int max_physical_apicid;
72 66
@@ -587,7 +581,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
587 res = (((u64)(*deltatsc)) * pm_100ms); 581 res = (((u64)(*deltatsc)) * pm_100ms);
588 do_div(res, deltapm); 582 do_div(res, deltapm);
589 apic_printk(APIC_VERBOSE, "TSC delta adjusted to " 583 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
590 "PM-Timer: %lu (%ld) \n", 584 "PM-Timer: %lu (%ld)\n",
591 (unsigned long)res, *deltatsc); 585 (unsigned long)res, *deltatsc);
592 *deltatsc = (long)res; 586 *deltatsc = (long)res;
593 } 587 }
@@ -1341,7 +1335,7 @@ void enable_x2apic(void)
1341 1335
1342 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1336 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1343 if (!(msr & X2APIC_ENABLE)) { 1337 if (!(msr & X2APIC_ENABLE)) {
1344 pr_info("Enabling x2apic\n"); 1338 printk_once(KERN_INFO "Enabling x2apic\n");
1345 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); 1339 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1346 } 1340 }
1347} 1341}
@@ -1647,9 +1641,7 @@ int __init APIC_init_uniprocessor(void)
1647#endif 1641#endif
1648 1642
1649 enable_IR_x2apic(); 1643 enable_IR_x2apic();
1650#ifdef CONFIG_X86_64
1651 default_setup_apic_routing(); 1644 default_setup_apic_routing();
1652#endif
1653 1645
1654 verify_local_APIC(); 1646 verify_local_APIC();
1655 connect_bsp_APIC(); 1647 connect_bsp_APIC();
@@ -1897,18 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1897 if (apicid > max_physical_apicid) 1889 if (apicid > max_physical_apicid)
1898 max_physical_apicid = apicid; 1890 max_physical_apicid = apicid;
1899 1891
1900#ifdef CONFIG_X86_32
1901 switch (boot_cpu_data.x86_vendor) {
1902 case X86_VENDOR_INTEL:
1903 if (num_processors > 8)
1904 def_to_bigsmp = 1;
1905 break;
1906 case X86_VENDOR_AMD:
1907 if (max_physical_apicid >= 8)
1908 def_to_bigsmp = 1;
1909 }
1910#endif
1911
1912#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 1892#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
1913 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1893 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1914 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1894 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index d0c99abc26c3..e3c3d820c325 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
240 printk(KERN_DEBUG "system APIC only can use physical flat"); 240 printk(KERN_DEBUG "system APIC only can use physical flat");
241 return 1; 241 return 1;
242 } 242 }
243
244 if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
245 printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
246 return 1;
247 }
243#endif 248#endif
244 249
245 return 0; 250 return 0;
@@ -306,10 +311,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
306 if (cpumask_test_cpu(cpu, cpu_online_mask)) 311 if (cpumask_test_cpu(cpu, cpu_online_mask))
307 break; 312 break;
308 } 313 }
309 if (cpu < nr_cpu_ids) 314 return per_cpu(x86_cpu_to_apicid, cpu);
310 return per_cpu(x86_cpu_to_apicid, cpu);
311
312 return BAD_APICID;
313} 315}
314 316
315struct apic apic_physflat = { 317struct apic apic_physflat = {
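
The same simplification recurs below for bigsmp, x2apic cluster, x2apic phys and UV: the BAD_APICID fallback is dropped because set_desc_affinity(), the one path that could previously hand in a mask with no online CPU, now rejects such masks itself (see the io_apic.c hunks further down). A sketch, assuming the surrounding loop walks the AND of the two masks as in the unchanged part of these functions:

	/* caller guarantees the mask intersects cpu_online_mask */
	for_each_cpu_and(cpu, cpumask, andmask) {
		if (cpumask_test_cpu(cpu, cpu_online_mask))
			break;		/* always hit: no BAD_APICID path left */
	}
	return per_cpu(x86_cpu_to_apicid, cpu);
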
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index d9acc3bee0f4..e31b9ffe25f5 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -127,7 +127,7 @@ static u32 noop_apic_read(u32 reg)
127 127
128static void noop_apic_write(u32 reg, u32 v) 128static void noop_apic_write(u32 reg, u32 v)
129{ 129{
130 WARN_ON_ONCE((cpu_has_apic || !disable_apic)); 130 WARN_ON_ONCE(cpu_has_apic && !disable_apic);
131} 131}
132 132
133struct apic apic_noop = { 133struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 38dcecfa5818..cb804c5091b9 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -131,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
131 if (cpumask_test_cpu(cpu, cpu_online_mask)) 131 if (cpumask_test_cpu(cpu, cpu_online_mask))
132 break; 132 break;
133 } 133 }
134 if (cpu < nr_cpu_ids) 134 return bigsmp_cpu_to_logical_apicid(cpu);
135 return bigsmp_cpu_to_logical_apicid(cpu);
136
137 return BAD_APICID;
138} 135}
139 136
140static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) 137static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e85f8fb7f8e7..dd2b5f264643 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -27,6 +27,9 @@
27 * 27 *
28 * http://www.unisys.com 28 * http://www.unisys.com
29 */ 29 */
30
31#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
32
30#include <linux/notifier.h> 33#include <linux/notifier.h>
31#include <linux/spinlock.h> 34#include <linux/spinlock.h>
32#include <linux/cpumask.h> 35#include <linux/cpumask.h>
@@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr)
223 mip_addr = val; 226 mip_addr = val;
224 mip = (struct mip_reg *)val; 227 mip = (struct mip_reg *)val;
225 mip_reg = __va(mip); 228 mip_reg = __va(mip);
226 pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", 229 pr_debug("host_reg = 0x%lx\n",
227 (unsigned long)host_reg); 230 (unsigned long)host_reg);
228 pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", 231 pr_debug("mip_reg = 0x%lx\n",
229 (unsigned long)mip_reg); 232 (unsigned long)mip_reg);
230 success++; 233 success++;
231 break; 234 break;
@@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void)
401 if (!es7000_plat) 404 if (!es7000_plat)
402 return; 405 return;
403 406
404 printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); 407 pr_info("Enabling APIC mode.\n");
405 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); 408 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
406 es7000_mip_reg.off_0x00 = MIP_SW_APIC; 409 es7000_mip_reg.off_0x00 = MIP_SW_APIC;
407 es7000_mip_reg.off_0x38 = MIP_VALID; 410 es7000_mip_reg.off_0x38 = MIP_VALID;
@@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void)
514{ 517{
515 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 518 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
516 519
517 printk(KERN_INFO 520 pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
518 "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
519 (apic_version[apic] == 0x14) ? 521 (apic_version[apic] == 0x14) ?
520 "Physical Cluster" : "Logical Cluster", 522 "Physical Cluster" : "Logical Cluster",
521 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); 523 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d5d498fbee4b..6bdd2c7ead75 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1647,7 +1647,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1647 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1647 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1648 1648
1649 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1649 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1650 " Stat Dmod Deli Vect: \n"); 1650 " Stat Dmod Deli Vect:\n");
1651 1651
1652 for (i = 0; i <= reg_01.bits.entries; i++) { 1652 for (i = 0; i <= reg_01.bits.entries; i++) {
1653 struct IO_APIC_route_entry entry; 1653 struct IO_APIC_route_entry entry;
@@ -2276,26 +2276,28 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2276 2276
2277/* 2277/*
2278 * Either sets desc->affinity to a valid value, and returns 2278 * Either sets desc->affinity to a valid value, and returns
2279 * ->cpu_mask_to_apicid of that, or returns BAD_APICID and 2279 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
2280 * leaves desc->affinity untouched. 2280 * leaves desc->affinity untouched.
2281 */ 2281 */
2282unsigned int 2282unsigned int
2283set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) 2283set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
2284 unsigned int *dest_id)
2284{ 2285{
2285 struct irq_cfg *cfg; 2286 struct irq_cfg *cfg;
2286 unsigned int irq; 2287 unsigned int irq;
2287 2288
2288 if (!cpumask_intersects(mask, cpu_online_mask)) 2289 if (!cpumask_intersects(mask, cpu_online_mask))
2289 return BAD_APICID; 2290 return -1;
2290 2291
2291 irq = desc->irq; 2292 irq = desc->irq;
2292 cfg = desc->chip_data; 2293 cfg = desc->chip_data;
2293 if (assign_irq_vector(irq, cfg, mask)) 2294 if (assign_irq_vector(irq, cfg, mask))
2294 return BAD_APICID; 2295 return -1;
2295 2296
2296 cpumask_copy(desc->affinity, mask); 2297 cpumask_copy(desc->affinity, mask);
2297 2298
2298 return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); 2299 *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
2300 return 0;
2299} 2301}
2300 2302
2301static int 2303static int
@@ -2311,12 +2313,11 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2311 cfg = desc->chip_data; 2313 cfg = desc->chip_data;
2312 2314
2313 spin_lock_irqsave(&ioapic_lock, flags); 2315 spin_lock_irqsave(&ioapic_lock, flags);
2314 dest = set_desc_affinity(desc, mask); 2316 ret = set_desc_affinity(desc, mask, &dest);
2315 if (dest != BAD_APICID) { 2317 if (!ret) {
2316 /* Only the high 8 bits are valid. */ 2318 /* Only the high 8 bits are valid. */
2317 dest = SET_APIC_LOGICAL_ID(dest); 2319 dest = SET_APIC_LOGICAL_ID(dest);
2318 __target_IO_APIC_irq(irq, dest, cfg); 2320 __target_IO_APIC_irq(irq, dest, cfg);
2319 ret = 0;
2320 } 2321 }
2321 spin_unlock_irqrestore(&ioapic_lock, flags); 2322 spin_unlock_irqrestore(&ioapic_lock, flags);
2322 2323
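
Every user of set_desc_affinity() switches from the BAD_APICID comparison to the new return-code convention; the pattern, as repeated in the MSI, DMAR, HPET and HT hunks below:

	unsigned int dest;

	if (set_desc_affinity(desc, mask, &dest))
		return -1;	/* no online CPU in mask, or no free vector */

	/* dest now holds cpu_mask_to_apicid_and(desc->affinity, cfg->domain) */
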
@@ -2431,7 +2432,14 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2431 continue; 2432 continue;
2432 2433
2433 cfg = irq_cfg(irq); 2434 cfg = irq_cfg(irq);
2434 spin_lock(&desc->lock); 2435 raw_spin_lock(&desc->lock);
2436
2437 /*
2438 * Check if the irq migration is in progress. If so, we
2439 * haven't received the cleanup request yet for this irq.
2440 */
2441 if (cfg->move_in_progress)
2442 goto unlock;
2435 2443
2436 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) 2444 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2437 goto unlock; 2445 goto unlock;
@@ -2450,7 +2458,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2450 } 2458 }
2451 __get_cpu_var(vector_irq)[vector] = -1; 2459 __get_cpu_var(vector_irq)[vector] = -1;
2452unlock: 2460unlock:
2453 spin_unlock(&desc->lock); 2461 raw_spin_unlock(&desc->lock);
2454 } 2462 }
2455 2463
2456 irq_exit(); 2464 irq_exit();
@@ -3351,8 +3359,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3351 struct msi_msg msg; 3359 struct msi_msg msg;
3352 unsigned int dest; 3360 unsigned int dest;
3353 3361
3354 dest = set_desc_affinity(desc, mask); 3362 if (set_desc_affinity(desc, mask, &dest))
3355 if (dest == BAD_APICID)
3356 return -1; 3363 return -1;
3357 3364
3358 cfg = desc->chip_data; 3365 cfg = desc->chip_data;
@@ -3384,8 +3391,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3384 if (get_irte(irq, &irte)) 3391 if (get_irte(irq, &irte))
3385 return -1; 3392 return -1;
3386 3393
3387 dest = set_desc_affinity(desc, mask); 3394 if (set_desc_affinity(desc, mask, &dest))
3388 if (dest == BAD_APICID)
3389 return -1; 3395 return -1;
3390 3396
3391 irte.vector = cfg->vector; 3397 irte.vector = cfg->vector;
@@ -3567,8 +3573,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3567 struct msi_msg msg; 3573 struct msi_msg msg;
3568 unsigned int dest; 3574 unsigned int dest;
3569 3575
3570 dest = set_desc_affinity(desc, mask); 3576 if (set_desc_affinity(desc, mask, &dest))
3571 if (dest == BAD_APICID)
3572 return -1; 3577 return -1;
3573 3578
3574 cfg = desc->chip_data; 3579 cfg = desc->chip_data;
@@ -3623,8 +3628,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3623 struct msi_msg msg; 3628 struct msi_msg msg;
3624 unsigned int dest; 3629 unsigned int dest;
3625 3630
3626 dest = set_desc_affinity(desc, mask); 3631 if (set_desc_affinity(desc, mask, &dest))
3627 if (dest == BAD_APICID)
3628 return -1; 3632 return -1;
3629 3633
3630 cfg = desc->chip_data; 3634 cfg = desc->chip_data;
@@ -3730,8 +3734,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3730 struct irq_cfg *cfg; 3734 struct irq_cfg *cfg;
3731 unsigned int dest; 3735 unsigned int dest;
3732 3736
3733 dest = set_desc_affinity(desc, mask); 3737 if (set_desc_affinity(desc, mask, &dest))
3734 if (dest == BAD_APICID)
3735 return -1; 3738 return -1;
3736 3739
3737 cfg = desc->chip_data; 3740 cfg = desc->chip_data;
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 6389432a9dbf..0159a69396cb 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -361,7 +361,7 @@ void stop_apic_nmi_watchdog(void *unused)
361 */ 361 */
362 362
363static DEFINE_PER_CPU(unsigned, last_irq_sum); 363static DEFINE_PER_CPU(unsigned, last_irq_sum);
364static DEFINE_PER_CPU(local_t, alert_counter); 364static DEFINE_PER_CPU(long, alert_counter);
365static DEFINE_PER_CPU(int, nmi_touch); 365static DEFINE_PER_CPU(int, nmi_touch);
366 366
367void touch_nmi_watchdog(void) 367void touch_nmi_watchdog(void)
@@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
438 * Ayiee, looks like this CPU is stuck ... 438 * Ayiee, looks like this CPU is stuck ...
439 * wait a few IRQs (5 seconds) before doing the oops ... 439 * wait a few IRQs (5 seconds) before doing the oops ...
440 */ 440 */
441 local_inc(&__get_cpu_var(alert_counter)); 441 __this_cpu_inc(per_cpu_var(alert_counter));
442 if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) 442 if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
443 /* 443 /*
444 * die_nmi will return ONLY if NOTIFY_STOP happens.. 444 * die_nmi will return ONLY if NOTIFY_STOP happens..
445 */ 445 */
@@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
447 regs, panic_on_timeout); 447 regs, panic_on_timeout);
448 } else { 448 } else {
449 __get_cpu_var(last_irq_sum) = sum; 449 __get_cpu_var(last_irq_sum) = sum;
450 local_set(&__get_cpu_var(alert_counter), 0); 450 __this_cpu_write(per_cpu_var(alert_counter), 0);
451 } 451 }
452 452
453 /* see if the nmi watchdog went off */ 453 /* see if the nmi watchdog went off */
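
The nmi.c hunk drops the local_t wrapper in favour of a plain long driven by this_cpu operations, which on x86 compile down to single segment-prefixed instructions and are sufficient for a counter only ever touched by its own CPU (a general property of this_cpu ops, not something this patch introduces). The resulting idiom, with `v` a hypothetical local for illustration:

	static DEFINE_PER_CPU(long, alert_counter);	/* was DEFINE_PER_CPU(local_t, ...) */
	long v;

	__this_cpu_inc(per_cpu_var(alert_counter));		/* was local_inc()  */
	v = __this_cpu_read(per_cpu_var(alert_counter));	/* was local_read() */
	__this_cpu_write(per_cpu_var(alert_counter), 0);	/* was local_set()  */
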
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 98c4665f251c..47dd856708e5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -225,7 +225,7 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
225 225
226 mpc_record = 0; 226 mpc_record = 0;
227 printk(KERN_INFO 227 printk(KERN_INFO
228 "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); 228 "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
229 229
230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { 230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
231 printk(KERN_WARNING 231 printk(KERN_WARNING
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1a6559f6768c..99d2fe016084 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -52,7 +52,32 @@ static int __init print_ipi_mode(void)
52} 52}
53late_initcall(print_ipi_mode); 53late_initcall(print_ipi_mode);
54 54
55void default_setup_apic_routing(void) 55void __init default_setup_apic_routing(void)
56{
57 int version = apic_version[boot_cpu_physical_apicid];
58
59 if (num_possible_cpus() > 8) {
60 switch (boot_cpu_data.x86_vendor) {
61 case X86_VENDOR_INTEL:
62 if (!APIC_XAPIC(version)) {
63 def_to_bigsmp = 0;
64 break;
65 }
66 /* If P4 and above fall through */
67 case X86_VENDOR_AMD:
68 def_to_bigsmp = 1;
69 }
70 }
71
72#ifdef CONFIG_X86_BIGSMP
73 generic_bigsmp_probe();
74#endif
75
76 if (apic->setup_apic_routing)
77 apic->setup_apic_routing();
78}
79
80static void setup_apic_flat_routing(void)
56{ 81{
57#ifdef CONFIG_X86_IO_APIC 82#ifdef CONFIG_X86_IO_APIC
58 printk(KERN_INFO 83 printk(KERN_INFO
@@ -103,7 +128,7 @@ struct apic apic_default = {
103 .init_apic_ldr = default_init_apic_ldr, 128 .init_apic_ldr = default_init_apic_ldr,
104 129
105 .ioapic_phys_id_map = default_ioapic_phys_id_map, 130 .ioapic_phys_id_map = default_ioapic_phys_id_map,
106 .setup_apic_routing = default_setup_apic_routing, 131 .setup_apic_routing = setup_apic_flat_routing,
107 .multi_timer_check = NULL, 132 .multi_timer_check = NULL,
108 .apicid_to_node = default_apicid_to_node, 133 .apicid_to_node = default_apicid_to_node,
109 .cpu_to_logical_apicid = default_cpu_to_logical_apicid, 134 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index c4cbd3080c1c..83e9be4778e2 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -67,17 +67,8 @@ void __init default_setup_apic_routing(void)
67 } 67 }
68#endif 68#endif
69 69
70 if (apic == &apic_flat) { 70 if (apic == &apic_flat && num_possible_cpus() > 8)
71 switch (boot_cpu_data.x86_vendor) { 71 apic = &apic_physflat;
72 case X86_VENDOR_INTEL:
73 if (num_processors > 8)
74 apic = &apic_physflat;
75 break;
76 case X86_VENDOR_AMD:
77 if (max_physical_apicid >= 8)
78 apic = &apic_physflat;
79 }
80 }
81 72
82 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 73 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
83 74
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index a5371ec36776..cf69c59f4910 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
148 break; 148 break;
149 } 149 }
150 150
151 if (cpu < nr_cpu_ids) 151 return per_cpu(x86_cpu_to_logical_apicid, cpu);
152 return per_cpu(x86_cpu_to_logical_apicid, cpu);
153
154 return BAD_APICID;
155} 152}
156 153
157static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) 154static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a8989aadc99a..8972f38c5ced 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
146 break; 146 break;
147 } 147 }
148 148
149 if (cpu < nr_cpu_ids) 149 return per_cpu(x86_cpu_to_apicid, cpu);
150 return per_cpu(x86_cpu_to_apicid, cpu);
151
152 return BAD_APICID;
153} 150}
154 151
155static unsigned int x2apic_phys_get_apic_id(unsigned long x) 152static unsigned int x2apic_phys_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index b684bb303cbf..3740c8a4eae7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -20,6 +20,8 @@
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/pci.h>
24#include <linux/kdebug.h>
23 25
24#include <asm/uv/uv_mmrs.h> 26#include <asm/uv/uv_mmrs.h>
25#include <asm/uv/uv_hub.h> 27#include <asm/uv/uv_hub.h>
@@ -34,8 +36,13 @@
34 36
35DEFINE_PER_CPU(int, x2apic_extra_bits); 37DEFINE_PER_CPU(int, x2apic_extra_bits);
36 38
39#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
40
37static enum uv_system_type uv_system_type; 41static enum uv_system_type uv_system_type;
38static u64 gru_start_paddr, gru_end_paddr; 42static u64 gru_start_paddr, gru_end_paddr;
43int uv_min_hub_revision_id;
44EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
45static DEFINE_SPINLOCK(uv_nmi_lock);
39 46
40static inline bool is_GRU_range(u64 start, u64 end) 47static inline bool is_GRU_range(u64 start, u64 end)
41{ 48{
@@ -55,20 +62,28 @@ static int early_get_nodeid(void)
55 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); 62 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
56 node_id.v = *mmr; 63 node_id.v = *mmr;
57 early_iounmap(mmr, sizeof(*mmr)); 64 early_iounmap(mmr, sizeof(*mmr));
65
66 /* Currently, all blades have same revision number */
67 uv_min_hub_revision_id = node_id.s.revision;
68
58 return node_id.s.node_id; 69 return node_id.s.node_id;
59} 70}
60 71
61static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 72static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
62{ 73{
74 int nodeid;
75
63 if (!strcmp(oem_id, "SGI")) { 76 if (!strcmp(oem_id, "SGI")) {
77 nodeid = early_get_nodeid();
64 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 78 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
79 x86_platform.nmi_init = uv_nmi_init;
65 if (!strcmp(oem_table_id, "UVL")) 80 if (!strcmp(oem_table_id, "UVL"))
66 uv_system_type = UV_LEGACY_APIC; 81 uv_system_type = UV_LEGACY_APIC;
67 else if (!strcmp(oem_table_id, "UVX")) 82 else if (!strcmp(oem_table_id, "UVX"))
68 uv_system_type = UV_X2APIC; 83 uv_system_type = UV_X2APIC;
69 else if (!strcmp(oem_table_id, "UVH")) { 84 else if (!strcmp(oem_table_id, "UVH")) {
70 __get_cpu_var(x2apic_extra_bits) = 85 __get_cpu_var(x2apic_extra_bits) =
71 early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); 86 nodeid << (UV_APIC_PNODE_SHIFT - 1);
72 uv_system_type = UV_NON_UNIQUE_APIC; 87 uv_system_type = UV_NON_UNIQUE_APIC;
73 return 1; 88 return 1;
74 } 89 }
@@ -225,10 +240,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
225 if (cpumask_test_cpu(cpu, cpu_online_mask)) 240 if (cpumask_test_cpu(cpu, cpu_online_mask))
226 break; 241 break;
227 } 242 }
228 if (cpu < nr_cpu_ids) 243 return per_cpu(x86_cpu_to_apicid, cpu);
229 return per_cpu(x86_cpu_to_apicid, cpu);
230
231 return BAD_APICID;
232} 244}
233 245
234static unsigned int x2apic_get_apic_id(unsigned long x) 246static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -377,13 +389,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
377 389
378enum map_type {map_wb, map_uc}; 390enum map_type {map_wb, map_uc};
379 391
380static __init void map_high(char *id, unsigned long base, int shift, 392static __init void map_high(char *id, unsigned long base, int pshift,
381 int max_pnode, enum map_type map_type) 393 int bshift, int max_pnode, enum map_type map_type)
382{ 394{
383 unsigned long bytes, paddr; 395 unsigned long bytes, paddr;
384 396
385 paddr = base << shift; 397 paddr = base << pshift;
386 bytes = (1UL << shift) * (max_pnode + 1); 398 bytes = (1UL << bshift) * (max_pnode + 1);
387 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, 399 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
388 paddr + bytes); 400 paddr + bytes);
389 if (map_type == map_uc) 401 if (map_type == map_uc)
@@ -399,7 +411,7 @@ static __init void map_gru_high(int max_pnode)
399 411
400 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); 412 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
401 if (gru.s.enable) { 413 if (gru.s.enable) {
402 map_high("GRU", gru.s.base, shift, max_pnode, map_wb); 414 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
403 gru_start_paddr = ((u64)gru.s.base << shift); 415 gru_start_paddr = ((u64)gru.s.base << shift);
404 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); 416 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
405 417
@@ -413,7 +425,7 @@ static __init void map_mmr_high(int max_pnode)
413 425
414 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); 426 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
415 if (mmr.s.enable) 427 if (mmr.s.enable)
416 map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); 428 map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
417} 429}
418 430
419static __init void map_mmioh_high(int max_pnode) 431static __init void map_mmioh_high(int max_pnode)
@@ -423,7 +435,8 @@ static __init void map_mmioh_high(int max_pnode)
423 435
424 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 436 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
425 if (mmioh.s.enable) 437 if (mmioh.s.enable)
426 map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); 438 map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io,
439 max_pnode, map_uc);
427} 440}
428 441
429static __init void map_low_mmrs(void) 442static __init void map_low_mmrs(void)
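
map_high() now takes separate shifts for the base and for the size because the MMIOH overlay encodes its size in its own field (mmioh.s.m_io) rather than reusing the base shift; the GRU and MMR callers simply pass the same shift twice. The math, with hypothetical example values purely for illustration:

	paddr = base << pshift;				/* base is stored in units of 2^pshift */
	bytes = (1UL << bshift) * (max_pnode + 1);	/* each pnode covers 2^bshift bytes */

	/* hypothetical: pshift = 28, bshift = mmioh.s.m_io = 26, max_pnode = 3
	 * => bytes = 4 * 64MB = 256MB, decoupled from the 256MB base granularity */
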
@@ -475,7 +488,7 @@ static void uv_heartbeat(unsigned long ignored)
475 488
476static void __cpuinit uv_heartbeat_enable(int cpu) 489static void __cpuinit uv_heartbeat_enable(int cpu)
477{ 490{
478 if (!uv_cpu_hub_info(cpu)->scir.enabled) { 491 while (!uv_cpu_hub_info(cpu)->scir.enabled) {
479 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; 492 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
480 493
481 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); 494 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
@@ -483,11 +496,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
483 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; 496 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
484 add_timer_on(timer, cpu); 497 add_timer_on(timer, cpu);
485 uv_cpu_hub_info(cpu)->scir.enabled = 1; 498 uv_cpu_hub_info(cpu)->scir.enabled = 1;
486 }
487 499
488 /* check boot cpu */ 500 /* also ensure that boot cpu is enabled */
489 if (!uv_cpu_hub_info(0)->scir.enabled) 501 cpu = 0;
490 uv_heartbeat_enable(0); 502 }
491} 503}
492 504
493#ifdef CONFIG_HOTPLUG_CPU 505#ifdef CONFIG_HOTPLUG_CPU
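
The uv_heartbeat_enable() change above replaces a recursive call for the boot CPU with a loop, which is easy to misread; a condensed sketch of the control flow:

	while (!uv_cpu_hub_info(cpu)->scir.enabled) {
		/* ... arm this CPU's heartbeat timer ... */
		uv_cpu_hub_info(cpu)->scir.enabled = 1;

		cpu = 0;	/* second pass covers the boot cpu, then the loop exits */
	}
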
@@ -546,6 +558,30 @@ late_initcall(uv_init_heartbeat);
546 558
547#endif /* !CONFIG_HOTPLUG_CPU */ 559#endif /* !CONFIG_HOTPLUG_CPU */
548 560
561/* Direct Legacy VGA I/O traffic to designated IOH */
562int uv_set_vga_state(struct pci_dev *pdev, bool decode,
563 unsigned int command_bits, bool change_bridge)
564{
565 int domain, bus, rc;
566
567 PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n",
568 pdev->devfn, decode, command_bits, change_bridge);
569
570 if (!change_bridge)
571 return 0;
572
573 if ((command_bits & PCI_COMMAND_IO) == 0)
574 return 0;
575
576 domain = pci_domain_nr(pdev->bus);
577 bus = pdev->bus->number;
578
579 rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
580 PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
581
582 return rc;
583}
584
549/* 585/*
550 * Called on each cpu to initialize the per_cpu UV data area. 586 * Called on each cpu to initialize the per_cpu UV data area.
551 * FIXME: hotplug not supported yet 587 * FIXME: hotplug not supported yet
@@ -562,6 +598,46 @@ void __cpuinit uv_cpu_init(void)
562 set_x2apic_extra_bits(uv_hub_info->pnode); 598 set_x2apic_extra_bits(uv_hub_info->pnode);
563} 599}
564 600
601/*
602 * When NMI is received, print a stack trace.
603 */
604int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
605{
606 if (reason != DIE_NMI_IPI)
607 return NOTIFY_OK;
608 /*
609 * Use a lock so only one cpu prints at a time
610 * to prevent intermixed output.
611 */
612 spin_lock(&uv_nmi_lock);
613 pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
614 dump_stack();
615 spin_unlock(&uv_nmi_lock);
616
617 return NOTIFY_STOP;
618}
619
620static struct notifier_block uv_dump_stack_nmi_nb = {
621 .notifier_call = uv_handle_nmi
622};
623
624void uv_register_nmi_notifier(void)
625{
626 if (register_die_notifier(&uv_dump_stack_nmi_nb))
627 printk(KERN_WARNING "UV NMI handler failed to register\n");
628}
629
630void uv_nmi_init(void)
631{
632 unsigned int value;
633
634 /*
635 * Unmask NMI on all cpus
636 */
637 value = apic_read(APIC_LVT1) | APIC_DM_NMI;
638 value &= ~APIC_LVT_MASKED;
639 apic_write(APIC_LVT1, value);
640}
565 641
566void __init uv_system_init(void) 642void __init uv_system_init(void)
567{ 643{
@@ -627,13 +703,15 @@ void __init uv_system_init(void)
627 } 703 }
628 704
629 uv_bios_init(); 705 uv_bios_init();
630 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, 706 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
631 &sn_coherency_id, &sn_region_size); 707 &sn_region_size, &system_serial_number);
632 uv_rtc_init(); 708 uv_rtc_init();
633 709
634 for_each_present_cpu(cpu) { 710 for_each_present_cpu(cpu) {
711 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
712
635 nid = cpu_to_node(cpu); 713 nid = cpu_to_node(cpu);
636 pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); 714 pnode = uv_apicid_to_pnode(apicid);
637 blade = boot_pnode_to_blade(pnode); 715 blade = boot_pnode_to_blade(pnode);
638 lcpu = uv_blade_info[blade].nr_possible_cpus; 716 lcpu = uv_blade_info[blade].nr_possible_cpus;
639 uv_blade_info[blade].nr_possible_cpus++; 717 uv_blade_info[blade].nr_possible_cpus++;
@@ -654,15 +732,13 @@ void __init uv_system_init(void)
654 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 732 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
655 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 733 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
656 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; 734 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
657 uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; 735 uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
658 uv_node_to_blade[nid] = blade; 736 uv_node_to_blade[nid] = blade;
659 uv_cpu_to_blade[cpu] = blade; 737 uv_cpu_to_blade[cpu] = blade;
660 max_pnode = max(pnode, max_pnode); 738 max_pnode = max(pnode, max_pnode);
661 739
662 printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " 740 printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n",
663 "lcpu %d, blade %d\n", 741 cpu, apicid, pnode, nid, lcpu, blade);
664 cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
665 lcpu, blade);
666 } 742 }
667 743
668 /* Add blade/pnode info for nodes without cpus */ 744 /* Add blade/pnode info for nodes without cpus */
@@ -683,5 +759,9 @@ void __init uv_system_init(void)
683 759
684 uv_cpu_init(); 760 uv_cpu_init();
685 uv_scir_register_cpu_notifier(); 761 uv_scir_register_cpu_notifier();
762 uv_register_nmi_notifier();
686 proc_mkdir("sgi_uv", NULL); 763 proc_mkdir("sgi_uv", NULL);
764
765 /* register Legacy VGA I/O redirection handler */
766 pci_register_set_vga_state(uv_set_vga_state);
687} 767}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index b5b6b23bce53..031aa887b0eb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1992,8 +1992,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d)
1992 apm_info.disabled = 1; 1992 apm_info.disabled = 1;
1993 printk(KERN_INFO "%s machine detected. " 1993 printk(KERN_INFO "%s machine detected. "
1994 "Disabling APM.\n", d->ident); 1994 "Disabling APM.\n", d->ident);
1995 printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); 1995 printk(KERN_INFO "This bug is fixed in bios P15 which is available for\n");
1996 printk(KERN_INFO "download from support.intel.com \n"); 1996 printk(KERN_INFO "download from support.intel.com\n");
1997 } 1997 }
1998 return 0; 1998 return 0;
1999} 1999}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 63a88e1f987d..8bc57baaa9ad 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -15,8 +15,8 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * 17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 18 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson 19 * Copyright (c) Russ Anderson <rja@sgi.com>
20 */ 20 */
21 21
22#include <linux/efi.h> 22#include <linux/efi.h>
@@ -30,6 +30,7 @@ static struct uv_systab uv_systab;
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
32 struct uv_systab *tab = &uv_systab; 32 struct uv_systab *tab = &uv_systab;
33 s64 ret;
33 34
34 if (!tab->function) 35 if (!tab->function)
35 /* 36 /*
@@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
37 */ 38 */
38 return BIOS_STATUS_UNIMPLEMENTED; 39 return BIOS_STATUS_UNIMPLEMENTED;
39 40
40 return efi_call6((void *)__va(tab->function), 41 ret = efi_call6((void *)__va(tab->function), (u64)which,
41 (u64)which, a1, a2, a3, a4, a5); 42 a1, a2, a3, a4, a5);
43 return ret;
42} 44}
45EXPORT_SYMBOL_GPL(uv_bios_call);
43 46
44s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, 47s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
45 u64 a4, u64 a5) 48 u64 a4, u64 a5)
@@ -73,11 +76,14 @@ long sn_coherency_id;
73EXPORT_SYMBOL_GPL(sn_coherency_id); 76EXPORT_SYMBOL_GPL(sn_coherency_id);
74long sn_region_size; 77long sn_region_size;
75EXPORT_SYMBOL_GPL(sn_region_size); 78EXPORT_SYMBOL_GPL(sn_region_size);
79long system_serial_number;
80EXPORT_SYMBOL_GPL(system_serial_number);
76int uv_type; 81int uv_type;
82EXPORT_SYMBOL_GPL(uv_type);
77 83
78 84
79s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, 85s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
80 long *region) 86 long *region, long *ssn)
81{ 87{
82 s64 ret; 88 s64 ret;
83 u64 v0, v1; 89 u64 v0, v1;
@@ -97,25 +103,24 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
97 *coher = part.coherence_id; 103 *coher = part.coherence_id;
98 if (region) 104 if (region)
99 *region = part.region_size; 105 *region = part.region_size;
106 if (ssn)
107 *ssn = v1;
100 return ret; 108 return ret;
101} 109}
110EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
102 111
103int 112int
104uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, 113uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
105 unsigned long *intr_mmr_offset) 114 unsigned long *intr_mmr_offset)
106{ 115{
107 union uv_watchlist_u size_blade;
108 u64 watchlist; 116 u64 watchlist;
109 s64 ret; 117 s64 ret;
110 118
111 size_blade.size = mq_size;
112 size_blade.blade = blade;
113
114 /* 119 /*
115 * bios returns watchlist number or negative error number. 120 * bios returns watchlist number or negative error number.
116 */ 121 */
117 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, 122 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
118 size_blade.val, (u64)intr_mmr_offset, 123 mq_size, (u64)intr_mmr_offset,
119 (u64)&watchlist, 0); 124 (u64)&watchlist, 0);
120 if (ret < BIOS_STATUS_SUCCESS) 125 if (ret < BIOS_STATUS_SUCCESS)
121 return ret; 126 return ret;
@@ -158,6 +163,25 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
158} 163}
159EXPORT_SYMBOL_GPL(uv_bios_freq_base); 164EXPORT_SYMBOL_GPL(uv_bios_freq_base);
160 165
166/*
167 * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
168 * @decode: true to enable target, false to disable target
169 * @domain: PCI domain number
170 * @bus: PCI bus number
171 *
172 * Returns:
173 * 0: Success
174 * -EINVAL: Invalid domain or bus number
175 * -ENOSYS: Capability not available
176 * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
177 */
178int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
179{
180 return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
181 (u64)decode, (u64)domain, (u64)bus, 0, 0);
182}
183EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
184
161 185
162#ifdef CONFIG_EFI 186#ifdef CONFIG_EFI
163void uv_bios_init(void) 187void uv_bios_init(void)
@@ -189,4 +213,3 @@ void uv_bios_init(void)
189 213
190void uv_bios_init(void) { } 214void uv_bios_init(void) { }
191#endif 215#endif
192
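
For reference, the way these additions fit together: uv_set_vga_state() (registered via pci_register_set_vga_state() in the x2apic_uv_x.c hunk above) derives the domain and bus from the pci_dev and forwards them to the new BIOS call. A minimal calling sketch, with `pdev` and `decode` as in that callback and return codes as documented in the kernel-doc comment above:

	int domain = pci_domain_nr(pdev->bus);
	int bus    = pdev->bus->number;
	int rc;

	rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
	if (rc)		/* -EINVAL, -ENOSYS or -EBUSY per the kernel-doc */
		return rc;
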
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 1d2cb383410e..c202b62f3671 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -19,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o
19obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 19obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
20obj-$(CONFIG_X86_64) += bugs_64.o 20obj-$(CONFIG_X86_64) += bugs_64.o
21 21
22obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o
23
24obj-$(CONFIG_CPU_SUP_INTEL) += intel.o 22obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
25obj-$(CONFIG_CPU_SUP_AMD) += amd.o 23obj-$(CONFIG_CPU_SUP_AMD) += amd.o
26obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o 24obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c965e5212714..97ad79cdf688 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { 32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, 33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, 34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
35 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
36 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
37 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a },
38 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
35 { 0, 0, 0, 0 } 39 { 0, 0, 0, 0 }
36 }; 40 };
37 41
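
Each table entry is a {flag, register, bit, leaf} tuple, so the first new row says X86_FEATURE_NPT lives in bit 0 of EDX of CPUID leaf 0x8000000a. A sketch of the check the table drives (simplified; init_scattered_cpuid_features() itself is not shown in this hunk, and `c` is the usual struct cpuinfo_x86 pointer):

	u32 eax, ebx, ecx, edx;

	cpuid(0x8000000a, &eax, &ebx, &ecx, &edx);

	if (edx & (1 << 0))	/* { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a } */
		set_cpu_cap(c, X86_FEATURE_NPT);
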
@@ -74,6 +78,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
74 unsigned int eax, ebx, ecx, edx, sub_index; 78 unsigned int eax, ebx, ecx, edx, sub_index;
75 unsigned int ht_mask_width, core_plus_mask_width; 79 unsigned int ht_mask_width, core_plus_mask_width;
76 unsigned int core_select_mask, core_level_siblings; 80 unsigned int core_select_mask, core_level_siblings;
81 static bool printed;
77 82
78 if (c->cpuid_level < 0xb) 83 if (c->cpuid_level < 0xb)
79 return; 84 return;
@@ -127,12 +132,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
127 132
128 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 133 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
129 134
130 135 if (!printed) {
131 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 136 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
132 c->phys_proc_id); 137 c->phys_proc_id);
133 if (c->x86_max_cores > 1) 138 if (c->x86_max_cores > 1)
134 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 139 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
135 c->cpu_core_id); 140 c->cpu_core_id);
141 printed = 1;
142 }
136 return; 143 return;
137#endif 144#endif
138} 145}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7128b3799cec..e485825130d2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid)
254 254
255/* 255/*
256 * Fixup core topology information for AMD multi-node processors. 256 * Fixup core topology information for AMD multi-node processors.
257 * Assumption 1: Number of cores in each internal node is the same. 257 * Assumption: Number of cores in each internal node is the same.
258 * Assumption 2: Mixed systems with both single-node and dual-node
259 * processors are not supported.
260 */ 258 */
261#ifdef CONFIG_X86_HT 259#ifdef CONFIG_X86_HT
262static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) 260static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
263{ 261{
264#ifdef CONFIG_PCI 262 unsigned long long value;
265 u32 t, cpn; 263 u32 nodes, cores_per_node;
266 u8 n, n_id;
267 int cpu = smp_processor_id(); 264 int cpu = smp_processor_id();
268 265
266 if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
267 return;
268
269 /* fixup topology information only once for a core */ 269 /* fixup topology information only once for a core */
270 if (cpu_has(c, X86_FEATURE_AMD_DCM)) 270 if (cpu_has(c, X86_FEATURE_AMD_DCM))
271 return; 271 return;
272 272
273 /* check for multi-node processor on boot cpu */ 273 rdmsrl(MSR_FAM10H_NODE_ID, value);
274 t = read_pci_config(0, 24, 3, 0xe8); 274
275 if (!(t & (1 << 29))) 275 nodes = ((value >> 3) & 7) + 1;
276 if (nodes == 1)
276 return; 277 return;
277 278
278 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 279 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
280 cores_per_node = c->x86_max_cores / nodes;
279 281
280 /* cores per node: each internal node has half the number of cores */ 282 /* store NodeID, use llc_shared_map to store sibling info */
281 cpn = c->x86_max_cores >> 1; 283 per_cpu(cpu_llc_id, cpu) = value & 7;
282
283 /* even-numbered NB_id of this dual-node processor */
284 n = c->phys_proc_id << 1;
285
286 /*
287 * determine internal node id and assign cores fifty-fifty to
288 * each node of the dual-node processor
289 */
290 t = read_pci_config(0, 24 + n, 3, 0xe8);
291 n = (t>>30) & 0x3;
292 if (n == 0) {
293 if (c->cpu_core_id < cpn)
294 n_id = 0;
295 else
296 n_id = 1;
297 } else {
298 if (c->cpu_core_id < cpn)
299 n_id = 1;
300 else
301 n_id = 0;
302 }
303
304 /* compute entire NodeID, use llc_shared_map to store sibling info */
305 per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
306 284
307 /* fixup core id to be in range from 0 to cpn */ 285 /* fixup core id to be in range from 0 to (cores_per_node - 1) */
308 c->cpu_core_id = c->cpu_core_id % cpn; 286 c->cpu_core_id = c->cpu_core_id % cores_per_node;
309#endif
310} 287}
311#endif 288#endif
312 289
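
The PCI-config probe of the northbridge is replaced by a read of the NodeId MSR, gated on the new X86_FEATURE_NODEID_MSR flag. The bit layout implied by the hunk (node count minus one in bits 5:3, this core's node id in bits 2:0):

	unsigned long long value;

	rdmsrl(MSR_FAM10H_NODE_ID, value);

	nodes = ((value >> 3) & 7) + 1;		/* NodesPerProcessor field */
	per_cpu(cpu_llc_id, cpu) = value & 7;	/* NodeId of this core */
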
@@ -375,8 +352,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
375 node = nearby_node(apicid); 352 node = nearby_node(apicid);
376 } 353 }
377 numa_set_node(cpu, node); 354 numa_set_node(cpu, node);
378
379 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
380#endif 355#endif
381} 356}
382 357
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c1afa990a6c8..4868e4a951ee 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -427,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
427#ifdef CONFIG_X86_HT 427#ifdef CONFIG_X86_HT
428 u32 eax, ebx, ecx, edx; 428 u32 eax, ebx, ecx, edx;
429 int index_msb, core_bits; 429 int index_msb, core_bits;
430 static bool printed;
430 431
431 if (!cpu_has(c, X86_FEATURE_HT)) 432 if (!cpu_has(c, X86_FEATURE_HT))
432 return; 433 return;
@@ -442,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
442 smp_num_siblings = (ebx & 0xff0000) >> 16; 443 smp_num_siblings = (ebx & 0xff0000) >> 16;
443 444
444 if (smp_num_siblings == 1) { 445 if (smp_num_siblings == 1) {
445 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 446 printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
446 goto out; 447 goto out;
447 } 448 }
448 449
@@ -469,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
469 ((1 << core_bits) - 1); 470 ((1 << core_bits) - 1);
470 471
471out: 472out:
472 if ((c->x86_max_cores * smp_num_siblings) > 1) { 473 if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
473 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 474 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
474 c->phys_proc_id); 475 c->phys_proc_id);
475 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 476 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
476 c->cpu_core_id); 477 c->cpu_core_id);
478 printed = 1;
477 } 479 }
478#endif 480#endif
479} 481}
@@ -1093,7 +1095,7 @@ static void clear_all_debug_regs(void)
1093 1095
1094void __cpuinit cpu_init(void) 1096void __cpuinit cpu_init(void)
1095{ 1097{
1096 struct orig_ist *orig_ist; 1098 struct orig_ist *oist;
1097 struct task_struct *me; 1099 struct task_struct *me;
1098 struct tss_struct *t; 1100 struct tss_struct *t;
1099 unsigned long v; 1101 unsigned long v;
@@ -1102,7 +1104,7 @@ void __cpuinit cpu_init(void)
1102 1104
1103 cpu = stack_smp_processor_id(); 1105 cpu = stack_smp_processor_id();
1104 t = &per_cpu(init_tss, cpu); 1106 t = &per_cpu(init_tss, cpu);
1105 orig_ist = &per_cpu(orig_ist, cpu); 1107 oist = &per_cpu(orig_ist, cpu);
1106 1108
1107#ifdef CONFIG_NUMA 1109#ifdef CONFIG_NUMA
1108 if (cpu != 0 && percpu_read(node_number) == 0 && 1110 if (cpu != 0 && percpu_read(node_number) == 0 &&
@@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void)
1115 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) 1117 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
1116 panic("CPU#%d already initialized!\n", cpu); 1118 panic("CPU#%d already initialized!\n", cpu);
1117 1119
1118 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1120 pr_debug("Initializing CPU#%d\n", cpu);
1119 1121
1120 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1122 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1121 1123
@@ -1143,12 +1145,12 @@ void __cpuinit cpu_init(void)
1143 /* 1145 /*
1144 * set up and load the per-CPU TSS 1146 * set up and load the per-CPU TSS
1145 */ 1147 */
1146 if (!orig_ist->ist[0]) { 1148 if (!oist->ist[0]) {
1147 char *estacks = per_cpu(exception_stacks, cpu); 1149 char *estacks = per_cpu(exception_stacks, cpu);
1148 1150
1149 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1151 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1150 estacks += exception_stack_sizes[v]; 1152 estacks += exception_stack_sizes[v];
1151 orig_ist->ist[v] = t->x86_tss.ist[v] = 1153 oist->ist[v] = t->x86_tss.ist[v] =
1152 (unsigned long)estacks; 1154 (unsigned long)estacks;
1153 } 1155 }
1154 } 1156 }
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
deleted file mode 100644
index dca325c03999..000000000000
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ /dev/null
@@ -1,688 +0,0 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licensing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/uaccess.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/percpu.h>
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/types.h>
22#include <linux/init.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25
26#include <asm/cpu_debug.h>
27#include <asm/paravirt.h>
28#include <asm/system.h>
29#include <asm/traps.h>
30#include <asm/apic.h>
31#include <asm/desc.h>
32
33static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
34static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
35static DEFINE_PER_CPU(int, cpu_priv_count);
36
37static DEFINE_MUTEX(cpu_debug_lock);
38
39static struct dentry *cpu_debugfs_dir;
40
41static struct cpu_debug_base cpu_base[] = {
42 { "mc", CPU_MC, 0 },
43 { "monitor", CPU_MONITOR, 0 },
44 { "time", CPU_TIME, 0 },
45 { "pmc", CPU_PMC, 1 },
46 { "platform", CPU_PLATFORM, 0 },
47 { "apic", CPU_APIC, 0 },
48 { "poweron", CPU_POWERON, 0 },
49 { "control", CPU_CONTROL, 0 },
50 { "features", CPU_FEATURES, 0 },
51 { "lastbranch", CPU_LBRANCH, 0 },
52 { "bios", CPU_BIOS, 0 },
53 { "freq", CPU_FREQ, 0 },
54 { "mtrr", CPU_MTRR, 0 },
55 { "perf", CPU_PERF, 0 },
56 { "cache", CPU_CACHE, 0 },
57 { "sysenter", CPU_SYSENTER, 0 },
58 { "therm", CPU_THERM, 0 },
59 { "misc", CPU_MISC, 0 },
60 { "debug", CPU_DEBUG, 0 },
61 { "pat", CPU_PAT, 0 },
62 { "vmx", CPU_VMX, 0 },
63 { "call", CPU_CALL, 0 },
64 { "base", CPU_BASE, 0 },
65 { "ver", CPU_VER, 0 },
66 { "conf", CPU_CONF, 0 },
67 { "smm", CPU_SMM, 0 },
68 { "svm", CPU_SVM, 0 },
69 { "osvm", CPU_OSVM, 0 },
70 { "tss", CPU_TSS, 0 },
71 { "cr", CPU_CR, 0 },
72 { "dt", CPU_DT, 0 },
73 { "registers", CPU_REG_ALL, 0 },
74};
75
76static struct cpu_file_base cpu_file[] = {
77 { "index", CPU_REG_ALL, 0 },
78 { "value", CPU_REG_ALL, 1 },
79};
80
81/* CPU Registers Range */
82static struct cpu_debug_range cpu_reg_range[] = {
83 { 0x00000000, 0x00000001, CPU_MC, },
84 { 0x00000006, 0x00000007, CPU_MONITOR, },
85 { 0x00000010, 0x00000010, CPU_TIME, },
86 { 0x00000011, 0x00000013, CPU_PMC, },
87 { 0x00000017, 0x00000017, CPU_PLATFORM, },
88 { 0x0000001B, 0x0000001B, CPU_APIC, },
89 { 0x0000002A, 0x0000002B, CPU_POWERON, },
90 { 0x0000002C, 0x0000002C, CPU_FREQ, },
91 { 0x0000003A, 0x0000003A, CPU_CONTROL, },
92 { 0x00000040, 0x00000047, CPU_LBRANCH, },
93 { 0x00000060, 0x00000067, CPU_LBRANCH, },
94 { 0x00000079, 0x00000079, CPU_BIOS, },
95 { 0x00000088, 0x0000008A, CPU_CACHE, },
96 { 0x0000008B, 0x0000008B, CPU_BIOS, },
97 { 0x0000009B, 0x0000009B, CPU_MONITOR, },
98 { 0x000000C1, 0x000000C4, CPU_PMC, },
99 { 0x000000CD, 0x000000CD, CPU_FREQ, },
100 { 0x000000E7, 0x000000E8, CPU_PERF, },
101 { 0x000000FE, 0x000000FE, CPU_MTRR, },
102
103 { 0x00000116, 0x0000011E, CPU_CACHE, },
104 { 0x00000174, 0x00000176, CPU_SYSENTER, },
105 { 0x00000179, 0x0000017B, CPU_MC, },
106 { 0x00000186, 0x00000189, CPU_PMC, },
107 { 0x00000198, 0x00000199, CPU_PERF, },
108 { 0x0000019A, 0x0000019A, CPU_TIME, },
109 { 0x0000019B, 0x0000019D, CPU_THERM, },
110 { 0x000001A0, 0x000001A0, CPU_MISC, },
111 { 0x000001C9, 0x000001C9, CPU_LBRANCH, },
112 { 0x000001D7, 0x000001D8, CPU_LBRANCH, },
113 { 0x000001D9, 0x000001D9, CPU_DEBUG, },
114 { 0x000001DA, 0x000001E0, CPU_LBRANCH, },
115
116 { 0x00000200, 0x0000020F, CPU_MTRR, },
117 { 0x00000250, 0x00000250, CPU_MTRR, },
118 { 0x00000258, 0x00000259, CPU_MTRR, },
119 { 0x00000268, 0x0000026F, CPU_MTRR, },
120 { 0x00000277, 0x00000277, CPU_PAT, },
121 { 0x000002FF, 0x000002FF, CPU_MTRR, },
122
123 { 0x00000300, 0x00000311, CPU_PMC, },
124 { 0x00000345, 0x00000345, CPU_PMC, },
125 { 0x00000360, 0x00000371, CPU_PMC, },
126 { 0x0000038D, 0x00000390, CPU_PMC, },
127 { 0x000003A0, 0x000003BE, CPU_PMC, },
128 { 0x000003C0, 0x000003CD, CPU_PMC, },
129 { 0x000003E0, 0x000003E1, CPU_PMC, },
130 { 0x000003F0, 0x000003F2, CPU_PMC, },
131
132 { 0x00000400, 0x00000417, CPU_MC, },
133 { 0x00000480, 0x0000048B, CPU_VMX, },
134
135 { 0x00000600, 0x00000600, CPU_DEBUG, },
136 { 0x00000680, 0x0000068F, CPU_LBRANCH, },
137 { 0x000006C0, 0x000006CF, CPU_LBRANCH, },
138
139 { 0x000107CC, 0x000107D3, CPU_PMC, },
140
141 { 0xC0000080, 0xC0000080, CPU_FEATURES, },
142 { 0xC0000081, 0xC0000084, CPU_CALL, },
143 { 0xC0000100, 0xC0000102, CPU_BASE, },
144 { 0xC0000103, 0xC0000103, CPU_TIME, },
145
146 { 0xC0010000, 0xC0010007, CPU_PMC, },
147 { 0xC0010010, 0xC0010010, CPU_CONF, },
148 { 0xC0010015, 0xC0010015, CPU_CONF, },
149 { 0xC0010016, 0xC001001A, CPU_MTRR, },
150 { 0xC001001D, 0xC001001D, CPU_MTRR, },
151 { 0xC001001F, 0xC001001F, CPU_CONF, },
152 { 0xC0010030, 0xC0010035, CPU_BIOS, },
153 { 0xC0010044, 0xC0010048, CPU_MC, },
154 { 0xC0010050, 0xC0010056, CPU_SMM, },
155 { 0xC0010058, 0xC0010058, CPU_CONF, },
156 { 0xC0010060, 0xC0010060, CPU_CACHE, },
157 { 0xC0010061, 0xC0010068, CPU_SMM, },
158 { 0xC0010069, 0xC001006B, CPU_SMM, },
159 { 0xC0010070, 0xC0010071, CPU_SMM, },
160 { 0xC0010111, 0xC0010113, CPU_SMM, },
161 { 0xC0010114, 0xC0010118, CPU_SVM, },
162 { 0xC0010140, 0xC0010141, CPU_OSVM, },
163 { 0xC0011022, 0xC0011023, CPU_CONF, },
164};
165
166static int is_typeflag_valid(unsigned cpu, unsigned flag)
167{
168 int i;
169
170	/* Standard registers should always be valid */
171 if (flag >= CPU_TSS)
172 return 1;
173
174 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
175 if (cpu_reg_range[i].flag == flag)
176 return 1;
177 }
178
179 /* Invalid */
180 return 0;
181}
182
183static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
184 int index, unsigned flag)
185{
186 if (cpu_reg_range[index].flag == flag) {
187 *min = cpu_reg_range[index].min;
188 *max = cpu_reg_range[index].max;
189 } else
190 *max = 0;
191
192 return *max;
193}
194
195/* This function can also be called with seq = NULL for printk */
196static void print_cpu_data(struct seq_file *seq, unsigned type,
197 u32 low, u32 high)
198{
199 struct cpu_private *priv;
200 u64 val = high;
201
202 if (seq) {
203 priv = seq->private;
204 if (priv->file) {
205 val = (val << 32) | low;
206 seq_printf(seq, "0x%llx\n", val);
207 } else
208 seq_printf(seq, " %08x: %08x_%08x\n",
209 type, high, low);
210 } else
211 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
212}
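/*
 * A minimal sketch of the half-word reassembly print_cpu_data() relies on:
 * the rdmsr-style helpers hand back an MSR as two 32-bit halves, and the
 * 64-bit value is rebuilt by shifting the high half up. Illustrative only.
 */
static inline u64 msr_to_u64(u32 low, u32 high)
{
	return ((u64)high << 32) | low;
}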
213
214/* This function can also be called with seq = NULL for printk */
215static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
216{
217 unsigned msr, msr_min, msr_max;
218 struct cpu_private *priv;
219 u32 low, high;
220 int i;
221
222 if (seq) {
223 priv = seq->private;
224 if (priv->file) {
225 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
226 &low, &high))
227 print_cpu_data(seq, priv->reg, low, high);
228 return;
229 }
230 }
231
232 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
233 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
234 continue;
235
236 for (msr = msr_min; msr <= msr_max; msr++) {
237 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
238 continue;
239 print_cpu_data(seq, msr, low, high);
240 }
241 }
242}
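/*
 * Sketch of the rdmsr_safe_on_cpu() pattern used by print_msr() above: a
 * zero return means the read succeeded and both halves are valid, so a
 * non-zero return simply means "this MSR does not exist here" and the
 * register is skipped. MSR 0xE7 (IA32_MPERF) is only an illustrative choice.
 */
static void sample_one_msr(unsigned int cpu)
{
	u32 low, high;

	if (!rdmsr_safe_on_cpu(cpu, 0xE7, &low, &high))
		pr_info("cpu%u: msr 0xe7 = %08x_%08x\n", cpu, high, low);
}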
243
244static void print_tss(void *arg)
245{
246 struct pt_regs *regs = task_pt_regs(current);
247 struct seq_file *seq = arg;
248 unsigned int seg;
249
250 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
251 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
252 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
253 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
254
255 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
256 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
257 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
258 seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
259
260#ifdef CONFIG_X86_64
261 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
262 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
263 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
264 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
265 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
266 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
267 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
268 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
269#endif
270
271 asm("movl %%cs,%0" : "=r" (seg));
272 seq_printf(seq, " CS\t: %04x\n", seg);
273 asm("movl %%ds,%0" : "=r" (seg));
274 seq_printf(seq, " DS\t: %04x\n", seg);
275 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
276 asm("movl %%es,%0" : "=r" (seg));
277 seq_printf(seq, " ES\t: %04x\n", seg);
278 asm("movl %%fs,%0" : "=r" (seg));
279 seq_printf(seq, " FS\t: %04x\n", seg);
280 asm("movl %%gs,%0" : "=r" (seg));
281 seq_printf(seq, " GS\t: %04x\n", seg);
282
283 seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
284
285 seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
286}
287
288static void print_cr(void *arg)
289{
290 struct seq_file *seq = arg;
291
292 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
293 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
294 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
295 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
296#ifdef CONFIG_X86_64
297 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
298#endif
299}
300
301static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
302{
303	seq_printf(seq, " %s\t: %016llx/%04x\n", str, (u64)dt.address, dt.size);
304}
305
306static void print_dt(void *seq)
307{
308 struct desc_ptr dt;
309 unsigned long ldt;
310
311 /* IDT */
312	store_idt(&dt);
313 print_desc_ptr("IDT", seq, dt);
314
315 /* GDT */
316	store_gdt(&dt);
317 print_desc_ptr("GDT", seq, dt);
318
319 /* LDT */
320 store_ldt(ldt);
321 seq_printf(seq, " LDT\t: %016lx\n", ldt);
322
323 /* TR */
324 store_tr(ldt);
325 seq_printf(seq, " TR\t: %016lx\n", ldt);
326}
327
328static void print_dr(void *arg)
329{
330 struct seq_file *seq = arg;
331 unsigned long dr;
332 int i;
333
334 for (i = 0; i < 8; i++) {
335 /* Ignore db4, db5 */
336 if ((i == 4) || (i == 5))
337 continue;
338 get_debugreg(dr, i);
339 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
340 }
341
342 seq_printf(seq, "\n MSR\t:\n");
343}
344
345static void print_apic(void *arg)
346{
347 struct seq_file *seq = arg;
348
349#ifdef CONFIG_X86_LOCAL_APIC
350 seq_printf(seq, " LAPIC\t:\n");
351 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
352 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
353 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
354 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
355 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
356 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
357 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
358 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
359 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
360 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
361 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
362 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
363 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
364 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
365 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
366 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
367 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
368 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
369 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
370 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
371 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
372 if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
373 unsigned int i, v, maxeilvt;
374
375 v = apic_read(APIC_EFEAT);
376 maxeilvt = (v >> 16) & 0xff;
377 seq_printf(seq, " EFEAT\t\t: %08x\n", v);
378 seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
379
380 for (i = 0; i < maxeilvt; i++) {
381 v = apic_read(APIC_EILVTn(i));
382 seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
383 }
384 }
385#endif /* CONFIG_X86_LOCAL_APIC */
386 seq_printf(seq, "\n MSR\t:\n");
387}
388
389static int cpu_seq_show(struct seq_file *seq, void *v)
390{
391 struct cpu_private *priv = seq->private;
392
393 if (priv == NULL)
394 return -EINVAL;
395
396 switch (cpu_base[priv->type].flag) {
397 case CPU_TSS:
398 smp_call_function_single(priv->cpu, print_tss, seq, 1);
399 break;
400 case CPU_CR:
401 smp_call_function_single(priv->cpu, print_cr, seq, 1);
402 break;
403 case CPU_DT:
404 smp_call_function_single(priv->cpu, print_dt, seq, 1);
405 break;
406 case CPU_DEBUG:
407 if (priv->file == CPU_INDEX_BIT)
408 smp_call_function_single(priv->cpu, print_dr, seq, 1);
409 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
410 break;
411 case CPU_APIC:
412 if (priv->file == CPU_INDEX_BIT)
413 smp_call_function_single(priv->cpu, print_apic, seq, 1);
414 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
415 break;
416
417 default:
418 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
419 break;
420 }
421 seq_printf(seq, "\n");
422
423 return 0;
424}
425
426static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
427{
428 if (*pos == 0) /* One time is enough ;-) */
429 return seq;
430
431 return NULL;
432}
433
434static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
435{
436 (*pos)++;
437
438 return cpu_seq_start(seq, pos);
439}
440
441static void cpu_seq_stop(struct seq_file *seq, void *v)
442{
443}
444
445static const struct seq_operations cpu_seq_ops = {
446 .start = cpu_seq_start,
447 .next = cpu_seq_next,
448 .stop = cpu_seq_stop,
449 .show = cpu_seq_show,
450};
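/*
 * The start/next/stop triplet above is the common "one-shot" seq_file
 * idiom: start() hands back a non-NULL token only while *pos == 0, and
 * next() bumps *pos so the walk terminates, meaning show() runs exactly
 * once per read of the file. The same shape in isolation (names are
 * illustrative):
 */
static void *once_start(struct seq_file *m, loff_t *pos)
{
	return *pos == 0 ? m : NULL;	/* any non-NULL cookie will do */
}

static void *once_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;	/* second pass through start() ends the walk */
	return NULL;
}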
451
452static int cpu_seq_open(struct inode *inode, struct file *file)
453{
454 struct cpu_private *priv = inode->i_private;
455 struct seq_file *seq;
456 int err;
457
458 err = seq_open(file, &cpu_seq_ops);
459 if (!err) {
460 seq = file->private_data;
461 seq->private = priv;
462 }
463
464 return err;
465}
466
467static int write_msr(struct cpu_private *priv, u64 val)
468{
469 u32 low, high;
470
471 high = (val >> 32) & 0xffffffff;
472 low = val & 0xffffffff;
473
474 if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
475 return 0;
476
477 return -EPERM;
478}
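/*
 * The write path is the mirror image of the read path: split the 64-bit
 * value into 32-bit halves and hand them to wrmsr_safe_on_cpu(), which
 * also returns 0 on success. A hypothetical one-line equivalent of
 * write_msr() above:
 */
static int poke_msr(unsigned int cpu, u32 reg, u64 val)
{
	return wrmsr_safe_on_cpu(cpu, reg, (u32)val, (u32)(val >> 32));
}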
479
480static int write_cpu_register(struct cpu_private *priv, const char *buf)
481{
482	int ret;
483 u64 val;
484
485 ret = strict_strtoull(buf, 0, &val);
486 if (ret < 0)
487 return ret;
488
489 /* Supporting only MSRs */
490 if (priv->type < CPU_TSS_BIT)
491 return write_msr(priv, val);
492
493	return -EPERM;
494}
495
496static ssize_t cpu_write(struct file *file, const char __user *ubuf,
497 size_t count, loff_t *off)
498{
499 struct seq_file *seq = file->private_data;
500 struct cpu_private *priv = seq->private;
501 char buf[19];
502
503 if ((priv == NULL) || (count >= sizeof(buf)))
504 return -EINVAL;
505
506 if (copy_from_user(&buf, ubuf, count))
507 return -EFAULT;
508
509 buf[count] = 0;
510
511 if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
512 if (!write_cpu_register(priv, buf))
513 return count;
514
515 return -EACCES;
516}
517
518static const struct file_operations cpu_fops = {
519 .owner = THIS_MODULE,
520 .open = cpu_seq_open,
521 .read = seq_read,
522 .write = cpu_write,
523 .llseek = seq_lseek,
524 .release = seq_release,
525};
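/*
 * How cpu_fops gets wired up: every debugfs file is created with a parent
 * dentry and a private pointer that cpu_seq_open() later recovers from
 * inode->i_private. A stripped-down sketch of the same wiring, using the
 * stock debugfs_create_u32() helper in place of the custom cpu_fops; the
 * directory and file names are illustrative:
 */
static struct dentry *example_dir;
static u32 example_value;

static int __init example_debugfs_init(void)
{
	example_dir = debugfs_create_dir("cpu_example", arch_debugfs_dir);
	if (!example_dir)
		return -ENOMEM;
	debugfs_create_u32("value", S_IRUGO | S_IWUSR, example_dir,
			   &example_value);
	return 0;
}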
526
527static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
528 unsigned file, struct dentry *dentry)
529{
530 struct cpu_private *priv = NULL;
531
532	/* Already initialized */
533 if (file == CPU_INDEX_BIT)
534 if (per_cpu(cpu_arr[type].init, cpu))
535 return 0;
536
537 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
538 if (priv == NULL)
539 return -ENOMEM;
540
541 priv->cpu = cpu;
542 priv->type = type;
543 priv->reg = reg;
544 priv->file = file;
545 mutex_lock(&cpu_debug_lock);
546 per_cpu(priv_arr[type], cpu) = priv;
547 per_cpu(cpu_priv_count, cpu)++;
548 mutex_unlock(&cpu_debug_lock);
549
550 if (file)
551 debugfs_create_file(cpu_file[file].name, S_IRUGO,
552 dentry, (void *)priv, &cpu_fops);
553 else {
554 debugfs_create_file(cpu_base[type].name, S_IRUGO,
555 per_cpu(cpu_arr[type].dentry, cpu),
556 (void *)priv, &cpu_fops);
557 mutex_lock(&cpu_debug_lock);
558 per_cpu(cpu_arr[type].init, cpu) = 1;
559 mutex_unlock(&cpu_debug_lock);
560 }
561
562 return 0;
563}
564
565static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
566 struct dentry *dentry)
567{
568 unsigned file;
569 int err = 0;
570
571 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
572 err = cpu_create_file(cpu, type, reg, file, dentry);
573 if (err)
574 return err;
575 }
576
577 return err;
578}
579
580static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
581{
582 struct dentry *cpu_dentry = NULL;
583 unsigned reg, reg_min, reg_max;
584 int i, err = 0;
585 char reg_dir[12];
586 u32 low, high;
587
588 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
589 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
590 cpu_base[type].flag))
591 continue;
592
593 for (reg = reg_min; reg <= reg_max; reg++) {
594 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
595 continue;
596
597 sprintf(reg_dir, "0x%x", reg);
598 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
599 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
600 if (err)
601 return err;
602 }
603 }
604
605 return err;
606}
607
608static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
609{
610 struct dentry *cpu_dentry = NULL;
611 unsigned type;
612 int err = 0;
613
614 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
615 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
616 continue;
617 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
618 per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
619
620 if (type < CPU_TSS_BIT)
621 err = cpu_init_msr(cpu, type, cpu_dentry);
622 else
623 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
624 cpu_dentry);
625 if (err)
626 return err;
627 }
628
629 return err;
630}
631
632static int cpu_init_cpu(void)
633{
634 struct dentry *cpu_dentry = NULL;
635 struct cpuinfo_x86 *cpui;
636 char cpu_dir[12];
637 unsigned cpu;
638 int err = 0;
639
640 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
641 cpui = &cpu_data(cpu);
642 if (!cpu_has(cpui, X86_FEATURE_MSR))
643 continue;
644
645 sprintf(cpu_dir, "cpu%d", cpu);
646 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
647 err = cpu_init_allreg(cpu, cpu_dentry);
648
649 pr_info("cpu%d(%d) debug files %d\n",
650 cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
651 if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
652 pr_err("Register files count %d exceeds limit %d\n",
653 per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
654 per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
655 err = -ENFILE;
656 }
657 if (err)
658 return err;
659 }
660
661 return err;
662}
663
664static int __init cpu_debug_init(void)
665{
666 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
667
668 return cpu_init_cpu();
669}
670
671static void __exit cpu_debug_exit(void)
672{
673 int i, cpu;
674
675 if (cpu_debugfs_dir)
676 debugfs_remove_recursive(cpu_debugfs_dir);
677
678 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
679 for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
680 kfree(per_cpu(priv_arr[i], cpu));
681}
682
683module_init(cpu_debug_init);
684module_exit(cpu_debug_exit);
685
686MODULE_AUTHOR("Jaswinder Singh Rajput");
687MODULE_DESCRIPTION("CPU Debug module");
688MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b581d3905cb..1b1920fa7c80 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -68,9 +68,9 @@ struct acpi_cpufreq_data {
 	unsigned int cpu_feature;
 };
 
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
 
-static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
 
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
@@ -190,9 +190,11 @@ static void do_drv_write(void *_cmd)
 
 static void drv_read(struct drv_cmd *cmd)
 {
+	int err;
 	cmd->val = 0;
 
-	smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1);
+	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
+	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
 }
 
 static void drv_write(struct drv_cmd *cmd)
@@ -214,14 +216,14 @@ static u32 get_cur_val(const struct cpumask *mask)
 	if (unlikely(cpumask_empty(mask)))
 		return 0;
 
-	switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
+	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
+		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -268,8 +270,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
 		return 0;
 
-	ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
-	per_cpu(old_perf, cpu) = perf;
+	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+	per_cpu(acfreq_old_perf, cpu) = perf;
 
 	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
 
@@ -278,7 +280,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
 	unsigned int freq;
 	unsigned int cached_freq;
 
@@ -322,7 +324,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 			       unsigned int target_freq, unsigned int relation)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 	struct acpi_processor_performance *perf;
 	struct cpufreq_freqs freqs;
 	struct drv_cmd cmd;
@@ -416,7 +418,7 @@ out:
 
 static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_verify\n");
 
@@ -574,7 +576,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		return -ENOMEM;
 
 	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-	per_cpu(drv_data, cpu) = data;
+	per_cpu(acfreq_data, cpu) = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -725,20 +727,20 @@ err_unreg:
 	acpi_processor_unregister_performance(perf, cpu);
 err_free:
 	kfree(data);
-	per_cpu(drv_data, cpu) = NULL;
+	per_cpu(acfreq_data, cpu) = NULL;
 
 	return result;
 }
 
 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
-		per_cpu(drv_data, policy->cpu) = NULL;
+		per_cpu(acfreq_data, policy->cpu) = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
 						      policy->cpu);
 		kfree(data);
@@ -749,7 +751,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 
 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_resume\n");
 
@@ -764,14 +766,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
 };
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
-	.verify = acpi_cpufreq_verify,
-	.target = acpi_cpufreq_target,
-	.init = acpi_cpufreq_cpu_init,
-	.exit = acpi_cpufreq_cpu_exit,
-	.resume = acpi_cpufreq_resume,
-	.name = "acpi-cpufreq",
-	.owner = THIS_MODULE,
-	.attr = acpi_cpufreq_attr,
+	.verify		= acpi_cpufreq_verify,
+	.target		= acpi_cpufreq_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
+	.init		= acpi_cpufreq_cpu_init,
+	.exit		= acpi_cpufreq_cpu_exit,
+	.resume		= acpi_cpufreq_resume,
+	.name		= "acpi-cpufreq",
+	.owner		= THIS_MODULE,
+	.attr		= acpi_cpufreq_attr,
 };
 
 static int __init acpi_cpufreq_init(void)
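/*
 * Context for the drv_read() hunk above: smp_call_function_any() picks a
 * CPU from the mask but prefers the local CPU when it is a member, avoiding
 * a needless IPI; smp_call_function_single(cpumask_any(...), ...) had no
 * such preference. A hedged sketch of the calling convention (callback and
 * return plumbing are illustrative):
 */
static void where_am_i(void *info)
{
	*(int *)info = smp_processor_id();
}

static int run_on_any(const struct cpumask *mask)
{
	int cpu = -1;
	int err = smp_call_function_any(mask, where_am_i, &cpu, 1);

	return err ? err : cpu;	/* the CPU that actually ran the callback */
}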
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index cabd2fa3fc93..7e7eea4f8261 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 
 	/* Find ACPI data for processor */
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-				ACPI_UINT32_MAX, &longhaul_walk_callback,
+				ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
 				NULL, (void *)&pr);
 
 	/* Check ACPI support for C3 state */
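/*
 * Why the extra NULL above: the ACPICA core of this era split the
 * acpi_walk_namespace() callback into a pre-order and a post-order visit,
 * so existing callers pass NULL for the slot they do not use. Sketch of
 * the updated call shape (callback and context names are illustrative):
 */
static acpi_status visit(acpi_handle handle, u32 depth, void *ctx, void **ret)
{
	return AE_OK;	/* examine the handle, keep walking */
}

static void walk_processors(void *ctx)
{
	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
			    ACPI_UINT32_MAX, visit, NULL, ctx, NULL);
}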
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index f10dea409f40..cb01dac267d3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cpuinfo.transition_latency = 200000;
 	policy->cur = busfreq * max_multiplier;
 
 	result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
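/*
 * Why 200000 instead of CPUFREQ_ETERNAL: governors size their sampling
 * period from cpuinfo.transition_latency (in nanoseconds), and an "eternal"
 * latency effectively vetoes dynamic scaling. A rough illustration of the
 * ondemand-style derivation, not the governor's exact code:
 */
static unsigned int sampling_period_us(unsigned int latency_ns)
{
	unsigned int latency_us = latency_ns / 1000;	/* 200000 -> 200 */

	return latency_us * 1000;	/* sample ~1000x slower: 200 ms */
}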
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index d47c775eb0ab..9a97116f89e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = {
 };
 
 static struct cpufreq_driver powernow_driver = {
-	.verify = powernow_verify,
-	.target = powernow_target,
-	.get = powernow_get,
-	.init = powernow_cpu_init,
-	.exit = powernow_cpu_exit,
-	.name = "powernow-k7",
-	.owner = THIS_MODULE,
-	.attr = powernow_table_attr,
+	.verify		= powernow_verify,
+	.target		= powernow_target,
+	.get		= powernow_get,
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+	.bios_limit	= acpi_processor_get_bios_limit,
+#endif
+	.init		= powernow_cpu_init,
+	.exit		= powernow_cpu_exit,
+	.name		= "powernow-k7",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_table_attr,
 };
 
 static int __init powernow_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 3f12dabeab52..6e44519960c8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 static int powernowk8_target(struct cpufreq_policy *pol,
 		unsigned targfreq, unsigned relation)
 {
-	cpumask_t oldmask;
+	cpumask_var_t oldmask;
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
@@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 	checkfid = data->currfid;
 	checkvid = data->currvid;
 
-	/* only run on specific CPU from here on */
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+	/* only run on specific CPU from here on. */
+	/* This is poor form: use a workqueue or smp_call_function_single */
+	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_copy(oldmask, tsk_cpus_allowed(current));
+	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 	ret = 0;
 
 err_out:
-	set_cpus_allowed_ptr(current, &oldmask);
+	set_cpus_allowed_ptr(current, oldmask);
+	free_cpumask_var(oldmask);
 	return ret;
 }
 
@@ -1351,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 
 	kfree(data->powernow_table);
 	kfree(data);
+	per_cpu(powernow_data, pol->cpu) = NULL;
 
 	return 0;
 }
@@ -1370,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
 	int err;
 
 	if (!data)
-		return -EINVAL;
+		return 0;
 
 	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
 	if (err)
@@ -1393,14 +1399,15 @@ static struct freq_attr *powernow_k8_attr[] = {
 };
 
 static struct cpufreq_driver cpufreq_amd64_driver = {
-	.verify = powernowk8_verify,
-	.target = powernowk8_target,
-	.init = powernowk8_cpu_init,
-	.exit = __devexit_p(powernowk8_cpu_exit),
-	.get = powernowk8_get,
-	.name = "powernow-k8",
-	.owner = THIS_MODULE,
-	.attr = powernow_k8_attr,
+	.verify		= powernowk8_verify,
+	.target		= powernowk8_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
+	.init		= powernowk8_cpu_init,
+	.exit		= __devexit_p(powernowk8_cpu_exit),
+	.get		= powernowk8_get,
+	.name		= "powernow-k8",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_k8_attr,
 };
 
 /* driver entry point for init */
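/*
 * The oldmask changes above follow the standard cpumask_var_t lifecycle:
 * allocate (stack storage unless CONFIG_CPUMASK_OFFSTACK=y), save the
 * task's mask, pin, do the work, restore, free. The same pattern in
 * miniature, assuming the era's current->cpus_allowed field:
 */
static int run_pinned(unsigned int cpu)
{
	cpumask_var_t saved;

	if (!alloc_cpumask_var(&saved, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(saved, &current->cpus_allowed);
	set_cpus_allowed_ptr(current, cpumask_of(cpu));

	/* ... work that must execute on "cpu" ... */

	set_cpus_allowed_ptr(current, saved);
	free_cpumask_var(saved);
	return 0;
}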
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 3ae5a7a3a500..2ce8e0b5cc54 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
 
 /* speedstep_processor
 */
-static unsigned int speedstep_processor;
+static enum speedstep_processor speedstep_processor;
 
 static u32 pmbase;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index f4c290b8482f..ad0083abfa23 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -34,7 +34,7 @@ static int relaxed_check;
  * GET PROCESSOR CORE SPEED IN KHZ                                    *
  *********************************************************************/
 
-static unsigned int pentium3_get_frequency(unsigned int processor)
+static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 {
 	/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
 	struct {
@@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void)
 
 
 /* Warning: may get called from smp_call_function_single. */
-unsigned int speedstep_get_frequency(unsigned int processor)
+unsigned int speedstep_get_frequency(enum speedstep_processor processor)
 {
 	switch (processor) {
 	case SPEEDSTEP_CPU_PCORE:
@@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor);
 * DETECT SPEEDSTEP SPEEDS                                            *
 *********************************************************************/
 
-unsigned int speedstep_get_freqs(unsigned int processor,
+unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 				  unsigned int *low_speed,
 				  unsigned int *high_speed,
 				  unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index 2b6c04e5a304..70d9cea1219d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -11,18 +11,18 @@
 
 
 /* processors */
-
-#define SPEEDSTEP_CPU_PIII_C_EARLY	0x00000001 /* Coppermine core */
-#define SPEEDSTEP_CPU_PIII_C		0x00000002 /* Coppermine core */
-#define SPEEDSTEP_CPU_PIII_T		0x00000003 /* Tualatin core */
-#define SPEEDSTEP_CPU_P4M		0x00000004 /* P4-M */
-
+enum speedstep_processor {
+	SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
+	SPEEDSTEP_CPU_PIII_C	   = 0x00000002, /* Coppermine core */
+	SPEEDSTEP_CPU_PIII_T	   = 0x00000003, /* Tualatin core */
+	SPEEDSTEP_CPU_P4M	   = 0x00000004, /* P4-M */
 /* the following processors are not speedstep-capable and are not auto-detected
  * in speedstep_detect_processor(). However, their speed can be detected using
  * the speedstep_get_frequency() call. */
-#define SPEEDSTEP_CPU_PM		0xFFFFFF03 /* Pentium M */
-#define SPEEDSTEP_CPU_P4D		0xFFFFFF04 /* desktop P4 */
-#define SPEEDSTEP_CPU_PCORE		0xFFFFFF05 /* Core */
+	SPEEDSTEP_CPU_PM	   = 0xFFFFFF03, /* Pentium M */
+	SPEEDSTEP_CPU_P4D	   = 0xFFFFFF04, /* desktop P4 */
+	SPEEDSTEP_CPU_PCORE	   = 0xFFFFFF05, /* Core */
+};
 
 /* speedstep states -- only two of them */
 
@@ -31,10 +31,10 @@
 
 
 /* detect a speedstep-capable processor */
-extern unsigned int speedstep_detect_processor (void);
+extern enum speedstep_processor speedstep_detect_processor(void);
 
 /* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_frequency(unsigned int processor);
+extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
 
 
 /* detect the low and high speeds of the processor. The callback
@@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor);
 * SPEEDSTEP_LOW; the second argument is zero so that no
 * cpufreq_notify_transition calls are initiated.
 */
-extern unsigned int speedstep_get_freqs(unsigned int processor,
+extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 					unsigned int *low_speed,
 					unsigned int *high_speed,
 					unsigned int *transition_latency,
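/*
 * What the enum buys over the old #defines: prototypes such as
 * speedstep_get_frequency(enum speedstep_processor) let the compiler flag
 * callers that pass an unrelated integer, and a switch over the enum can
 * be checked for unhandled values. Illustrative only (the kHz figure is
 * made up):
 */
static unsigned int nominal_khz(enum speedstep_processor p)
{
	switch (p) {
	case SPEEDSTEP_CPU_PIII_T:
		return 1400000;
	default:
		return 0;
	}
}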
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index befea088e4f5..04d73c114e49 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -35,7 +35,7 @@ static int smi_cmd;
 static unsigned int smi_sig;
 
 /* info about the processor */
-static unsigned int speedstep_processor;
+static enum speedstep_processor speedstep_processor;
 
 /*
  * There are only two frequency states for each processor. Values
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c900b73f9224..879666f4d871 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -70,7 +70,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
 		sched_clock_stable = 1;
 	}
 
@@ -270,8 +269,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 		node = cpu_to_node(cpu);
 	}
 	numa_set_node(cpu, node);
-
-	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6c40f6b5b340..eddb1bdd1b8f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <linux/smp.h>
 #include <asm/k8.h>
+#include <asm/smp.h>
 
 #define LVL_1_INST	1
 #define LVL_1_DATA	2
@@ -31,6 +32,8 @@ struct _cache_table {
 	short size;
 };
 
+#define MB(x)	((x) * 1024)
+
 /* All the cache descriptor types we care about (no TLB or
    trace cache entries) */
 
@@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x29, LVL_3,      4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
@@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] =
 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x46, LVL_3,      4096 },	/* 4-way set assoc, 64 byte line size */
-	{ 0x47, LVL_3,      8192 },	/* 8-way set assoc, 64 byte line size */
-	{ 0x49, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
-	{ 0x4a, LVL_3,      6144 },	/* 12-way set assoc, 64 byte line size */
-	{ 0x4b, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
-	{ 0x4c, LVL_3,      12288 },	/* 12-way set assoc, 64 byte line size */
-	{ 0x4d, LVL_3,      16384 },	/* 16-way set assoc, 64 byte line size */
-	{ 0x4e, LVL_2,      6144 },	/* 24-way set assoc, 64 byte line size */
+	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
+	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
+	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
+	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
+	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
+	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
+	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
@@ -77,34 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] =
 	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
-	{ 0x78, LVL_2,      1024 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x7c, LVL_2,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x7d, LVL_2,      2048 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
-	{ 0x84, LVL_2,      1024 },	/* 8-way set assoc, 32 byte line size */
-	{ 0x85, LVL_2,      2048 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
+	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
-	{ 0x87, LVL_2,      1024 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
-	{ 0xd1, LVL_3,      1024 },	/* 4-way set assoc, 64 byte line size */
-	{ 0xd2, LVL_3,      2048 },	/* 4-way set assoc, 64 byte line size */
-	{ 0xd6, LVL_3,      1024 },	/* 8-way set assoc, 64 byte line size */
-	{ 0xd7, LVL_3,      2048 },	/* 8-way set assoc, 64 byte line size */
-	{ 0xd8, LVL_3,      4096 },	/* 12-way set assoc, 64 byte line size */
-	{ 0xdc, LVL_3,      2048 },	/* 12-way set assoc, 64 byte line size */
-	{ 0xdd, LVL_3,      4096 },	/* 12-way set assoc, 64 byte line size */
-	{ 0xde, LVL_3,      8192 },	/* 12-way set assoc, 64 byte line size */
-	{ 0xe2, LVL_3,      2048 },	/* 16-way set assoc, 64 byte line size */
-	{ 0xe3, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
-	{ 0xe4, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
-	{ 0xea, LVL_3,      12288 },	/* 24-way set assoc, 64 byte line size */
-	{ 0xeb, LVL_3,      18432 },	/* 24-way set assoc, 64 byte line size */
-	{ 0xec, LVL_3,      24576 },	/* 24-way set assoc, 64 byte line size */
+	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
+	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
+	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
+	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
+	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
+	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 	{ 0x00, 0, 0}
 };
 
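/*
 * Unit note for the MB() conversions above: the .size field of struct
 * _cache_table is kept in KB, so MB(x) expands to x * 1024 KB rather than
 * to bytes. Two illustrative entries showing the equivalence:
 */
#define MB(x)	((x) * 1024)		/* KB, as defined in the hunk above */

static const short l2_512k = 512;	/* 512 KB stays a literal */
static const short l3_4m   = MB(4);	/* 4096 KB, formerly written "4096" */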
@@ -150,7 +153,8 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 };
 
@@ -160,7 +164,8 @@ struct _cpuid4_info_regs {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 };
 
 unsigned short num_cache_leaves;
@@ -290,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
+struct _cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct _cpuid4_info *, char *);
+	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#ifdef CONFIG_CPU_SUP_AMD
+static unsigned int __cpuinit amd_calc_l3_indices(void)
+{
+	/*
+	 * We're called over smp_call_function_single() and therefore
+	 * are on the correct cpu.
+	 */
+	int cpu = smp_processor_id();
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int sc0, sc1, sc2, sc3;
+	u32 val = 0;
+
+	pci_read_config_dword(dev, 0x1C4, &val);
+
+	/* calculate subcache sizes */
+	sc0 = !(val & BIT(0));
+	sc1 = !(val & BIT(4));
+	sc2 = !(val & BIT(8))  + !(val & BIT(9));
+	sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+}
+
 static void __cpuinit
 amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
@@ -299,13 +334,104 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 	if (boot_cpu_data.x86 == 0x11)
 		return;
 
-	/* see erratum #382 */
-	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+	/* see errata #382 and #388 */
+	if ((boot_cpu_data.x86 == 0x10) &&
+	    ((boot_cpu_data.x86_model < 0x8) ||
+	     (boot_cpu_data.x86_mask < 0x1)))
 		return;
 
-	this_leaf->can_disable = 1;
+	this_leaf->can_disable = true;
+	this_leaf->l3_indices = amd_calc_l3_indices();
+}
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+				  unsigned int index)
+{
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = amd_get_nb_id(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int reg = 0;
+
+	if (!this_leaf->can_disable)
+		return -EINVAL;
+
+	if (!dev)
+		return -EINVAL;
+
+	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	return sprintf(buf, "0x%08x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index)					\
+static ssize_t								\
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
+{									\
+	return show_cache_disable(this_leaf, buf, index);		\
+}
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+	const char *buf, size_t count, unsigned int index)
+{
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = amd_get_nb_id(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned long val = 0;
+
+#define SUBCACHE_MASK	(3UL << 20)
+#define SUBCACHE_INDEX	0xfff
+
+	if (!this_leaf->can_disable)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (strict_strtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	/* do not allow writes outside of allowed bits */
+	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
+	    ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
+		return -EINVAL;
+
+	val |= BIT(30);
+	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+	/*
+	 * We need to WBINVD on a core on the node containing the L3 cache
+	 * whose indices we are disabling; a simple wbinvd() is not sufficient.
+	 */
+	wbinvd_on_cpu(cpu);
+	pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
+	return count;
 }
 
+#define STORE_CACHE_DISABLE(index)					\
+static ssize_t								\
+store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
+			    const char *buf, size_t count)		\
+{									\
+	return store_cache_disable(this_leaf, buf, count, index);	\
+}
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
+
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+		show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+		show_cache_disable_1, store_cache_disable_1);
+
+#else	/* CONFIG_CPU_SUP_AMD */
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
+{
+};
+#endif /* CONFIG_CPU_SUP_AMD */
+
 static int
 __cpuinit cpuid4_cache_lookup_regs(int index,
 				   struct _cpuid4_info_regs *this_leaf)
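/*
 * Worked example for amd_calc_l3_indices() above, with a made-up register
 * value: if the L3 config dword at 0x1C4 reads 0x00000110, bits 4 and 8
 * are set, giving sc0 = 1, sc1 = 0, sc2 = 1, sc3 = 2, and the result is
 * (max(sc0, sc1, sc2, sc3) << 10) - 1 = (2 << 10) - 1 = 2047 indices.
 * The helper below just restates the arithmetic for a given value:
 */
static unsigned int l3_indices_from(u32 val)
{
	unsigned int sc0 = !(val & BIT(0));
	unsigned int sc1 = !(val & BIT(4));
	unsigned int sc2 = !(val & BIT(8))  + !(val & BIT(9));
	unsigned int sc3 = !(val & BIT(12)) + !(val & BIT(13));

	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
}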
@@ -499,26 +625,27 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SYSFS
 
 /* pointer to _cpuid4_info array (for each cache leaf) */
-static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))
+static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
+#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 {
 	struct _cpuid4_info *this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
-	int index_msb, i;
+	int index_msb, i, sibling;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
-		struct cpuinfo_x86 *d;
-		for_each_online_cpu(i) {
-			if (!per_cpu(cpuid4_info, i))
+		for_each_cpu(i, c->llc_shared_map) {
+			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
-			d = &cpu_data(i);
 			this_leaf = CPUID4_INFO_IDX(i, index);
-			cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
-				     d->llc_shared_map);
+			for_each_cpu(sibling, c->llc_shared_map) {
+				if (!cpu_online(sibling))
+					continue;
+				set_bit(sibling, this_leaf->shared_cpu_map);
+			}
 		}
 		return;
 	}
@@ -535,7 +662,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 		    c->apicid >> index_msb) {
 			cpumask_set_cpu(i,
 				to_cpumask(this_leaf->shared_cpu_map));
-			if (i != cpu && per_cpu(cpuid4_info, i)) {
+			if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
 				sibling_leaf =
 					CPUID4_INFO_IDX(i, index);
 				cpumask_set_cpu(cpu, to_cpumask(
@@ -574,8 +701,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	for (i = 0; i < num_cache_leaves; i++)
 		cache_remove_shared_cpu_map(cpu, i);
 
-	kfree(per_cpu(cpuid4_info, cpu));
-	per_cpu(cpuid4_info, cpu) = NULL;
+	kfree(per_cpu(ici_cpuid4_info, cpu));
+	per_cpu(ici_cpuid4_info, cpu) = NULL;
 }
 
 static int
@@ -614,15 +741,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	if (num_cache_leaves == 0)
 		return -ENOENT;
 
-	per_cpu(cpuid4_info, cpu) = kzalloc(
+	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
 	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(cpuid4_info, cpu) == NULL)
+	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
 		return -ENOMEM;
 
 	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 	if (retval) {
-		kfree(per_cpu(cpuid4_info, cpu));
-		per_cpu(cpuid4_info, cpu) = NULL;
+		kfree(per_cpu(ici_cpuid4_info, cpu));
+		per_cpu(ici_cpuid4_info, cpu) = NULL;
 	}
 
 	return retval;
@@ -634,7 +761,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
 
 /* pointer to kobject for cpuX/cache */
-static DEFINE_PER_CPU(struct kobject *, cache_kobject);
+static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
 
 struct _index_kobject {
 	struct kobject kobj;
@@ -643,8 +770,8 @@ struct _index_kobject {
 };
 
 /* pointer to array of kobjects for cpuX/cache/indexY */
-static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y)	(&((per_cpu(index_kobject, x))[y]))
+static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
+#define INDEX_KOBJECT_PTR(x, y)	(&((per_cpu(ici_index_kobject, x))[y]))
 
 #define show_one_plus(file_name, object, val)				\
 static ssize_t show_##file_name						\
@@ -710,82 +837,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
-				  unsigned int index)
-{
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-	unsigned int reg = 0;
-
-	if (!this_leaf->can_disable)
-		return -EINVAL;
-
-	if (!dev)
-		return -EINVAL;
-
-	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
-	return sprintf(buf, "%x\n", reg);
-}
-
-#define SHOW_CACHE_DISABLE(index)					\
-static ssize_t								\
-show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
-{									\
-	return show_cache_disable(this_leaf, buf, index);		\
-}
-SHOW_CACHE_DISABLE(0)
-SHOW_CACHE_DISABLE(1)
-
-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-	const char *buf, size_t count, unsigned int index)
-{
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-	unsigned long val = 0;
-	unsigned int scrubber = 0;
-
-	if (!this_leaf->can_disable)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (!dev)
-		return -EINVAL;
-
-	if (strict_strtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	val |= 0xc0000000;
-
-	pci_read_config_dword(dev, 0x58, &scrubber);
-	scrubber &= ~0x1f000000;
-	pci_write_config_dword(dev, 0x58, scrubber);
-
-	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
-	wbinvd();
-	pci_write_config_dword(dev, 0x1BC + index * 4, val);
-	return count;
-}
-
-#define STORE_CACHE_DISABLE(index)					\
-static ssize_t								\
-store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
-			    const char *buf, size_t count)		\
-{									\
-	return store_cache_disable(this_leaf, buf, count, index);	\
-}
-STORE_CACHE_DISABLE(0)
-STORE_CACHE_DISABLE(1)
-
-struct _cache_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct _cpuid4_info *, char *);
-	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
-};
-
 #define define_one_ro(_name) \
 static struct _cache_attr _name = \
 	__ATTR(_name, 0444, show_##_name, NULL)
@@ -800,23 +851,28 @@ define_one_ro(size);
800define_one_ro(shared_cpu_map); 851define_one_ro(shared_cpu_map);
801define_one_ro(shared_cpu_list); 852define_one_ro(shared_cpu_list);
802 853
803static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, 854#define DEFAULT_SYSFS_CACHE_ATTRS \
804 show_cache_disable_0, store_cache_disable_0); 855 &type.attr, \
805static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 856 &level.attr, \
806 show_cache_disable_1, store_cache_disable_1); 857 &coherency_line_size.attr, \
858 &physical_line_partition.attr, \
859 &ways_of_associativity.attr, \
860 &number_of_sets.attr, \
861 &size.attr, \
862 &shared_cpu_map.attr, \
863 &shared_cpu_list.attr
807 864
808static struct attribute *default_attrs[] = { 865static struct attribute *default_attrs[] = {
809 &type.attr, 866 DEFAULT_SYSFS_CACHE_ATTRS,
810 &level.attr, 867 NULL
811 &coherency_line_size.attr, 868};
812 &physical_line_partition.attr, 869
813 &ways_of_associativity.attr, 870static struct attribute *default_l3_attrs[] = {
814 &number_of_sets.attr, 871 DEFAULT_SYSFS_CACHE_ATTRS,
815 &size.attr, 872#ifdef CONFIG_CPU_SUP_AMD
816 &shared_cpu_map.attr,
817 &shared_cpu_list.attr,
818 &cache_disable_0.attr, 873 &cache_disable_0.attr,
819 &cache_disable_1.attr, 874 &cache_disable_1.attr,
875#endif
820 NULL 876 NULL
821}; 877};
822 878
@@ -863,10 +919,10 @@ static struct kobj_type ktype_percpu_entry = {
863 919
864static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) 920static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
865{ 921{
866 kfree(per_cpu(cache_kobject, cpu)); 922 kfree(per_cpu(ici_cache_kobject, cpu));
867 kfree(per_cpu(index_kobject, cpu)); 923 kfree(per_cpu(ici_index_kobject, cpu));
868 per_cpu(cache_kobject, cpu) = NULL; 924 per_cpu(ici_cache_kobject, cpu) = NULL;
869 per_cpu(index_kobject, cpu) = NULL; 925 per_cpu(ici_index_kobject, cpu) = NULL;
870 free_cache_attributes(cpu); 926 free_cache_attributes(cpu);
871} 927}
872 928
@@ -882,14 +938,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
882 return err; 938 return err;
883 939
884 /* Allocate all required memory */ 940 /* Allocate all required memory */
885 per_cpu(cache_kobject, cpu) = 941 per_cpu(ici_cache_kobject, cpu) =
886 kzalloc(sizeof(struct kobject), GFP_KERNEL); 942 kzalloc(sizeof(struct kobject), GFP_KERNEL);
887 if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) 943 if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
888 goto err_out; 944 goto err_out;
889 945
890 per_cpu(index_kobject, cpu) = kzalloc( 946 per_cpu(ici_index_kobject, cpu) = kzalloc(
891 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); 947 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
892 if (unlikely(per_cpu(index_kobject, cpu) == NULL)) 948 if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
893 goto err_out; 949 goto err_out;
894 950
895 return 0; 951 return 0;
@@ -907,13 +963,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
907 unsigned int cpu = sys_dev->id; 963 unsigned int cpu = sys_dev->id;
908 unsigned long i, j; 964 unsigned long i, j;
909 struct _index_kobject *this_object; 965 struct _index_kobject *this_object;
966 struct _cpuid4_info *this_leaf;
910 int retval; 967 int retval;
911 968
912 retval = cpuid4_cache_sysfs_init(cpu); 969 retval = cpuid4_cache_sysfs_init(cpu);
913 if (unlikely(retval < 0)) 970 if (unlikely(retval < 0))
914 return retval; 971 return retval;
915 972
916 retval = kobject_init_and_add(per_cpu(cache_kobject, cpu), 973 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
917 &ktype_percpu_entry, 974 &ktype_percpu_entry,
918 &sys_dev->kobj, "%s", "cache"); 975 &sys_dev->kobj, "%s", "cache");
919 if (retval < 0) { 976 if (retval < 0) {
@@ -925,14 +982,22 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
925 this_object = INDEX_KOBJECT_PTR(cpu, i); 982 this_object = INDEX_KOBJECT_PTR(cpu, i);
926 this_object->cpu = cpu; 983 this_object->cpu = cpu;
927 this_object->index = i; 984 this_object->index = i;
985
986 this_leaf = CPUID4_INFO_IDX(cpu, i);
987
988 if (this_leaf->can_disable)
989 ktype_cache.default_attrs = default_l3_attrs;
990 else
991 ktype_cache.default_attrs = default_attrs;
992
928 retval = kobject_init_and_add(&(this_object->kobj), 993 retval = kobject_init_and_add(&(this_object->kobj),
929 &ktype_cache, 994 &ktype_cache,
930 per_cpu(cache_kobject, cpu), 995 per_cpu(ici_cache_kobject, cpu),
931 "index%1lu", i); 996 "index%1lu", i);
932 if (unlikely(retval)) { 997 if (unlikely(retval)) {
933 for (j = 0; j < i; j++) 998 for (j = 0; j < i; j++)
934 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); 999 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
935 kobject_put(per_cpu(cache_kobject, cpu)); 1000 kobject_put(per_cpu(ici_cache_kobject, cpu));
936 cpuid4_cache_sysfs_exit(cpu); 1001 cpuid4_cache_sysfs_exit(cpu);
937 return retval; 1002 return retval;
938 } 1003 }
@@ -940,7 +1005,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
940 } 1005 }
941 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); 1006 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
942 1007
943 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 1008 kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
944 return 0; 1009 return 0;
945} 1010}
946 1011
@@ -949,7 +1014,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
949 unsigned int cpu = sys_dev->id; 1014 unsigned int cpu = sys_dev->id;
950 unsigned long i; 1015 unsigned long i;
951 1016
952 if (per_cpu(cpuid4_info, cpu) == NULL) 1017 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
953 return; 1018 return;
954 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) 1019 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
955 return; 1020 return;
@@ -957,7 +1022,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
957 1022
958 for (i = 0; i < num_cache_leaves; i++) 1023 for (i = 0; i < num_cache_leaves; i++)
959 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); 1024 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
960 kobject_put(per_cpu(cache_kobject, cpu)); 1025 kobject_put(per_cpu(ici_cache_kobject, cpu));
961 cpuid4_cache_sysfs_exit(cpu); 1026 cpuid4_cache_sysfs_exit(cpu);
962} 1027}
963 1028
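The intel_cacheinfo.c changes above do two independent things: the per-CPU variables gain an ici_ prefix (the reworked per-CPU accessors reference the symbol name directly, so short generic names such as cpuid4_info can collide with other identifiers), and cache_add_dev() now selects default_l3_attrs, which carries the AMD cache_disable_0/1 files, only for leaves whose can_disable flag is set. A minimal userspace sketch of the pick-the-attribute-table idea follows; all names in it are hypothetical, not kernel API.

        #include <stdio.h>

        /* Hypothetical attribute tables modelling default_attrs/default_l3_attrs. */
        static const char *base_attrs[] = { "type", "level", "size", NULL };
        static const char *l3_attrs[]   = { "type", "level", "size",
                                            "cache_disable_0", "cache_disable_1", NULL };

        struct leaf { int can_disable; };

        /* Mirrors the cache_add_dev() choice: pick the table per leaf,
         * before the object is registered. */
        static const char **pick_attrs(const struct leaf *l)
        {
                return l->can_disable ? l3_attrs : base_attrs;
        }

        int main(void)
        {
                struct leaf l2 = { 0 }, l3 = { 1 };
                const char **a;

                for (a = pick_attrs(&l3); *a; a++)
                        printf("l3: %s\n", *a);
                for (a = pick_attrs(&l2); *a; a++)
                        printf("l2: %s\n", *a);
                return 0;
        }

The kernel patch itself makes the choice by rewriting the shared ktype_cache.default_attrs just before each kobject_init_and_add() call, which presumably relies on cache_add_dev() invocations being serialized.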
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 472763d92098..73734baa50f2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -74,7 +74,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
74 m->finished = 0; 74 m->finished = 0;
75} 75}
76 76
77static cpumask_t mce_inject_cpumask; 77static cpumask_var_t mce_inject_cpumask;
78 78
79static int mce_raise_notify(struct notifier_block *self, 79static int mce_raise_notify(struct notifier_block *self,
80 unsigned long val, void *data) 80 unsigned long val, void *data)
@@ -82,9 +82,9 @@ static int mce_raise_notify(struct notifier_block *self,
82 struct die_args *args = (struct die_args *)data; 82 struct die_args *args = (struct die_args *)data;
83 int cpu = smp_processor_id(); 83 int cpu = smp_processor_id();
84 struct mce *m = &__get_cpu_var(injectm); 84 struct mce *m = &__get_cpu_var(injectm);
85 if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) 85 if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
86 return NOTIFY_DONE; 86 return NOTIFY_DONE;
87 cpu_clear(cpu, mce_inject_cpumask); 87 cpumask_clear_cpu(cpu, mce_inject_cpumask);
88 if (m->inject_flags & MCJ_EXCEPTION) 88 if (m->inject_flags & MCJ_EXCEPTION)
89 raise_exception(m, args->regs); 89 raise_exception(m, args->regs);
90 else if (m->status) 90 else if (m->status)
@@ -148,22 +148,22 @@ static void raise_mce(struct mce *m)
148 unsigned long start; 148 unsigned long start;
149 int cpu; 149 int cpu;
150 get_online_cpus(); 150 get_online_cpus();
151 mce_inject_cpumask = cpu_online_map; 151 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
152 cpu_clear(get_cpu(), mce_inject_cpumask); 152 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
153 for_each_online_cpu(cpu) { 153 for_each_online_cpu(cpu) {
154 struct mce *mcpu = &per_cpu(injectm, cpu); 154 struct mce *mcpu = &per_cpu(injectm, cpu);
155 if (!mcpu->finished || 155 if (!mcpu->finished ||
156 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) 156 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
157 cpu_clear(cpu, mce_inject_cpumask); 157 cpumask_clear_cpu(cpu, mce_inject_cpumask);
158 } 158 }
159 if (!cpus_empty(mce_inject_cpumask)) 159 if (!cpumask_empty(mce_inject_cpumask))
160 apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); 160 apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
161 start = jiffies; 161 start = jiffies;
162 while (!cpus_empty(mce_inject_cpumask)) { 162 while (!cpumask_empty(mce_inject_cpumask)) {
163 if (!time_before(jiffies, start + 2*HZ)) { 163 if (!time_before(jiffies, start + 2*HZ)) {
164 printk(KERN_ERR 164 printk(KERN_ERR
165 "Timeout waiting for mce inject NMI %lx\n", 165 "Timeout waiting for mce inject NMI %lx\n",
166 *cpus_addr(mce_inject_cpumask)); 166 *cpumask_bits(mce_inject_cpumask));
167 break; 167 break;
168 } 168 }
169 cpu_relax(); 169 cpu_relax();
@@ -210,6 +210,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
210 210
211static int inject_init(void) 211static int inject_init(void)
212{ 212{
213 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
214 return -ENOMEM;
213 printk(KERN_INFO "Machine check injector initialized\n"); 215 printk(KERN_INFO "Machine check injector initialized\n");
214 mce_chrdev_ops.write = mce_write; 216 mce_chrdev_ops.write = mce_write;
215 register_die_notifier(&mce_raise_nb); 217 register_die_notifier(&mce_raise_nb);
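The mce-inject conversion above replaces a static cpumask_t (NR_CPUS bits of BSS) with cpumask_var_t, which becomes a plain pointer when CONFIG_CPUMASK_OFFSTACK=y; hence the alloc_cpumask_var() added to inject_init() and the switch to the pointer-taking cpumask_* accessors. A hedged kernel-style sketch of the lifecycle, compilable only inside a kernel tree (my_init/my_teardown are illustrative names):

        #include <linux/cpumask.h>
        #include <linux/kernel.h>
        #include <linux/slab.h>
        #include <linux/smp.h>

        static cpumask_var_t my_mask;   /* a pointer when CPUMASK_OFFSTACK=y */

        static int __init my_init(void)
        {
                /* Must allocate before first use; can fail on OFFSTACK configs. */
                if (!alloc_cpumask_var(&my_mask, GFP_KERNEL))
                        return -ENOMEM;
                cpumask_copy(my_mask, cpu_online_mask); /* replaces struct assignment */
                cpumask_clear_cpu(smp_processor_id(), my_mask);
                if (!cpumask_empty(my_mask))
                        pr_info("targets remain\n");    /* pass my_mask, not &my_mask */
                return 0;
        }

        static void my_teardown(void)
        {
                free_cpumask_var(my_mask);      /* no-op unless OFFSTACK */
        }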
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d7ebf25d10ed..a8aacd4b513c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1388,13 +1388,14 @@ static void __mcheck_cpu_init_timer(void)
1388 struct timer_list *t = &__get_cpu_var(mce_timer); 1388 struct timer_list *t = &__get_cpu_var(mce_timer);
1389 int *n = &__get_cpu_var(mce_next_interval); 1389 int *n = &__get_cpu_var(mce_next_interval);
1390 1390
1391 setup_timer(t, mce_start_timer, smp_processor_id());
1392
1391 if (mce_ignore_ce) 1393 if (mce_ignore_ce)
1392 return; 1394 return;
1393 1395
1394 *n = check_interval * HZ; 1396 *n = check_interval * HZ;
1395 if (!*n) 1397 if (!*n)
1396 return; 1398 return;
1397 setup_timer(t, mce_start_timer, smp_processor_id());
1398 t->expires = round_jiffies(jiffies + *n); 1399 t->expires = round_jiffies(jiffies + *n);
1399 add_timer_on(t, smp_processor_id()); 1400 add_timer_on(t, smp_processor_id());
1400} 1401}
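The mce.c hunk above hoists setup_timer() in front of the two early returns, so the per-CPU timer is initialized even when mce_ignore_ce or a zero check_interval means it will never be armed; code that later cancels or re-arms the timer can then treat it as always valid. The pattern, as a hedged kernel-style sketch (my_timer_fn and the wrapper are illustrative, not the patch's code):

        #include <linux/timer.h>
        #include <linux/smp.h>

        static void my_timer_fn(unsigned long data);    /* assumed handler */

        static void my_start_timer(struct timer_list *t, int enabled,
                                   unsigned long interval)
        {
                setup_timer(t, my_timer_fn, smp_processor_id()); /* unconditional */

                if (!enabled || !interval)
                        return;         /* initialized, just never armed */

                t->expires = round_jiffies(jiffies + interval);
                add_timer_on(t, smp_processor_id());
        }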
@@ -1928,7 +1929,7 @@ error2:
1928 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); 1929 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
1929error: 1930error:
1930 while (--i >= 0) 1931 while (--i >= 0)
1931 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); 1932 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1932 1933
1933 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1934 sysdev_unregister(&per_cpu(mce_dev, cpu));
1934 1935
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4fef985fc221..81c499eceb21 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -256,6 +256,16 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
256 ack_APIC_irq(); 256 ack_APIC_irq();
257} 257}
258 258
259/* Thermal monitoring depends on APIC, ACPI and clock modulation */
260static int intel_thermal_supported(struct cpuinfo_x86 *c)
261{
262 if (!cpu_has_apic)
263 return 0;
264 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
265 return 0;
266 return 1;
267}
268
259void __init mcheck_intel_therm_init(void) 269void __init mcheck_intel_therm_init(void)
260{ 270{
261 /* 271 /*
@@ -263,8 +273,7 @@ void __init mcheck_intel_therm_init(void)
263 * LVT value on BSP and use that value to restore APs' thermal LVT 273 * LVT value on BSP and use that value to restore APs' thermal LVT
264 * entry BIOS programmed later 274 * entry BIOS programmed later
265 */ 275 */
266 if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && 276 if (intel_thermal_supported(&boot_cpu_data))
267 cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
268 lvtthmr_init = apic_read(APIC_LVTTHMR); 277 lvtthmr_init = apic_read(APIC_LVTTHMR);
269} 278}
270 279
@@ -274,8 +283,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
274 int tm2 = 0; 283 int tm2 = 0;
275 u32 l, h; 284 u32 l, h;
276 285
277 /* Thermal monitoring depends on ACPI and clock modulation*/ 286 if (!intel_thermal_supported(c))
278 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
279 return; 287 return;
280 288
281 /* 289 /*
@@ -339,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
339 l = apic_read(APIC_LVTTHMR); 347 l = apic_read(APIC_LVTTHMR);
340 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 348 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
341 349
342 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 350 printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
343 cpu, tm2 ? "TM2" : "TM1"); 351 tm2 ? "TM2" : "TM1");
344 352
345 /* enable thermal throttle processing */ 353 /* enable thermal throttle processing */
346 atomic_set(&therm_throt_en, 1); 354 atomic_set(&therm_throt_en, 1);
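Two fixes in therm_throt.c above: the ACPI/ACC capability tests, previously duplicated in mcheck_intel_therm_init() and intel_init_thermal(), are folded into one intel_thermal_supported() helper that also adds the missing cpu_has_apic check; and the per-CPU banner becomes printk_once(), so it appears once per boot rather than once per CPU. Since printk_once() captures only its first invocation, the format string now hardcodes CPU0. A runnable userspace model of the one-shot logging idiom:

        #include <stdio.h>

        /* Userspace model of printk_once(): a static flag guards the call site. */
        #define print_once(...)                         \
        do {                                            \
                static int done;                        \
                if (!done) {                            \
                        done = 1;                       \
                        printf(__VA_ARGS__);            \
                }                                       \
        } while (0)

        int main(void)
        {
                int cpu;

                for (cpu = 0; cpu < 4; cpu++)
                        print_once("CPU%d: Thermal monitoring enabled\n", cpu);
                return 0;       /* prints a single line, for CPU0 only */
        }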
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index f4361b56f8e9..ad9e5ed81181 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
1obj-y := main.o if.o generic.o state.o cleanup.o 1obj-y := main.o if.o generic.o cleanup.o
2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o 2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
3 3
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 33af14110dfd..92ba9cd31c9a 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
108 return 0; 108 return 0;
109} 109}
110 110
111static struct mtrr_ops amd_mtrr_ops = { 111static const struct mtrr_ops amd_mtrr_ops = {
112 .vendor = X86_VENDOR_AMD, 112 .vendor = X86_VENDOR_AMD,
113 .set = amd_set_mtrr, 113 .set = amd_set_mtrr,
114 .get = amd_get_mtrr, 114 .get = amd_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index de89f14eff3a..316fe3e60a97 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
110 return 0; 110 return 0;
111} 111}
112 112
113static struct mtrr_ops centaur_mtrr_ops = { 113static const struct mtrr_ops centaur_mtrr_ops = {
114 .vendor = X86_VENDOR_CENTAUR, 114 .vendor = X86_VENDOR_CENTAUR,
115 .set = centaur_set_mcr, 115 .set = centaur_set_mcr,
116 .get = centaur_get_mcr, 116 .get = centaur_get_mcr,
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 228d982ce09c..68a3343e5798 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -265,7 +265,7 @@ static void cyrix_set_all(void)
265 post_set(); 265 post_set();
266} 266}
267 267
268static struct mtrr_ops cyrix_mtrr_ops = { 268static const struct mtrr_ops cyrix_mtrr_ops = {
269 .vendor = X86_VENDOR_CYRIX, 269 .vendor = X86_VENDOR_CYRIX,
270 .set_all = cyrix_set_all, 270 .set_all = cyrix_set_all,
271 .set = cyrix_set_arr, 271 .set = cyrix_set_arr,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55da0c5f68dd..9aa5dc76ff4a 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -464,7 +464,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
464 tmp |= ~((1<<(hi - 1)) - 1); 464 tmp |= ~((1<<(hi - 1)) - 1);
465 465
466 if (tmp != mask_lo) { 466 if (tmp != mask_lo) {
467 WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); 467 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
468 mask_lo = tmp; 468 mask_lo = tmp;
469 } 469 }
470 } 470 }
@@ -570,7 +570,7 @@ static unsigned long set_mtrr_state(void)
570 570
571 571
572static unsigned long cr4; 572static unsigned long cr4;
573static DEFINE_SPINLOCK(set_atomicity_lock); 573static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
574 574
575/* 575/*
576 * Since we are disabling the cache don't allow any interrupts, 576 * Since we are disabling the cache don't allow any interrupts,
@@ -590,7 +590,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
590 * changes to the way the kernel boots 590 * changes to the way the kernel boots
591 */ 591 */
592 592
593 spin_lock(&set_atomicity_lock); 593 raw_spin_lock(&set_atomicity_lock);
594 594
595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ 595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
596 cr0 = read_cr0() | X86_CR0_CD; 596 cr0 = read_cr0() | X86_CR0_CD;
@@ -627,7 +627,7 @@ static void post_set(void) __releases(set_atomicity_lock)
627 /* Restore value of CR4 */ 627 /* Restore value of CR4 */
628 if (cpu_has_pge) 628 if (cpu_has_pge)
629 write_cr4(cr4); 629 write_cr4(cr4);
630 spin_unlock(&set_atomicity_lock); 630 raw_spin_unlock(&set_atomicity_lock);
631} 631}
632 632
633static void generic_set_all(void) 633static void generic_set_all(void)
@@ -752,7 +752,7 @@ int positive_have_wrcomb(void)
752/* 752/*
753 * Generic structure... 753 * Generic structure...
754 */ 754 */
755struct mtrr_ops generic_mtrr_ops = { 755const struct mtrr_ops generic_mtrr_ops = {
756 .use_intel_if = 1, 756 .use_intel_if = 1,
757 .set_all = generic_set_all, 757 .set_all = generic_set_all,
758 .get = generic_get_mtrr, 758 .get = generic_get_mtrr,
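Converting set_atomicity_lock to a raw spinlock matters for PREEMPT_RT, where an ordinary spinlock_t becomes a sleeping lock: prepare_set() runs with the CPU cache disabled and interrupts off, a context that must never schedule. A minimal kernel-style sketch of the raw variant (names hypothetical):

        #include <linux/spinlock.h>

        /* A raw spinlock stays a true spinning lock even on PREEMPT_RT. */
        static DEFINE_RAW_SPINLOCK(my_lock);

        static void my_atomic_update(void)
        {
                raw_spin_lock(&my_lock);        /* callers already disabled IRQs */
                /* ... code that must never sleep or be preempted ... */
                raw_spin_unlock(&my_lock);
        }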
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 3c1b12d461d1..e006e56f699c 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -4,6 +4,7 @@
4#include <linux/proc_fs.h> 4#include <linux/proc_fs.h>
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/ctype.h> 6#include <linux/ctype.h>
7#include <linux/string.h>
7#include <linux/init.h> 8#include <linux/init.h>
8 9
9#define LINE_SIZE 80 10#define LINE_SIZE 80
@@ -133,8 +134,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
133 return -EINVAL; 134 return -EINVAL;
134 135
135 base = simple_strtoull(line + 5, &ptr, 0); 136 base = simple_strtoull(line + 5, &ptr, 0);
136 while (isspace(*ptr)) 137 ptr = skip_spaces(ptr);
137 ptr++;
138 138
139 if (strncmp(ptr, "size=", 5)) 139 if (strncmp(ptr, "size=", 5))
140 return -EINVAL; 140 return -EINVAL;
@@ -142,14 +142,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
142 size = simple_strtoull(ptr + 5, &ptr, 0); 142 size = simple_strtoull(ptr + 5, &ptr, 0);
143 if ((base & 0xfff) || (size & 0xfff)) 143 if ((base & 0xfff) || (size & 0xfff))
144 return -EINVAL; 144 return -EINVAL;
145 while (isspace(*ptr)) 145 ptr = skip_spaces(ptr);
146 ptr++;
147 146
148 if (strncmp(ptr, "type=", 5)) 147 if (strncmp(ptr, "type=", 5))
149 return -EINVAL; 148 return -EINVAL;
150 ptr += 5; 149 ptr = skip_spaces(ptr + 5);
151 while (isspace(*ptr))
152 ptr++;
153 150
154 for (i = 0; i < MTRR_NUM_TYPES; ++i) { 151 for (i = 0; i < MTRR_NUM_TYPES; ++i) {
155 if (strcmp(ptr, mtrr_strings[i])) 152 if (strcmp(ptr, mtrr_strings[i]))
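The if.c hunks replace three open-coded isspace() loops with skip_spaces() from <linux/string.h>, which returns a pointer to the first non-whitespace character; the skip_spaces(ptr + 5) form also folds in the "type=" prefix skip. A runnable userspace model of the helper and its use on an mtrr-style command line:

        #include <ctype.h>
        #include <stdio.h>

        /* Userspace model of the kernel's skip_spaces() helper. */
        static char *skip_spaces_model(const char *s)
        {
                while (isspace((unsigned char)*s))
                        s++;
                return (char *)s;
        }

        int main(void)
        {
                char line[] = "base=0x100000   size=0x1000   type=write-back";
                char *p = skip_spaces_model(line + 13); /* just past the base value */

                printf("next token: %.10s\n", p);       /* -> "size=0x100" */
                return 0;
        }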
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 84e83de54575..fe4622e8c837 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex);
60u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
61static bool mtrr_aps_delayed_init; 61static bool mtrr_aps_delayed_init;
62 62
63static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 63static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
64 64
65struct mtrr_ops *mtrr_if; 65const struct mtrr_ops *mtrr_if;
66 66
67static void set_mtrr(unsigned int reg, unsigned long base, 67static void set_mtrr(unsigned int reg, unsigned long base,
68 unsigned long size, mtrr_type type); 68 unsigned long size, mtrr_type type);
69 69
70void set_mtrr_ops(struct mtrr_ops *ops) 70void set_mtrr_ops(const struct mtrr_ops *ops)
71{ 71{
72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
73 mtrr_ops[ops->vendor] = ops; 73 mtrr_ops[ops->vendor] = ops;
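Constifying the vendor ops tables lets every mtrr_ops instance live in read-only data and makes the compiler reject accidental writes through mtrr_if; the cost is that each pointer in the chain (mtrr_ops[], mtrr_if, set_mtrr_ops()) must carry const, which the mtrr.h hunk below completes. A small userspace model of a const ops table behind a const-qualified pointer:

        #include <stdio.h>

        struct ops {
                int vendor;
                void (*set)(void);
        };

        static void my_set(void) { puts("set"); }

        static const struct ops my_ops = { .vendor = 1, .set = my_set };

        static const struct ops *ops_if;                /* like mtrr_if */

        static void set_ops(const struct ops *o)        /* like set_mtrr_ops() */
        {
                ops_if = o;
        }

        int main(void)
        {
                set_ops(&my_ops);
                ops_if->set();
                /* ops_if->vendor = 2;  would now fail to compile */
                return 0;
        }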
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index a501dee9a87a..df5e41f31a27 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size,
32extern int generic_validate_add_page(unsigned long base, unsigned long size, 32extern int generic_validate_add_page(unsigned long base, unsigned long size,
33 unsigned int type); 33 unsigned int type);
34 34
35extern struct mtrr_ops generic_mtrr_ops; 35extern const struct mtrr_ops generic_mtrr_ops;
36 36
37extern int positive_have_wrcomb(void); 37extern int positive_have_wrcomb(void);
38 38
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
54void get_mtrr_state(void); 54void get_mtrr_state(void);
55 55
56extern void set_mtrr_ops(struct mtrr_ops *ops); 56extern void set_mtrr_ops(const struct mtrr_ops *ops);
57 57
58extern u64 size_or_mask, size_and_mask; 58extern u64 size_or_mask, size_and_mask;
59extern struct mtrr_ops *mtrr_if; 59extern const struct mtrr_ops *mtrr_if;
60 60
61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
deleted file mode 100644
index dfc80b4e6b0d..000000000000
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ /dev/null
@@ -1,94 +0,0 @@
1#include <linux/init.h>
2#include <linux/io.h>
3#include <linux/mm.h>
4
5#include <asm/processor-cyrix.h>
6#include <asm/processor-flags.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9
10#include "mtrr.h"
11
12/* Put the processor into a state where MTRRs can be safely set */
13void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
14{
15 unsigned int cr0;
16
17 /* Disable interrupts locally */
18 local_irq_save(ctxt->flags);
19
20 if (use_intel() || is_cpu(CYRIX)) {
21
22 /* Save value of CR4 and clear Page Global Enable (bit 7) */
23 if (cpu_has_pge) {
24 ctxt->cr4val = read_cr4();
25 write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
26 }
27
28 /*
29 * Disable and flush caches. Note that wbinvd flushes the TLBs
30 * as a side-effect
31 */
32 cr0 = read_cr0() | X86_CR0_CD;
33 wbinvd();
34 write_cr0(cr0);
35 wbinvd();
36
37 if (use_intel()) {
38 /* Save MTRR state */
39 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
40 } else {
41 /*
42 * Cyrix ARRs -
43 * everything else was excluded at the top
44 */
45 ctxt->ccr3 = getCx86(CX86_CCR3);
46 }
47 }
48}
49
50void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
51{
52 if (use_intel()) {
53 /* Disable MTRRs, and set the default type to uncached */
54 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
55 ctxt->deftype_hi);
56 } else {
57 if (is_cpu(CYRIX)) {
58 /* Cyrix ARRs - everything else was excluded at the top */
59 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
60 }
61 }
62}
63
64/* Restore the processor after a set_mtrr_prepare */
65void set_mtrr_done(struct set_mtrr_context *ctxt)
66{
67 if (use_intel() || is_cpu(CYRIX)) {
68
69 /* Flush caches and TLBs */
70 wbinvd();
71
72 /* Restore MTRRdefType */
73 if (use_intel()) {
74 /* Intel (P6) standard MTRRs */
75 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
76 ctxt->deftype_hi);
77 } else {
78 /*
79 * Cyrix ARRs -
80 * everything else was excluded at the top
81 */
82 setCx86(CX86_CCR3, ctxt->ccr3);
83 }
84
85 /* Enable caches */
86 write_cr0(read_cr0() & 0xbfffffff);
87
88 /* Restore value of CR4 */
89 if (cpu_has_pge)
90 write_cr4(ctxt->cr4val);
91 }
92 /* Re-enable interrupts locally (if enabled previously) */
93 local_irq_restore(ctxt->flags);
94}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c1bbed1021d9..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter 7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> 8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> 9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
10 * 11 *
11 * For licencing details see kernel-base/COPYING 12 * For licencing details see kernel-base/COPYING
12 */ 13 */
@@ -22,6 +23,7 @@
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
23#include <linux/highmem.h> 24#include <linux/highmem.h>
24#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/bitops.h>
25 27
26#include <asm/apic.h> 28#include <asm/apic.h>
27#include <asm/stacktrace.h> 29#include <asm/stacktrace.h>
@@ -68,26 +70,59 @@ struct debug_store {
68 u64 pebs_event_reset[MAX_PEBS_EVENTS]; 70 u64 pebs_event_reset[MAX_PEBS_EVENTS];
69}; 71};
70 72
73struct event_constraint {
74 union {
75 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
76 u64 idxmsk64[1];
77 };
78 int code;
79 int cmask;
80 int weight;
81};
82
83struct amd_nb {
84 int nb_id; /* NorthBridge id */
85 int refcnt; /* reference count */
86 struct perf_event *owners[X86_PMC_IDX_MAX];
87 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
88};
89
71struct cpu_hw_events { 90struct cpu_hw_events {
72 struct perf_event *events[X86_PMC_IDX_MAX]; 91 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 92 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75 unsigned long interrupts; 93 unsigned long interrupts;
76 int enabled; 94 int enabled;
77 struct debug_store *ds; 95 struct debug_store *ds;
78};
79 96
80struct event_constraint { 97 int n_events;
81 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 98 int n_added;
82 int code; 99 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
100 u64 tags[X86_PMC_IDX_MAX];
101 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
102 struct amd_nb *amd_nb;
83}; 103};
84 104
85#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } 105#define __EVENT_CONSTRAINT(c, n, m, w) {\
86#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } 106 { .idxmsk64[0] = (n) }, \
107 .code = (c), \
108 .cmask = (m), \
109 .weight = (w), \
110}
111
112#define EVENT_CONSTRAINT(c, n, m) \
113 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
87 114
88#define for_each_event_constraint(e, c) \ 115#define INTEL_EVENT_CONSTRAINT(c, n) \
89 for ((e) = (c); (e)->idxmsk[0]; (e)++) 116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
90 117
118#define FIXED_EVENT_CONSTRAINT(c, n) \
119 EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
120
121#define EVENT_CONSTRAINT_END \
122 EVENT_CONSTRAINT(0, 0, 0)
123
124#define for_each_event_constraint(e, c) \
125 for ((e) = (c); (e)->cmask; (e)++)
91 126
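The reworked constraint now carries an explicit allowed-counter bitmap (idxmsk), a cmask selecting which eventsel bits to match, and a precomputed weight, the population count of idxmsk, so the scheduler can order events most-constrained-first; the terminator likewise changes from an empty mask to cmask == 0. A runnable userspace model of the weight precomputation (an 8-bit popcount macro stands in for the kernel's 64-bit HWEIGHT()):

        #include <stdio.h>

        /* Constant-expression popcount for 8 bits, modelling HWEIGHT(). */
        #define HWEIGHT8(w) ((!!((w) & 0x01)) + (!!((w) & 0x02)) + (!!((w) & 0x04)) + \
                             (!!((w) & 0x08)) + (!!((w) & 0x10)) + (!!((w) & 0x20)) + \
                             (!!((w) & 0x40)) + (!!((w) & 0x80)))

        struct constraint {
                unsigned idxmsk;        /* counters the event may use */
                int code, cmask, weight;
        };

        #define CONSTRAINT(c, n, m) \
                { .idxmsk = (n), .code = (c), .cmask = (m), .weight = HWEIGHT8(n) }

        static const struct constraint table[] = {
                CONSTRAINT(0xc1, 0x1, 0xff),    /* counter 0 only  -> weight 1 */
                CONSTRAINT(0x40, 0x3, 0xff),    /* counters 0 or 1 -> weight 2 */
                { 0 },                          /* cmask == 0 terminates, as above */
        };

        int main(void)
        {
                const struct constraint *c;

                for (c = table; c->cmask; c++)
                        printf("event %#x: weight %d\n", c->code, c->weight);
                return 0;
        }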
92/* 127/*
93 * struct x86_pmu - generic x86 pmu 128 * struct x86_pmu - generic x86 pmu
@@ -114,8 +149,14 @@ struct x86_pmu {
114 u64 intel_ctrl; 149 u64 intel_ctrl;
115 void (*enable_bts)(u64 config); 150 void (*enable_bts)(u64 config);
116 void (*disable_bts)(void); 151 void (*disable_bts)(void);
117 int (*get_event_idx)(struct cpu_hw_events *cpuc, 152
118 struct hw_perf_event *hwc); 153 struct event_constraint *
154 (*get_event_constraints)(struct cpu_hw_events *cpuc,
155 struct perf_event *event);
156
157 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
158 struct perf_event *event);
159 struct event_constraint *event_constraints;
119}; 160};
120 161
121static struct x86_pmu x86_pmu __read_mostly; 162static struct x86_pmu x86_pmu __read_mostly;
@@ -124,111 +165,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
124 .enabled = 1, 165 .enabled = 1,
125}; 166};
126 167
127static const struct event_constraint *event_constraints; 168static int x86_perf_event_set_period(struct perf_event *event,
128 169 struct hw_perf_event *hwc, int idx);
129/*
130 * Not sure about some of these
131 */
132static const u64 p6_perfmon_event_map[] =
133{
134 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
135 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
136 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
137 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
138 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
139 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
140 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
141};
142
143static u64 p6_pmu_event_map(int hw_event)
144{
145 return p6_perfmon_event_map[hw_event];
146}
147
148/*
149 * Event setting that is specified not to count anything.
150 * We use this to effectively disable a counter.
151 *
152 * L2_RQSTS with 0 MESI unit mask.
153 */
154#define P6_NOP_EVENT 0x0000002EULL
155
156static u64 p6_pmu_raw_event(u64 hw_event)
157{
158#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
159#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
160#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
161#define P6_EVNTSEL_INV_MASK 0x00800000ULL
162#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
163
164#define P6_EVNTSEL_MASK \
165 (P6_EVNTSEL_EVENT_MASK | \
166 P6_EVNTSEL_UNIT_MASK | \
167 P6_EVNTSEL_EDGE_MASK | \
168 P6_EVNTSEL_INV_MASK | \
169 P6_EVNTSEL_REG_MASK)
170
171 return hw_event & P6_EVNTSEL_MASK;
172}
173
174static const struct event_constraint intel_p6_event_constraints[] =
175{
176 EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
177 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
178 EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
179 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
180 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
181 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
182 EVENT_CONSTRAINT_END
183};
184
185/*
186 * Intel PerfMon v3. Used on Core2 and later.
187 */
188static const u64 intel_perfmon_event_map[] =
189{
190 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
191 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
192 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
193 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
194 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
195 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
196 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
197};
198
199static const struct event_constraint intel_core_event_constraints[] =
200{
201 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
202 EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
203 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
204 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
205 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
206 EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
207 EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
208 EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
209 EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
210 EVENT_CONSTRAINT_END
211};
212
213static const struct event_constraint intel_nehalem_event_constraints[] =
214{
215 EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
216 EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
217 EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
218 EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
219 EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
220 EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
221 EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
222 EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
223 EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
224 EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
225 EVENT_CONSTRAINT_END
226};
227
228static u64 intel_pmu_event_map(int hw_event)
229{
230 return intel_perfmon_event_map[hw_event];
231}
232 170
233/* 171/*
234 * Generalized hw caching related hw_event table, filled 172 * Generalized hw caching related hw_event table, filled
@@ -245,424 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
245 [PERF_COUNT_HW_CACHE_OP_MAX] 183 [PERF_COUNT_HW_CACHE_OP_MAX]
246 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 184 [PERF_COUNT_HW_CACHE_RESULT_MAX];
247 185
248static __initconst u64 nehalem_hw_cache_event_ids
249 [PERF_COUNT_HW_CACHE_MAX]
250 [PERF_COUNT_HW_CACHE_OP_MAX]
251 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
252{
253 [ C(L1D) ] = {
254 [ C(OP_READ) ] = {
255 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
256 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
257 },
258 [ C(OP_WRITE) ] = {
259 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
260 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
261 },
262 [ C(OP_PREFETCH) ] = {
263 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
264 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
265 },
266 },
267 [ C(L1I ) ] = {
268 [ C(OP_READ) ] = {
269 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
270 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
271 },
272 [ C(OP_WRITE) ] = {
273 [ C(RESULT_ACCESS) ] = -1,
274 [ C(RESULT_MISS) ] = -1,
275 },
276 [ C(OP_PREFETCH) ] = {
277 [ C(RESULT_ACCESS) ] = 0x0,
278 [ C(RESULT_MISS) ] = 0x0,
279 },
280 },
281 [ C(LL ) ] = {
282 [ C(OP_READ) ] = {
283 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
284 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
285 },
286 [ C(OP_WRITE) ] = {
287 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
288 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
289 },
290 [ C(OP_PREFETCH) ] = {
291 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
292 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
293 },
294 },
295 [ C(DTLB) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
298 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
299 },
300 [ C(OP_WRITE) ] = {
301 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
302 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
303 },
304 [ C(OP_PREFETCH) ] = {
305 [ C(RESULT_ACCESS) ] = 0x0,
306 [ C(RESULT_MISS) ] = 0x0,
307 },
308 },
309 [ C(ITLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
312 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
313 },
314 [ C(OP_WRITE) ] = {
315 [ C(RESULT_ACCESS) ] = -1,
316 [ C(RESULT_MISS) ] = -1,
317 },
318 [ C(OP_PREFETCH) ] = {
319 [ C(RESULT_ACCESS) ] = -1,
320 [ C(RESULT_MISS) ] = -1,
321 },
322 },
323 [ C(BPU ) ] = {
324 [ C(OP_READ) ] = {
325 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
326 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
339static __initconst u64 core2_hw_cache_event_ids
340 [PERF_COUNT_HW_CACHE_MAX]
341 [PERF_COUNT_HW_CACHE_OP_MAX]
342 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
343{
344 [ C(L1D) ] = {
345 [ C(OP_READ) ] = {
346 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
347 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
348 },
349 [ C(OP_WRITE) ] = {
350 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
351 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
352 },
353 [ C(OP_PREFETCH) ] = {
354 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
355 [ C(RESULT_MISS) ] = 0,
356 },
357 },
358 [ C(L1I ) ] = {
359 [ C(OP_READ) ] = {
360 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
361 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
362 },
363 [ C(OP_WRITE) ] = {
364 [ C(RESULT_ACCESS) ] = -1,
365 [ C(RESULT_MISS) ] = -1,
366 },
367 [ C(OP_PREFETCH) ] = {
368 [ C(RESULT_ACCESS) ] = 0,
369 [ C(RESULT_MISS) ] = 0,
370 },
371 },
372 [ C(LL ) ] = {
373 [ C(OP_READ) ] = {
374 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
375 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
376 },
377 [ C(OP_WRITE) ] = {
378 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
379 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
380 },
381 [ C(OP_PREFETCH) ] = {
382 [ C(RESULT_ACCESS) ] = 0,
383 [ C(RESULT_MISS) ] = 0,
384 },
385 },
386 [ C(DTLB) ] = {
387 [ C(OP_READ) ] = {
388 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
389 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
390 },
391 [ C(OP_WRITE) ] = {
392 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
393 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
394 },
395 [ C(OP_PREFETCH) ] = {
396 [ C(RESULT_ACCESS) ] = 0,
397 [ C(RESULT_MISS) ] = 0,
398 },
399 },
400 [ C(ITLB) ] = {
401 [ C(OP_READ) ] = {
402 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
403 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
404 },
405 [ C(OP_WRITE) ] = {
406 [ C(RESULT_ACCESS) ] = -1,
407 [ C(RESULT_MISS) ] = -1,
408 },
409 [ C(OP_PREFETCH) ] = {
410 [ C(RESULT_ACCESS) ] = -1,
411 [ C(RESULT_MISS) ] = -1,
412 },
413 },
414 [ C(BPU ) ] = {
415 [ C(OP_READ) ] = {
416 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
417 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
418 },
419 [ C(OP_WRITE) ] = {
420 [ C(RESULT_ACCESS) ] = -1,
421 [ C(RESULT_MISS) ] = -1,
422 },
423 [ C(OP_PREFETCH) ] = {
424 [ C(RESULT_ACCESS) ] = -1,
425 [ C(RESULT_MISS) ] = -1,
426 },
427 },
428};
429
430static __initconst u64 atom_hw_cache_event_ids
431 [PERF_COUNT_HW_CACHE_MAX]
432 [PERF_COUNT_HW_CACHE_OP_MAX]
433 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
434{
435 [ C(L1D) ] = {
436 [ C(OP_READ) ] = {
437 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
438 [ C(RESULT_MISS) ] = 0,
439 },
440 [ C(OP_WRITE) ] = {
441 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
442 [ C(RESULT_MISS) ] = 0,
443 },
444 [ C(OP_PREFETCH) ] = {
445 [ C(RESULT_ACCESS) ] = 0x0,
446 [ C(RESULT_MISS) ] = 0,
447 },
448 },
449 [ C(L1I ) ] = {
450 [ C(OP_READ) ] = {
451 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
452 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
453 },
454 [ C(OP_WRITE) ] = {
455 [ C(RESULT_ACCESS) ] = -1,
456 [ C(RESULT_MISS) ] = -1,
457 },
458 [ C(OP_PREFETCH) ] = {
459 [ C(RESULT_ACCESS) ] = 0,
460 [ C(RESULT_MISS) ] = 0,
461 },
462 },
463 [ C(LL ) ] = {
464 [ C(OP_READ) ] = {
465 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
466 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
467 },
468 [ C(OP_WRITE) ] = {
469 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
470 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
471 },
472 [ C(OP_PREFETCH) ] = {
473 [ C(RESULT_ACCESS) ] = 0,
474 [ C(RESULT_MISS) ] = 0,
475 },
476 },
477 [ C(DTLB) ] = {
478 [ C(OP_READ) ] = {
479 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
480 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
481 },
482 [ C(OP_WRITE) ] = {
483 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
484 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
485 },
486 [ C(OP_PREFETCH) ] = {
487 [ C(RESULT_ACCESS) ] = 0,
488 [ C(RESULT_MISS) ] = 0,
489 },
490 },
491 [ C(ITLB) ] = {
492 [ C(OP_READ) ] = {
493 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
494 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
495 },
496 [ C(OP_WRITE) ] = {
497 [ C(RESULT_ACCESS) ] = -1,
498 [ C(RESULT_MISS) ] = -1,
499 },
500 [ C(OP_PREFETCH) ] = {
501 [ C(RESULT_ACCESS) ] = -1,
502 [ C(RESULT_MISS) ] = -1,
503 },
504 },
505 [ C(BPU ) ] = {
506 [ C(OP_READ) ] = {
507 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
508 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
509 },
510 [ C(OP_WRITE) ] = {
511 [ C(RESULT_ACCESS) ] = -1,
512 [ C(RESULT_MISS) ] = -1,
513 },
514 [ C(OP_PREFETCH) ] = {
515 [ C(RESULT_ACCESS) ] = -1,
516 [ C(RESULT_MISS) ] = -1,
517 },
518 },
519};
520
521static u64 intel_pmu_raw_event(u64 hw_event)
522{
523#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
524#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
525#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
526#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
527#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
528
529#define CORE_EVNTSEL_MASK \
530 (CORE_EVNTSEL_EVENT_MASK | \
531 CORE_EVNTSEL_UNIT_MASK | \
532 CORE_EVNTSEL_EDGE_MASK | \
533 CORE_EVNTSEL_INV_MASK | \
534 CORE_EVNTSEL_REG_MASK)
535
536 return hw_event & CORE_EVNTSEL_MASK;
537}
538
539static __initconst u64 amd_hw_cache_event_ids
540 [PERF_COUNT_HW_CACHE_MAX]
541 [PERF_COUNT_HW_CACHE_OP_MAX]
542 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
543{
544 [ C(L1D) ] = {
545 [ C(OP_READ) ] = {
546 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
547 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
548 },
549 [ C(OP_WRITE) ] = {
550 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
551 [ C(RESULT_MISS) ] = 0,
552 },
553 [ C(OP_PREFETCH) ] = {
554 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
555 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
556 },
557 },
558 [ C(L1I ) ] = {
559 [ C(OP_READ) ] = {
560 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
561 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
562 },
563 [ C(OP_WRITE) ] = {
564 [ C(RESULT_ACCESS) ] = -1,
565 [ C(RESULT_MISS) ] = -1,
566 },
567 [ C(OP_PREFETCH) ] = {
568 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
569 [ C(RESULT_MISS) ] = 0,
570 },
571 },
572 [ C(LL ) ] = {
573 [ C(OP_READ) ] = {
574 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
575 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
576 },
577 [ C(OP_WRITE) ] = {
578 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
579 [ C(RESULT_MISS) ] = 0,
580 },
581 [ C(OP_PREFETCH) ] = {
582 [ C(RESULT_ACCESS) ] = 0,
583 [ C(RESULT_MISS) ] = 0,
584 },
585 },
586 [ C(DTLB) ] = {
587 [ C(OP_READ) ] = {
588 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
589 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
590 },
591 [ C(OP_WRITE) ] = {
592 [ C(RESULT_ACCESS) ] = 0,
593 [ C(RESULT_MISS) ] = 0,
594 },
595 [ C(OP_PREFETCH) ] = {
596 [ C(RESULT_ACCESS) ] = 0,
597 [ C(RESULT_MISS) ] = 0,
598 },
599 },
600 [ C(ITLB) ] = {
601 [ C(OP_READ) ] = {
602 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
603 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
604 },
605 [ C(OP_WRITE) ] = {
606 [ C(RESULT_ACCESS) ] = -1,
607 [ C(RESULT_MISS) ] = -1,
608 },
609 [ C(OP_PREFETCH) ] = {
610 [ C(RESULT_ACCESS) ] = -1,
611 [ C(RESULT_MISS) ] = -1,
612 },
613 },
614 [ C(BPU ) ] = {
615 [ C(OP_READ) ] = {
616 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
617 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
618 },
619 [ C(OP_WRITE) ] = {
620 [ C(RESULT_ACCESS) ] = -1,
621 [ C(RESULT_MISS) ] = -1,
622 },
623 [ C(OP_PREFETCH) ] = {
624 [ C(RESULT_ACCESS) ] = -1,
625 [ C(RESULT_MISS) ] = -1,
626 },
627 },
628};
629
630/*
631 * AMD Performance Monitor K7 and later.
632 */
633static const u64 amd_perfmon_event_map[] =
634{
635 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
636 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
637 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
638 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
639 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
640 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
641};
642
643static u64 amd_pmu_event_map(int hw_event)
644{
645 return amd_perfmon_event_map[hw_event];
646}
647
648static u64 amd_pmu_raw_event(u64 hw_event)
649{
650#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
651#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
652#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
653#define K7_EVNTSEL_INV_MASK 0x000800000ULL
654#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
655
656#define K7_EVNTSEL_MASK \
657 (K7_EVNTSEL_EVENT_MASK | \
658 K7_EVNTSEL_UNIT_MASK | \
659 K7_EVNTSEL_EDGE_MASK | \
660 K7_EVNTSEL_INV_MASK | \
661 K7_EVNTSEL_REG_MASK)
662
663 return hw_event & K7_EVNTSEL_MASK;
664}
665
666/* 186/*
667 * Propagate event elapsed time into the generic event. 187 * Propagate event elapsed time into the generic event.
668 * Can only be executed on the CPU where the event is active. 188 * Can only be executed on the CPU where the event is active.
@@ -914,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
914 return 0; 434 return 0;
915} 435}
916 436
917static void intel_pmu_enable_bts(u64 config)
918{
919 unsigned long debugctlmsr;
920
921 debugctlmsr = get_debugctlmsr();
922
923 debugctlmsr |= X86_DEBUGCTL_TR;
924 debugctlmsr |= X86_DEBUGCTL_BTS;
925 debugctlmsr |= X86_DEBUGCTL_BTINT;
926
927 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
928 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
929
930 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
931 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
932
933 update_debugctlmsr(debugctlmsr);
934}
935
936static void intel_pmu_disable_bts(void)
937{
938 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
939 unsigned long debugctlmsr;
940
941 if (!cpuc->ds)
942 return;
943
944 debugctlmsr = get_debugctlmsr();
945
946 debugctlmsr &=
947 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
948 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
949
950 update_debugctlmsr(debugctlmsr);
951}
952
953/* 437/*
954 * Setup the hardware configuration for a given attr_type 438 * Setup the hardware configuration for a given attr_type
955 */ 439 */
@@ -988,6 +472,8 @@ static int __hw_perf_event_init(struct perf_event *event)
988 hwc->config = ARCH_PERFMON_EVENTSEL_INT; 472 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
989 473
990 hwc->idx = -1; 474 hwc->idx = -1;
475 hwc->last_cpu = -1;
476 hwc->last_tag = ~0ULL;
991 477
992 /* 478 /*
993 * Count user and OS events unless requested not to. 479 * Count user and OS events unless requested not to.
@@ -1056,216 +542,323 @@ static int __hw_perf_event_init(struct perf_event *event)
1056 return 0; 542 return 0;
1057} 543}
1058 544
1059static void p6_pmu_disable_all(void) 545static void x86_pmu_disable_all(void)
1060{ 546{
1061 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 547 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1062 u64 val; 548 int idx;
1063
1064 if (!cpuc->enabled)
1065 return;
1066 549
1067 cpuc->enabled = 0; 550 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1068 barrier(); 551 u64 val;
1069 552
1070 /* p6 only has one enable register */ 553 if (!test_bit(idx, cpuc->active_mask))
1071 rdmsrl(MSR_P6_EVNTSEL0, val); 554 continue;
1072 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 555 rdmsrl(x86_pmu.eventsel + idx, val);
1073 wrmsrl(MSR_P6_EVNTSEL0, val); 556 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
557 continue;
558 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
559 wrmsrl(x86_pmu.eventsel + idx, val);
560 }
1074} 561}
1075 562
1076static void intel_pmu_disable_all(void) 563void hw_perf_disable(void)
1077{ 564{
1078 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1079 566
567 if (!x86_pmu_initialized())
568 return;
569
1080 if (!cpuc->enabled) 570 if (!cpuc->enabled)
1081 return; 571 return;
1082 572
573 cpuc->n_added = 0;
1083 cpuc->enabled = 0; 574 cpuc->enabled = 0;
1084 barrier(); 575 barrier();
1085 576
1086 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 577 x86_pmu.disable_all();
1087
1088 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1089 intel_pmu_disable_bts();
1090} 578}
1091 579
1092static void amd_pmu_disable_all(void) 580static void x86_pmu_enable_all(void)
1093{ 581{
1094 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 582 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1095 int idx; 583 int idx;
1096 584
1097 if (!cpuc->enabled)
1098 return;
1099
1100 cpuc->enabled = 0;
1101 /*
1102 * ensure we write the disable before we start disabling the
1103 * events proper, so that amd_pmu_enable_event() does the
1104 * right thing.
1105 */
1106 barrier();
1107
1108 for (idx = 0; idx < x86_pmu.num_events; idx++) { 585 for (idx = 0; idx < x86_pmu.num_events; idx++) {
586 struct perf_event *event = cpuc->events[idx];
1109 u64 val; 587 u64 val;
1110 588
1111 if (!test_bit(idx, cpuc->active_mask)) 589 if (!test_bit(idx, cpuc->active_mask))
1112 continue; 590 continue;
1113 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 591
1114 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 592 val = event->hw.config;
1115 continue; 593 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1116 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 594 wrmsrl(x86_pmu.eventsel + idx, val);
1117 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1118 } 595 }
1119} 596}
1120 597
1121void hw_perf_disable(void) 598static const struct pmu pmu;
599
600static inline int is_x86_event(struct perf_event *event)
1122{ 601{
1123 if (!x86_pmu_initialized()) 602 return event->pmu == &pmu;
1124 return;
1125 return x86_pmu.disable_all();
1126} 603}
1127 604
1128static void p6_pmu_enable_all(void) 605static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1129{ 606{
1130 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 607 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1131 unsigned long val; 608 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
609 int i, j, w, wmax, num = 0;
610 struct hw_perf_event *hwc;
1132 611
1133 if (cpuc->enabled) 612 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1134 return;
1135 613
1136 cpuc->enabled = 1; 614 for (i = 0; i < n; i++) {
1137 barrier(); 615 constraints[i] =
616 x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
617 }
1138 618
1139 /* p6 only has one enable register */ 619 /*
1140 rdmsrl(MSR_P6_EVNTSEL0, val); 620 * fastpath, try to reuse previous register
1141 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 621 */
1142 wrmsrl(MSR_P6_EVNTSEL0, val); 622 for (i = 0; i < n; i++) {
1143} 623 hwc = &cpuc->event_list[i]->hw;
624 c = constraints[i];
1144 625
1145static void intel_pmu_enable_all(void) 626 /* never assigned */
1146{ 627 if (hwc->idx == -1)
1147 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 628 break;
1148 629
1149 if (cpuc->enabled) 630 /* constraint still honored */
1150 return; 631 if (!test_bit(hwc->idx, c->idxmsk))
632 break;
1151 633
1152 cpuc->enabled = 1; 634 /* not already used */
1153 barrier(); 635 if (test_bit(hwc->idx, used_mask))
636 break;
1154 637
1155 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 638 set_bit(hwc->idx, used_mask);
639 if (assign)
640 assign[i] = hwc->idx;
641 }
642 if (i == n)
643 goto done;
1156 644
1157 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 645 /*
1158 struct perf_event *event = 646 * begin slow path
1159 cpuc->events[X86_PMC_IDX_FIXED_BTS]; 647 */
648
649 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
650
651 /*
652 * weight = number of possible counters
653 *
654 * 1 = most constrained, only works on one counter
655 * wmax = least constrained, works on any counter
656 *
657 * assign events to counters starting with most
658 * constrained events.
659 */
660 wmax = x86_pmu.num_events;
661
662 /*
663 * when fixed event counters are present,
664 * wmax is incremented by 1 to account
665 * for one more choice
666 */
667 if (x86_pmu.num_events_fixed)
668 wmax++;
1160 669
1161 if (WARN_ON_ONCE(!event)) 670 for (w = 1, num = n; num && w <= wmax; w++) {
1162 return; 671 /* for each event */
672 for (i = 0; num && i < n; i++) {
673 c = constraints[i];
674 hwc = &cpuc->event_list[i]->hw;
1163 675
1164 intel_pmu_enable_bts(event->hw.config); 676 if (c->weight != w)
677 continue;
678
679 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
680 if (!test_bit(j, used_mask))
681 break;
682 }
683
684 if (j == X86_PMC_IDX_MAX)
685 break;
686
687 set_bit(j, used_mask);
688
689 if (assign)
690 assign[i] = j;
691 num--;
692 }
693 }
694done:
695 /*
696 * scheduling failed or is just a simulation,
697 * free resources if necessary
698 */
699 if (!assign || num) {
700 for (i = 0; i < n; i++) {
701 if (x86_pmu.put_event_constraints)
702 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
703 }
1165 } 704 }
705 return num ? -ENOSPC : 0;
1166} 706}
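x86_schedule_events() above first tries a fastpath, keeping each event on its previous counter when the constraint still permits it and no earlier event has claimed it, and otherwise runs a greedy pass that places weight-1 events before weight-2 events and so on: an event with fewer usable counters must be served first, or a flexible event may squat on its only slot. A compact runnable model of the slow path (simplified in one respect: an unplaceable event is skipped rather than aborting the pass):

        #include <stdio.h>

        #define NCTR 4

        struct ev { unsigned mask; int weight; int assign; };

        /* Weight-ordered greedy assignment: most-constrained events first. */
        static int schedule(struct ev *e, int n)
        {
                unsigned used = 0;
                int num = n, w, i, j;

                for (w = 1; num && w <= NCTR; w++) {
                        for (i = 0; num && i < n; i++) {
                                if (e[i].weight != w)
                                        continue;
                                for (j = 0; j < NCTR; j++) {
                                        if ((e[i].mask & (1u << j)) &&
                                            !(used & (1u << j))) {
                                                used |= 1u << j;
                                                e[i].assign = j;
                                                num--;
                                                break;
                                        }
                                }
                        }
                }
                return num ? -1 : 0;    /* like the -ENOSPC failure */
        }

        int main(void)
        {
                struct ev e[] = {
                        { .mask = 0xf, .weight = 4 },   /* any counter */
                        { .mask = 0x1, .weight = 1 },   /* counter 0 only */
                        { .mask = 0x3, .weight = 2 },   /* counters 0-1 */
                };
                int i;

                if (!schedule(e, 3))
                        for (i = 0; i < 3; i++)
                                printf("event %d -> counter %d\n", i, e[i].assign);
                return 0;
        }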
1167 707
1168static void amd_pmu_enable_all(void) 708/*
709 * dogrp: true if must collect siblings events (group)
710 * returns total number of events and error code
711 */
712static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1169{ 713{
1170 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 714 struct perf_event *event;
1171 int idx; 715 int n, max_count;
1172 716
1173 if (cpuc->enabled) 717 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1174 return;
1175 718
1176 cpuc->enabled = 1; 719 /* current number of events already accepted */
1177 barrier(); 720 n = cpuc->n_events;
1178 721
1179 for (idx = 0; idx < x86_pmu.num_events; idx++) { 722 if (is_x86_event(leader)) {
1180 struct perf_event *event = cpuc->events[idx]; 723 if (n >= max_count)
1181 u64 val; 724 return -ENOSPC;
725 cpuc->event_list[n] = leader;
726 n++;
727 }
728 if (!dogrp)
729 return n;
1182 730
1183 if (!test_bit(idx, cpuc->active_mask)) 731 list_for_each_entry(event, &leader->sibling_list, group_entry) {
732 if (!is_x86_event(event) ||
733 event->state <= PERF_EVENT_STATE_OFF)
1184 continue; 734 continue;
1185 735
1186 val = event->hw.config; 736 if (n >= max_count)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 737 return -ENOSPC;
1188 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1189 }
1190}
1191 738
1192void hw_perf_enable(void) 739 cpuc->event_list[n] = event;
1193{ 740 n++;
1194 if (!x86_pmu_initialized()) 741 }
1195 return; 742 return n;
1196 x86_pmu.enable_all();
1197} 743}
1198 744
1199static inline u64 intel_pmu_get_status(void) 745static inline void x86_assign_hw_event(struct perf_event *event,
746 struct cpu_hw_events *cpuc, int i)
1200{ 747{
1201 u64 status; 748 struct hw_perf_event *hwc = &event->hw;
1202 749
1203 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 750 hwc->idx = cpuc->assign[i];
751 hwc->last_cpu = smp_processor_id();
752 hwc->last_tag = ++cpuc->tags[i];
1204 753
1205 return status; 754 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
755 hwc->config_base = 0;
756 hwc->event_base = 0;
757 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
758 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
759 /*
760 * We set it so that event_base + idx in wrmsr/rdmsr maps to
761 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
762 */
763 hwc->event_base =
764 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
765 } else {
766 hwc->config_base = x86_pmu.eventsel;
767 hwc->event_base = x86_pmu.perfctr;
768 }
1206} 769}
1207 770
1208static inline void intel_pmu_ack_status(u64 ack) 771static inline int match_prev_assignment(struct hw_perf_event *hwc,
772 struct cpu_hw_events *cpuc,
773 int i)
1209{ 774{
1210 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 775 return hwc->idx == cpuc->assign[i] &&
776 hwc->last_cpu == smp_processor_id() &&
777 hwc->last_tag == cpuc->tags[i];
1211} 778}
1212 779
1213static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 780static void x86_pmu_stop(struct perf_event *event);
1214{
1215 (void)checking_wrmsrl(hwc->config_base + idx,
1216 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1217}
1218 781
1219static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 782void hw_perf_enable(void)
1220{ 783{
1221 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 784 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1222} 785 struct perf_event *event;
786 struct hw_perf_event *hwc;
787 int i;
1223 788
1224static inline void 789 if (!x86_pmu_initialized())
1225intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) 790 return;
1226{
1227 int idx = __idx - X86_PMC_IDX_FIXED;
1228 u64 ctrl_val, mask;
1229 791
1230 mask = 0xfULL << (idx * 4); 792 if (cpuc->enabled)
793 return;
1231 794
1232 rdmsrl(hwc->config_base, ctrl_val); 795 if (cpuc->n_added) {
1233 ctrl_val &= ~mask; 796 /*
1234 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 797 * apply assignment obtained either from
1235} 798 * hw_perf_group_sched_in() or x86_pmu_enable()
799 *
800 * step1: save events moving to new counters
801 * step2: reprogram moved events into new counters
802 */
803 for (i = 0; i < cpuc->n_events; i++) {
1236 804
1237static inline void 805 event = cpuc->event_list[i];
1238p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) 806 hwc = &event->hw;
1239{
1240 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1241 u64 val = P6_NOP_EVENT;
1242 807
1243 if (cpuc->enabled) 808 /*
1244 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 809 * we can avoid reprogramming counter if:
810 * - assigned same counter as last time
811 * - running on same CPU as last time
812 * - no other event has used the counter since
813 */
814 if (hwc->idx == -1 ||
815 match_prev_assignment(hwc, cpuc, i))
816 continue;
1245 817
1246 (void)checking_wrmsrl(hwc->config_base + idx, val); 818 x86_pmu_stop(event);
1247}
1248 819
1249static inline void 820 hwc->idx = -1;
1250intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) 821 }
1251{
1252 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1253 intel_pmu_disable_bts();
1254 return;
1255 }
1256 822
1257 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 823 for (i = 0; i < cpuc->n_events; i++) {
1258 intel_pmu_disable_fixed(hwc, idx); 824
1259 return; 825 event = cpuc->event_list[i];
826 hwc = &event->hw;
827
828 if (hwc->idx == -1) {
829 x86_assign_hw_event(event, cpuc, i);
830 x86_perf_event_set_period(event, hwc, hwc->idx);
831 }
832 /*
833 * need to mark as active because x86_pmu_disable()
 834 * clears active_mask and events[] yet it preserves
835 * idx
836 */
837 set_bit(hwc->idx, cpuc->active_mask);
838 cpuc->events[hwc->idx] = event;
839
840 x86_pmu.enable(hwc, hwc->idx);
841 perf_event_update_userpage(event);
842 }
843 cpuc->n_added = 0;
844 perf_events_lapic_init();
1260 } 845 }
1261 846
1262 x86_pmu_disable_event(hwc, idx); 847 cpuc->enabled = 1;
848 barrier();
849
850 x86_pmu.enable_all();
1263} 851}
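The reuse test behind step 1 above: a counter stays programmed only if the event kept the same index, ran on the same CPU, and no other event bumped the per-slot tag in between. A minimal stand-alone sketch (the struct and field names are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct assignment {
	int idx;		/* counter index */
	int cpu;		/* CPU it was programmed on */
	unsigned long tag;	/* bumped whenever the slot is reassigned */
};

static bool match_prev(const struct assignment *prev,
		       const struct assignment *now)
{
	return prev->idx == now->idx &&
	       prev->cpu == now->cpu &&
	       prev->tag == now->tag;
}

int main(void)
{
	struct assignment prev = { .idx = 2, .cpu = 0, .tag = 7 };
	struct assignment now  = { .idx = 2, .cpu = 0, .tag = 8 };

	if (!match_prev(&prev, &now))
		printf("slot was reused in between: stop and reprogram\n");
	return 0;
}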
1264 852
1265static inline void 853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1266amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) 854{
855 (void)checking_wrmsrl(hwc->config_base + idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
857}
858
859static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1267{ 860{
1268 x86_pmu_disable_event(hwc, idx); 861 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1269} 862}
1270 863
1271static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 864static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1286,7 +879,7 @@ x86_perf_event_set_period(struct perf_event *event,
1286 return 0; 879 return 0;
1287 880
1288 /* 881 /*
1289 * If we are way outside a reasoable range then just skip forward: 882 * If we are way outside a reasonable range then just skip forward:
1290 */ 883 */
1291 if (unlikely(left <= -period)) { 884 if (unlikely(left <= -period)) {
1292 left = period; 885 left = period;
@@ -1326,213 +919,60 @@ x86_perf_event_set_period(struct perf_event *event,
1326 return ret; 919 return ret;
1327} 920}
1328 921
1329static inline void 922static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1331{
1332 int idx = __idx - X86_PMC_IDX_FIXED;
1333 u64 ctrl_val, bits, mask;
1334 int err;
1335
1336 /*
1337 * Enable IRQ generation (0x8),
1338 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1339 * if requested:
1340 */
1341 bits = 0x8ULL;
1342 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1343 bits |= 0x2;
1344 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1345 bits |= 0x1;
1346 bits <<= (idx * 4);
1347 mask = 0xfULL << (idx * 4);
1348
1349 rdmsrl(hwc->config_base, ctrl_val);
1350 ctrl_val &= ~mask;
1351 ctrl_val |= bits;
1352 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1353}
1354
1355static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1356{ 923{
1357 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 924 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1358 u64 val;
1359
1360 val = hwc->config;
1361 if (cpuc->enabled) 925 if (cpuc->enabled)
1362 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 926 __x86_pmu_enable_event(hwc, idx);
1363
1364 (void)checking_wrmsrl(hwc->config_base + idx, val);
1365} 927}
1366 928
1367 929/*
1368static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) 930 * activate a single event
1369{ 931 *
1370 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 932 * The event is added to the group of enabled events
1371 if (!__get_cpu_var(cpu_hw_events).enabled) 933 * but only if it can be scehduled with existing events.
 1372 return; 933 * but only if it can be scheduled with existing events.
1373 935 * Called with PMU disabled. If successful and return value 1,
 1374 intel_pmu_enable_bts(hwc->config); 935 * Called with the PMU disabled. If successful, the caller is
1375 return; 937 */
1376 } 938static int x86_pmu_enable(struct perf_event *event)
1377
1378 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1379 intel_pmu_enable_fixed(hwc, idx);
1380 return;
1381 }
1382
1383 x86_pmu_enable_event(hwc, idx);
1384}
1385
1386static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1387{ 939{
1388 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 940 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
941 struct hw_perf_event *hwc;
942 int assign[X86_PMC_IDX_MAX];
943 int n, n0, ret;
1389 944
1390 if (cpuc->enabled) 945 hwc = &event->hw;
1391 x86_pmu_enable_event(hwc, idx);
1392}
1393
1394static int fixed_mode_idx(struct hw_perf_event *hwc)
1395{
1396 unsigned int hw_event;
1397
1398 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1399
1400 if (unlikely((hw_event ==
1401 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1402 (hwc->sample_period == 1)))
1403 return X86_PMC_IDX_FIXED_BTS;
1404 946
1405 if (!x86_pmu.num_events_fixed) 947 n0 = cpuc->n_events;
1406 return -1; 948 n = collect_events(cpuc, event, false);
949 if (n < 0)
950 return n;
1407 951
952 ret = x86_schedule_events(cpuc, n, assign);
953 if (ret)
954 return ret;
1408 /* 955 /*
 1409 * fixed counters do not take all possible filters 956 * copy the new assignment; now we know it is possible,
 957 * it will be used by hw_perf_enable()
1410 */ 958 */
1411 if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) 959 memcpy(cpuc->assign, assign, n*sizeof(int));
1412 return -1;
1413 960
1414 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 961 cpuc->n_events = n;
1415 return X86_PMC_IDX_FIXED_INSTRUCTIONS; 962 cpuc->n_added = n - n0;
1416 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1417 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1418 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1419 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1420
1421 return -1;
1422}
1423 963
1424/* 964 return 0;
1425 * generic counter allocator: get next free counter
1426 */
1427static int
1428gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1429{
1430 int idx;
1431
1432 idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
1433 return idx == x86_pmu.num_events ? -1 : idx;
1434}
1435
1436/*
1437 * intel-specific counter allocator: check event constraints
1438 */
1439static int
1440intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1441{
1442 const struct event_constraint *event_constraint;
1443 int i, code;
1444
1445 if (!event_constraints)
1446 goto skip;
1447
1448 code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
1449
1450 for_each_event_constraint(event_constraint, event_constraints) {
1451 if (code == event_constraint->code) {
1452 for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
1453 if (!test_and_set_bit(i, cpuc->used_mask))
1454 return i;
1455 }
1456 return -1;
1457 }
1458 }
1459skip:
1460 return gen_get_event_idx(cpuc, hwc);
1461}
1462
1463static int
1464x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1465{
1466 int idx;
1467
1468 idx = fixed_mode_idx(hwc);
1469 if (idx == X86_PMC_IDX_FIXED_BTS) {
1470 /* BTS is already occupied. */
1471 if (test_and_set_bit(idx, cpuc->used_mask))
1472 return -EAGAIN;
1473
1474 hwc->config_base = 0;
1475 hwc->event_base = 0;
1476 hwc->idx = idx;
1477 } else if (idx >= 0) {
1478 /*
1479 * Try to get the fixed event, if that is already taken
1480 * then try to get a generic event:
1481 */
1482 if (test_and_set_bit(idx, cpuc->used_mask))
1483 goto try_generic;
1484
1485 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1486 /*
1487 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1488 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1489 */
1490 hwc->event_base =
1491 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1492 hwc->idx = idx;
1493 } else {
1494 idx = hwc->idx;
1495 /* Try to get the previous generic event again */
1496 if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1497try_generic:
1498 idx = x86_pmu.get_event_idx(cpuc, hwc);
1499 if (idx == -1)
1500 return -EAGAIN;
1501
1502 set_bit(idx, cpuc->used_mask);
1503 hwc->idx = idx;
1504 }
1505 hwc->config_base = x86_pmu.eventsel;
1506 hwc->event_base = x86_pmu.perfctr;
1507 }
1508
1509 return idx;
1510} 965}
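Note the ordering in x86_pmu_enable() above: scheduling runs against the local assign[] array, and cpuc->assign is only overwritten after x86_schedule_events() has succeeded, so a failed schedule leaves the committed state untouched. The same commit-on-success pattern in isolation (try_schedule() is a made-up stand-in):

#include <stdio.h>
#include <string.h>

#define NMAX 8

/* pretend scheduler: event i gets counter i, only 4 counters exist */
static int try_schedule(int n, int *assign)
{
	int i;

	if (n > 4)
		return -1;
	for (i = 0; i < n; i++)
		assign[i] = i;
	return 0;
}

int main(void)
{
	int committed[NMAX] = { 0 };
	int scratch[NMAX];
	int n = 3;

	if (!try_schedule(n, scratch)) {
		memcpy(committed, scratch, n * sizeof(int));	/* commit */
		printf("committed assignment for %d events\n", n);
	}
	return 0;
}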
1511 966
1512/* 967static int x86_pmu_start(struct perf_event *event)
1513 * Find a PMC slot for the freshly enabled / scheduled in event:
1514 */
1515static int x86_pmu_enable(struct perf_event *event)
1516{ 968{
1517 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1518 struct hw_perf_event *hwc = &event->hw; 969 struct hw_perf_event *hwc = &event->hw;
1519 int idx;
1520 970
1521 idx = x86_schedule_event(cpuc, hwc); 971 if (hwc->idx == -1)
1522 if (idx < 0) 972 return -EAGAIN;
1523 return idx;
1524 973
1525 perf_events_lapic_init(); 974 x86_perf_event_set_period(event, hwc, hwc->idx);
1526 975 x86_pmu.enable(hwc, hwc->idx);
1527 x86_pmu.disable(hwc, idx);
1528
1529 cpuc->events[idx] = event;
1530 set_bit(idx, cpuc->active_mask);
1531
1532 x86_perf_event_set_period(event, hwc, idx);
1533 x86_pmu.enable(hwc, idx);
1534
1535 perf_event_update_userpage(event);
1536 976
1537 return 0; 977 return 0;
1538} 978}
@@ -1576,7 +1016,7 @@ void perf_event_print_debug(void)
1576 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1016 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1577 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1017 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1578 } 1018 }
1579 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); 1019 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1580 1020
1581 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1021 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1582 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1022 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1600,66 +1040,7 @@ void perf_event_print_debug(void)
1600 local_irq_restore(flags); 1040 local_irq_restore(flags);
1601} 1041}
1602 1042
1603static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) 1043static void x86_pmu_stop(struct perf_event *event)
1604{
1605 struct debug_store *ds = cpuc->ds;
1606 struct bts_record {
1607 u64 from;
1608 u64 to;
1609 u64 flags;
1610 };
1611 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1612 struct bts_record *at, *top;
1613 struct perf_output_handle handle;
1614 struct perf_event_header header;
1615 struct perf_sample_data data;
1616 struct pt_regs regs;
1617
1618 if (!event)
1619 return;
1620
1621 if (!ds)
1622 return;
1623
1624 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1625 top = (struct bts_record *)(unsigned long)ds->bts_index;
1626
1627 if (top <= at)
1628 return;
1629
1630 ds->bts_index = ds->bts_buffer_base;
1631
1632
1633 data.period = event->hw.last_period;
1634 data.addr = 0;
1635 regs.ip = 0;
1636
1637 /*
1638 * Prepare a generic sample, i.e. fill in the invariant fields.
1639 * We will overwrite the from and to address before we output
1640 * the sample.
1641 */
1642 perf_prepare_sample(&header, &data, event, &regs);
1643
1644 if (perf_output_begin(&handle, event,
1645 header.size * (top - at), 1, 1))
1646 return;
1647
1648 for (; at < top; at++) {
1649 data.ip = at->from;
1650 data.addr = at->to;
1651
1652 perf_output_sample(&handle, &header, &data, event);
1653 }
1654
1655 perf_output_end(&handle);
1656
1657 /* There's new data available. */
1658 event->hw.interrupts++;
1659 event->pending_kill = POLL_IN;
1660}
1661
1662static void x86_pmu_disable(struct perf_event *event)
1663{ 1044{
1664 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1045 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1665 struct hw_perf_event *hwc = &event->hw; 1046 struct hw_perf_event *hwc = &event->hw;
@@ -1673,181 +1054,38 @@ static void x86_pmu_disable(struct perf_event *event)
1673 x86_pmu.disable(hwc, idx); 1054 x86_pmu.disable(hwc, idx);
1674 1055
1675 /* 1056 /*
1676 * Make sure the cleared pointer becomes visible before we
1677 * (potentially) free the event:
1678 */
1679 barrier();
1680
1681 /*
 1682 * Drain the remaining delta count out of an event 1057 * Drain the remaining delta count out of an event
1683 * that we are disabling: 1058 * that we are disabling:
1684 */ 1059 */
1685 x86_perf_event_update(event, hwc, idx); 1060 x86_perf_event_update(event, hwc, idx);
1686 1061
1687 /* Drain the remaining BTS records. */
1688 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1689 intel_pmu_drain_bts_buffer(cpuc);
1690
1691 cpuc->events[idx] = NULL; 1062 cpuc->events[idx] = NULL;
1692 clear_bit(idx, cpuc->used_mask);
1693
1694 perf_event_update_userpage(event);
1695}
1696
1697/*
1698 * Save and restart an expired event. Called by NMI contexts,
1699 * so it has to be careful about preempting normal event ops:
1700 */
1701static int intel_pmu_save_and_restart(struct perf_event *event)
1702{
1703 struct hw_perf_event *hwc = &event->hw;
1704 int idx = hwc->idx;
1705 int ret;
1706
1707 x86_perf_event_update(event, hwc, idx);
1708 ret = x86_perf_event_set_period(event, hwc, idx);
1709
1710 if (event->state == PERF_EVENT_STATE_ACTIVE)
1711 intel_pmu_enable_event(hwc, idx);
1712
1713 return ret;
1714}
1715
1716static void intel_pmu_reset(void)
1717{
1718 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1719 unsigned long flags;
1720 int idx;
1721
1722 if (!x86_pmu.num_events)
1723 return;
1724
1725 local_irq_save(flags);
1726
1727 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1728
1729 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1730 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1731 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1732 }
1733 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1734 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1735 }
1736 if (ds)
1737 ds->bts_index = ds->bts_buffer_base;
1738
1739 local_irq_restore(flags);
1740} 1063}
1741 1064
1742static int p6_pmu_handle_irq(struct pt_regs *regs) 1065static void x86_pmu_disable(struct perf_event *event)
1743{
1744 struct perf_sample_data data;
1745 struct cpu_hw_events *cpuc;
1746 struct perf_event *event;
1747 struct hw_perf_event *hwc;
1748 int idx, handled = 0;
1749 u64 val;
1750
1751 data.addr = 0;
1752
1753 cpuc = &__get_cpu_var(cpu_hw_events);
1754
1755 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1756 if (!test_bit(idx, cpuc->active_mask))
1757 continue;
1758
1759 event = cpuc->events[idx];
1760 hwc = &event->hw;
1761
1762 val = x86_perf_event_update(event, hwc, idx);
1763 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1764 continue;
1765
1766 /*
1767 * event overflow
1768 */
1769 handled = 1;
1770 data.period = event->hw.last_period;
1771
1772 if (!x86_perf_event_set_period(event, hwc, idx))
1773 continue;
1774
1775 if (perf_event_overflow(event, 1, &data, regs))
1776 p6_pmu_disable_event(hwc, idx);
1777 }
1778
1779 if (handled)
1780 inc_irq_stat(apic_perf_irqs);
1781
1782 return handled;
1783}
1784
1785/*
1786 * This handler is triggered by the local APIC, so the APIC IRQ handling
1787 * rules apply:
1788 */
1789static int intel_pmu_handle_irq(struct pt_regs *regs)
1790{ 1066{
1791 struct perf_sample_data data; 1067 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1792 struct cpu_hw_events *cpuc; 1068 int i;
1793 int bit, loops;
1794 u64 ack, status;
1795
1796 data.addr = 0;
1797
1798 cpuc = &__get_cpu_var(cpu_hw_events);
1799
1800 perf_disable();
1801 intel_pmu_drain_bts_buffer(cpuc);
1802 status = intel_pmu_get_status();
1803 if (!status) {
1804 perf_enable();
1805 return 0;
1806 }
1807
1808 loops = 0;
1809again:
1810 if (++loops > 100) {
1811 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1812 perf_event_print_debug();
1813 intel_pmu_reset();
1814 perf_enable();
1815 return 1;
1816 }
1817 1069
1818 inc_irq_stat(apic_perf_irqs); 1070 x86_pmu_stop(event);
1819 ack = status;
1820 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1821 struct perf_event *event = cpuc->events[bit];
1822 1071
1823 clear_bit(bit, (unsigned long *) &status); 1072 for (i = 0; i < cpuc->n_events; i++) {
1824 if (!test_bit(bit, cpuc->active_mask)) 1073 if (event == cpuc->event_list[i]) {
1825 continue;
1826 1074
1827 if (!intel_pmu_save_and_restart(event)) 1075 if (x86_pmu.put_event_constraints)
1828 continue; 1076 x86_pmu.put_event_constraints(cpuc, event);
1829 1077
1830 data.period = event->hw.last_period; 1078 while (++i < cpuc->n_events)
1079 cpuc->event_list[i-1] = cpuc->event_list[i];
1831 1080
1832 if (perf_event_overflow(event, 1, &data, regs)) 1081 --cpuc->n_events;
1833 intel_pmu_disable_event(&event->hw, bit); 1082 break;
1083 }
1834 } 1084 }
1835 1085 perf_event_update_userpage(event);
1836 intel_pmu_ack_status(ack);
1837
1838 /*
1839 * Repeat if there is more work to be done:
1840 */
1841 status = intel_pmu_get_status();
1842 if (status)
1843 goto again;
1844
1845 perf_enable();
1846
1847 return 1;
1848} 1086}
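x86_pmu_disable() above deletes the event from the dense event_list[] by shifting the tail left one slot, which keeps the list gap-free for the scheduler. The same idiom in isolation:

#include <stdio.h>

int main(void)
{
	int list[] = { 10, 20, 30, 40 };
	int n = 4, victim = 20, i;

	for (i = 0; i < n; i++) {
		if (list[i] == victim) {
			while (++i < n)
				list[i - 1] = list[i];
			--n;
			break;
		}
	}
	for (i = 0; i < n; i++)
		printf("%d ", list[i]);	/* 10 30 40 */
	printf("\n");
	return 0;
}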
1849 1087
1850static int amd_pmu_handle_irq(struct pt_regs *regs) 1088static int x86_pmu_handle_irq(struct pt_regs *regs)
1851{ 1089{
1852 struct perf_sample_data data; 1090 struct perf_sample_data data;
1853 struct cpu_hw_events *cpuc; 1091 struct cpu_hw_events *cpuc;
@@ -1857,6 +1095,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1857 u64 val; 1095 u64 val;
1858 1096
1859 data.addr = 0; 1097 data.addr = 0;
1098 data.raw = NULL;
1860 1099
1861 cpuc = &__get_cpu_var(cpu_hw_events); 1100 cpuc = &__get_cpu_var(cpu_hw_events);
1862 1101
@@ -1881,7 +1120,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1881 continue; 1120 continue;
1882 1121
1883 if (perf_event_overflow(event, 1, &data, regs)) 1122 if (perf_event_overflow(event, 1, &data, regs))
1884 amd_pmu_disable_event(hwc, idx); 1123 x86_pmu.disable(hwc, idx);
1885 } 1124 }
1886 1125
1887 if (handled) 1126 if (handled)
@@ -1964,199 +1203,146 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1964 .priority = 1 1203 .priority = 1
1965}; 1204};
1966 1205
1967static __initconst struct x86_pmu p6_pmu = { 1206static struct event_constraint unconstrained;
1968 .name = "p6", 1207static struct event_constraint emptyconstraint;
1969 .handle_irq = p6_pmu_handle_irq,
1970 .disable_all = p6_pmu_disable_all,
1971 .enable_all = p6_pmu_enable_all,
1972 .enable = p6_pmu_enable_event,
1973 .disable = p6_pmu_disable_event,
1974 .eventsel = MSR_P6_EVNTSEL0,
1975 .perfctr = MSR_P6_PERFCTR0,
1976 .event_map = p6_pmu_event_map,
1977 .raw_event = p6_pmu_raw_event,
1978 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1979 .apic = 1,
1980 .max_period = (1ULL << 31) - 1,
1981 .version = 0,
1982 .num_events = 2,
1983 /*
1984 * Events have 40 bits implemented. However they are designed such
1985 * that bits [32-39] are sign extensions of bit 31. As such the
 1986 * effective width of an event for a P6-like PMU is 32 bits only.
1987 *
1988 * See IA-32 Intel Architecture Software developer manual Vol 3B
1989 */
1990 .event_bits = 32,
1991 .event_mask = (1ULL << 32) - 1,
1992 .get_event_idx = intel_get_event_idx,
1993};
1994 1208
1995static __initconst struct x86_pmu intel_pmu = { 1209static struct event_constraint *
1996 .name = "Intel", 1210x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1997 .handle_irq = intel_pmu_handle_irq, 1211{
1998 .disable_all = intel_pmu_disable_all, 1212 struct event_constraint *c;
1999 .enable_all = intel_pmu_enable_all,
2000 .enable = intel_pmu_enable_event,
2001 .disable = intel_pmu_disable_event,
2002 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2003 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2004 .event_map = intel_pmu_event_map,
2005 .raw_event = intel_pmu_raw_event,
2006 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
2007 .apic = 1,
2008 /*
2009 * Intel PMCs cannot be accessed sanely above 32 bit width,
2010 * so we install an artificial 1<<31 period regardless of
2011 * the generic event period:
2012 */
2013 .max_period = (1ULL << 31) - 1,
2014 .enable_bts = intel_pmu_enable_bts,
2015 .disable_bts = intel_pmu_disable_bts,
2016 .get_event_idx = intel_get_event_idx,
2017};
2018 1213
2019static __initconst struct x86_pmu amd_pmu = { 1214 if (x86_pmu.event_constraints) {
2020 .name = "AMD", 1215 for_each_event_constraint(c, x86_pmu.event_constraints) {
2021 .handle_irq = amd_pmu_handle_irq, 1216 if ((event->hw.config & c->cmask) == c->code)
2022 .disable_all = amd_pmu_disable_all, 1217 return c;
2023 .enable_all = amd_pmu_enable_all, 1218 }
2024 .enable = amd_pmu_enable_event, 1219 }
2025 .disable = amd_pmu_disable_event,
2026 .eventsel = MSR_K7_EVNTSEL0,
2027 .perfctr = MSR_K7_PERFCTR0,
2028 .event_map = amd_pmu_event_map,
2029 .raw_event = amd_pmu_raw_event,
2030 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
2031 .num_events = 4,
2032 .event_bits = 48,
2033 .event_mask = (1ULL << 48) - 1,
2034 .apic = 1,
2035 /* use highest bit to detect overflow */
2036 .max_period = (1ULL << 47) - 1,
2037 .get_event_idx = gen_get_event_idx,
2038};
2039 1220
2040static __init int p6_pmu_init(void) 1221 return &unconstrained;
1222}
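x86_get_event_constraints() walks the model's constraint table and returns the first entry whose masked config matches, else the catch-all "unconstrained". A stand-alone sketch of that test; the 0xff cmask here is an assumption standing in for the event-select field mask:

#include <stdio.h>
#include <stdint.h>

struct constraint {
	uint64_t code;		/* event select value to match */
	uint64_t cmask;		/* which config bits participate */
	uint64_t idxmsk;	/* counters the event may use */
};

int main(void)
{
	struct constraint table[] = {
		{ 0x11, 0xff, 0x2 },	/* event 0x11: counter 1 only */
		{ 0x12, 0xff, 0x2 },	/* event 0x12: counter 1 only */
		{ 0, 0, 0 },		/* end marker */
	};
	uint64_t config = 0x0211;	/* umask 0x02, event 0x11 */
	struct constraint *c;

	for (c = table; c->cmask; c++) {
		if ((config & c->cmask) == c->code) {
			printf("constrained to counter mask 0x%llx\n",
			       (unsigned long long)c->idxmsk);
			return 0;
		}
	}
	printf("unconstrained\n");
	return 0;
}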
1223
1224static int x86_event_sched_in(struct perf_event *event,
1225 struct perf_cpu_context *cpuctx)
2041{ 1226{
2042 switch (boot_cpu_data.x86_model) { 1227 int ret = 0;
2043 case 1:
2044 case 3: /* Pentium Pro */
2045 case 5:
2046 case 6: /* Pentium II */
2047 case 7:
2048 case 8:
2049 case 11: /* Pentium III */
2050 event_constraints = intel_p6_event_constraints;
2051 break;
2052 case 9:
2053 case 13:
2054 /* Pentium M */
2055 event_constraints = intel_p6_event_constraints;
2056 break;
2057 default:
2058 pr_cont("unsupported p6 CPU model %d ",
2059 boot_cpu_data.x86_model);
2060 return -ENODEV;
2061 }
2062 1228
2063 x86_pmu = p6_pmu; 1229 event->state = PERF_EVENT_STATE_ACTIVE;
1230 event->oncpu = smp_processor_id();
1231 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
2064 1232
2065 if (!cpu_has_apic) { 1233 if (!is_x86_event(event))
2066 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); 1234 ret = event->pmu->enable(event);
2067 pr_info("no hardware sampling interrupt available.\n");
2068 x86_pmu.apic = 0;
2069 }
2070 1235
2071 return 0; 1236 if (!ret && !is_software_event(event))
1237 cpuctx->active_oncpu++;
1238
1239 if (!ret && event->attr.exclusive)
1240 cpuctx->exclusive = 1;
1241
1242 return ret;
2072} 1243}
2073 1244
2074static __init int intel_pmu_init(void) 1245static void x86_event_sched_out(struct perf_event *event,
1246 struct perf_cpu_context *cpuctx)
2075{ 1247{
2076 union cpuid10_edx edx; 1248 event->state = PERF_EVENT_STATE_INACTIVE;
2077 union cpuid10_eax eax; 1249 event->oncpu = -1;
2078 unsigned int unused;
2079 unsigned int ebx;
2080 int version;
2081
2082 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2083 /* check for P6 processor family */
2084 if (boot_cpu_data.x86 == 6) {
2085 return p6_pmu_init();
2086 } else {
2087 return -ENODEV;
2088 }
2089 }
2090 1250
2091 /* 1251 if (!is_x86_event(event))
2092 * Check whether the Architectural PerfMon supports 1252 event->pmu->disable(event);
2093 * Branch Misses Retired hw_event or not.
2094 */
2095 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2096 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2097 return -ENODEV;
2098 1253
2099 version = eax.split.version_id; 1254 event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
2100 if (version < 2)
2101 return -ENODEV;
2102 1255
2103 x86_pmu = intel_pmu; 1256 if (!is_software_event(event))
2104 x86_pmu.version = version; 1257 cpuctx->active_oncpu--;
2105 x86_pmu.num_events = eax.split.num_events;
2106 x86_pmu.event_bits = eax.split.bit_width;
2107 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
2108 1258
2109 /* 1259 if (event->attr.exclusive || !cpuctx->active_oncpu)
2110 * Quirk: v2 perfmon does not report fixed-purpose events, so 1260 cpuctx->exclusive = 0;
2111 * assume at least 3 events: 1261}
2112 */
2113 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
2114 1262
1263/*
1264 * Called to enable a whole group of events.
1265 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
1266 * Assumes the caller has disabled interrupts and has
1267 * frozen the PMU with hw_perf_save_disable.
1268 *
 1269 * called with the PMU disabled. If successful (return value 1), the caller is
1270 * then guaranteed to call perf_enable() and hw_perf_enable()
1271 */
1272int hw_perf_group_sched_in(struct perf_event *leader,
1273 struct perf_cpu_context *cpuctx,
1274 struct perf_event_context *ctx)
1275{
1276 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1277 struct perf_event *sub;
1278 int assign[X86_PMC_IDX_MAX];
1279 int n0, n1, ret;
1280
1281 /* n0 = total number of events */
1282 n0 = collect_events(cpuc, leader, true);
1283 if (n0 < 0)
1284 return n0;
1285
1286 ret = x86_schedule_events(cpuc, n0, assign);
1287 if (ret)
1288 return ret;
1289
1290 ret = x86_event_sched_in(leader, cpuctx);
1291 if (ret)
1292 return ret;
1293
1294 n1 = 1;
1295 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1296 if (sub->state > PERF_EVENT_STATE_OFF) {
1297 ret = x86_event_sched_in(sub, cpuctx);
1298 if (ret)
1299 goto undo;
1300 ++n1;
1301 }
1302 }
2115 /* 1303 /*
 2116 * Install the hw-cache-events table: 1304 * copy the new assignment; now we know it is possible,
 1305 * it will be used by hw_perf_enable()
2117 */ 1306 */
2118 switch (boot_cpu_data.x86_model) { 1307 memcpy(cpuc->assign, assign, n0*sizeof(int));
2119 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2120 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2121 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2122 case 29: /* six-core 45 nm xeon "Dunnington" */
2123 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2124 sizeof(hw_cache_event_ids));
2125
2126 pr_cont("Core2 events, ");
2127 event_constraints = intel_core_event_constraints;
2128 break;
2129 default:
2130 case 26:
2131 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2132 sizeof(hw_cache_event_ids));
2133 1308
2134 event_constraints = intel_nehalem_event_constraints; 1309 cpuc->n_events = n0;
2135 pr_cont("Nehalem/Corei7 events, "); 1310 cpuc->n_added = n1;
2136 break; 1311 ctx->nr_active += n1;
2137 case 28:
2138 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2139 sizeof(hw_cache_event_ids));
2140 1312
2141 pr_cont("Atom events, "); 1313 /*
2142 break; 1314 * 1 means successful and events are active
1315 * This is not quite true because we defer
1316 * actual activation until hw_perf_enable() but
1317 * this way we* ensure caller won't try to enable
1318 * individual events
1319 */
1320 return 1;
1321undo:
1322 x86_event_sched_out(leader, cpuctx);
1323 n0 = 1;
1324 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1325 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
1326 x86_event_sched_out(sub, cpuctx);
1327 if (++n0 == n1)
1328 break;
1329 }
2143 } 1330 }
2144 return 0; 1331 return ret;
2145} 1332}
2146 1333
2147static __init int amd_pmu_init(void) 1334#include "perf_event_amd.c"
2148{ 1335#include "perf_event_p6.c"
2149 /* Performance-monitoring supported from K7 and later: */ 1336#include "perf_event_intel.c"
2150 if (boot_cpu_data.x86 < 6)
2151 return -ENODEV;
2152 1337
2153 x86_pmu = amd_pmu; 1338static void __init pmu_check_apic(void)
2154 1339{
2155 /* Events are common for all AMDs */ 1340 if (cpu_has_apic)
2156 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 1341 return;
2157 sizeof(hw_cache_event_ids));
2158 1342
2159 return 0; 1343 x86_pmu.apic = 0;
1344 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1345 pr_info("no hardware sampling interrupt available.\n");
2160} 1346}
2161 1347
2162void __init init_hw_perf_events(void) 1348void __init init_hw_perf_events(void)
@@ -2180,6 +1366,8 @@ void __init init_hw_perf_events(void)
2180 return; 1366 return;
2181 } 1367 }
2182 1368
1369 pmu_check_apic();
1370
2183 pr_cont("%s PMU driver.\n", x86_pmu.name); 1371 pr_cont("%s PMU driver.\n", x86_pmu.name);
2184 1372
2185 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { 1373 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
@@ -2203,6 +1391,10 @@ void __init init_hw_perf_events(void)
2203 perf_events_lapic_init(); 1391 perf_events_lapic_init();
2204 register_die_notifier(&perf_event_nmi_notifier); 1392 register_die_notifier(&perf_event_nmi_notifier);
2205 1393
1394 unconstrained = (struct event_constraint)
1395 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1396 0, x86_pmu.num_events);
1397
2206 pr_info("... version: %d\n", x86_pmu.version); 1398 pr_info("... version: %d\n", x86_pmu.version);
2207 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1399 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2208 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1400 pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -2220,50 +1412,79 @@ static inline void x86_pmu_read(struct perf_event *event)
2220static const struct pmu pmu = { 1412static const struct pmu pmu = {
2221 .enable = x86_pmu_enable, 1413 .enable = x86_pmu_enable,
2222 .disable = x86_pmu_disable, 1414 .disable = x86_pmu_disable,
1415 .start = x86_pmu_start,
1416 .stop = x86_pmu_stop,
2223 .read = x86_pmu_read, 1417 .read = x86_pmu_read,
2224 .unthrottle = x86_pmu_unthrottle, 1418 .unthrottle = x86_pmu_unthrottle,
2225}; 1419};
2226 1420
2227static int 1421/*
2228validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) 1422 * validate a single event group
2229{ 1423 *
 2230 struct hw_perf_event fake_event = event->hw; 1424 * validation includes:
 2231 1425 * - check events are compatible with each other
2232 if (event->pmu && event->pmu != &pmu) 1426 * - events do not compete for the same counter
2233 return 0; 1427 * - number of events <= number of counters
2234 1428 *
2235 return x86_schedule_event(cpuc, &fake_event) >= 0; 1429 * validation ensures the group can be loaded onto the
2236} 1430 * PMU if it was the only group available.
2237 1431 */
2238static int validate_group(struct perf_event *event) 1432static int validate_group(struct perf_event *event)
2239{ 1433{
2240 struct perf_event *sibling, *leader = event->group_leader; 1434 struct perf_event *leader = event->group_leader;
2241 struct cpu_hw_events fake_pmu; 1435 struct cpu_hw_events *fake_cpuc;
1436 int ret, n;
2242 1437
2243 memset(&fake_pmu, 0, sizeof(fake_pmu)); 1438 ret = -ENOMEM;
1439 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1440 if (!fake_cpuc)
1441 goto out;
2244 1442
2245 if (!validate_event(&fake_pmu, leader)) 1443 /*
2246 return -ENOSPC; 1444 * the event is not yet connected with its
1445 * siblings therefore we must first collect
1446 * existing siblings, then add the new event
1447 * before we can simulate the scheduling
1448 */
1449 ret = -ENOSPC;
1450 n = collect_events(fake_cpuc, leader, true);
1451 if (n < 0)
1452 goto out_free;
2247 1453
2248 list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 1454 fake_cpuc->n_events = n;
2249 if (!validate_event(&fake_pmu, sibling)) 1455 n = collect_events(fake_cpuc, event, false);
2250 return -ENOSPC; 1456 if (n < 0)
2251 } 1457 goto out_free;
2252 1458
2253 if (!validate_event(&fake_pmu, event)) 1459 fake_cpuc->n_events = n;
2254 return -ENOSPC;
2255 1460
2256 return 0; 1461 ret = x86_schedule_events(fake_cpuc, n, NULL);
1462
1463out_free:
1464 kfree(fake_cpuc);
1465out:
1466 return ret;
2257} 1467}
2258 1468
2259const struct pmu *hw_perf_event_init(struct perf_event *event) 1469const struct pmu *hw_perf_event_init(struct perf_event *event)
2260{ 1470{
1471 const struct pmu *tmp;
2261 int err; 1472 int err;
2262 1473
2263 err = __hw_perf_event_init(event); 1474 err = __hw_perf_event_init(event);
2264 if (!err) { 1475 if (!err) {
1476 /*
 1477 * we temporarily connect the event to its pmu
1478 * such that validate_group() can classify
1479 * it as an x86 event using is_x86_event()
1480 */
1481 tmp = event->pmu;
1482 event->pmu = &pmu;
1483
2265 if (event->group_leader != event) 1484 if (event->group_leader != event)
2266 err = validate_group(event); 1485 err = validate_group(event);
1486
1487 event->pmu = tmp;
2267 } 1488 }
2268 if (err) { 1489 if (err) {
2269 if (event->destroy) 1490 if (event->destroy)
@@ -2287,7 +1508,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2287 1508
2288static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1509static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2289static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1510static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2290static DEFINE_PER_CPU(int, in_nmi_frame);
2291 1511
2292 1512
2293static void 1513static void
@@ -2303,9 +1523,6 @@ static void backtrace_warning(void *data, char *msg)
2303 1523
2304static int backtrace_stack(void *data, char *name) 1524static int backtrace_stack(void *data, char *name)
2305{ 1525{
2306 per_cpu(in_nmi_frame, smp_processor_id()) =
2307 x86_is_stack_id(NMI_STACK, name);
2308
2309 return 0; 1526 return 0;
2310} 1527}
2311 1528
@@ -2313,9 +1530,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
2313{ 1530{
2314 struct perf_callchain_entry *entry = data; 1531 struct perf_callchain_entry *entry = data;
2315 1532
2316 if (per_cpu(in_nmi_frame, smp_processor_id()))
2317 return;
2318
2319 if (reliable) 1533 if (reliable)
2320 callchain_store(entry, addr); 1534 callchain_store(entry, addr);
2321} 1535}
@@ -2325,6 +1539,7 @@ static const struct stacktrace_ops backtrace_ops = {
2325 .warning_symbol = backtrace_warning_symbol, 1539 .warning_symbol = backtrace_warning_symbol,
2326 .stack = backtrace_stack, 1540 .stack = backtrace_stack,
2327 .address = backtrace_address, 1541 .address = backtrace_address,
1542 .walk_stack = print_context_stack_bp,
2328}; 1543};
2329 1544
2330#include "../dumpstack.h" 1545#include "../dumpstack.h"
@@ -2335,7 +1550,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2335 callchain_store(entry, PERF_CONTEXT_KERNEL); 1550 callchain_store(entry, PERF_CONTEXT_KERNEL);
2336 callchain_store(entry, regs->ip); 1551 callchain_store(entry, regs->ip);
2337 1552
2338 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); 1553 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
2339} 1554}
2340 1555
2341/* 1556/*
@@ -2421,9 +1636,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2421 1636
2422 is_user = user_mode(regs); 1637 is_user = user_mode(regs);
2423 1638
2424 if (!current || current->pid == 0)
2425 return;
2426
2427 if (is_user && current->state != TASK_RUNNING) 1639 if (is_user && current->state != TASK_RUNNING)
2428 return; 1640 return;
2429 1641
@@ -2453,4 +1665,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2453void hw_perf_event_setup_online(int cpu) 1665void hw_perf_event_setup_online(int cpu)
2454{ 1666{
2455 init_debug_store_on_cpu(cpu); 1667 init_debug_store_on_cpu(cpu);
1668
1669 switch (boot_cpu_data.x86_vendor) {
1670 case X86_VENDOR_AMD:
1671 amd_pmu_cpu_online(cpu);
1672 break;
1673 default:
1674 return;
1675 }
1676}
1677
1678void hw_perf_event_setup_offline(int cpu)
1679{
1680 init_debug_store_on_cpu(cpu);
1681
1682 switch (boot_cpu_data.x86_vendor) {
1683 case X86_VENDOR_AMD:
1684 amd_pmu_cpu_offline(cpu);
1685 break;
1686 default:
1687 return;
1688 }
2456} 1689}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..8f3dbfda3c4f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
1#ifdef CONFIG_CPU_SUP_AMD
2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
9{
10 [ C(L1D) ] = {
11 [ C(OP_READ) ] = {
12 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
13 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
14 },
15 [ C(OP_WRITE) ] = {
16 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
17 [ C(RESULT_MISS) ] = 0,
18 },
19 [ C(OP_PREFETCH) ] = {
20 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
21 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
22 },
23 },
24 [ C(L1I ) ] = {
25 [ C(OP_READ) ] = {
26 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
27 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
28 },
29 [ C(OP_WRITE) ] = {
30 [ C(RESULT_ACCESS) ] = -1,
31 [ C(RESULT_MISS) ] = -1,
32 },
33 [ C(OP_PREFETCH) ] = {
34 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
35 [ C(RESULT_MISS) ] = 0,
36 },
37 },
38 [ C(LL ) ] = {
39 [ C(OP_READ) ] = {
40 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
41 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
42 },
43 [ C(OP_WRITE) ] = {
44 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
45 [ C(RESULT_MISS) ] = 0,
46 },
47 [ C(OP_PREFETCH) ] = {
48 [ C(RESULT_ACCESS) ] = 0,
49 [ C(RESULT_MISS) ] = 0,
50 },
51 },
52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
 55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
56 },
57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0,
60 },
61 [ C(OP_PREFETCH) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0,
64 },
65 },
66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = {
 68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
70 },
71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1,
73 [ C(RESULT_MISS) ] = -1,
74 },
75 [ C(OP_PREFETCH) ] = {
76 [ C(RESULT_ACCESS) ] = -1,
77 [ C(RESULT_MISS) ] = -1,
78 },
79 },
80 [ C(BPU ) ] = {
81 [ C(OP_READ) ] = {
82 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
83 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
84 },
85 [ C(OP_WRITE) ] = {
86 [ C(RESULT_ACCESS) ] = -1,
87 [ C(RESULT_MISS) ] = -1,
88 },
89 [ C(OP_PREFETCH) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 },
94};
95
96/*
97 * AMD Performance Monitor K7 and later.
98 */
99static const u64 amd_perfmon_event_map[] =
100{
101 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
102 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
103 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
104 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
105 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
106 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
107};
108
109static u64 amd_pmu_event_map(int hw_event)
110{
111 return amd_perfmon_event_map[hw_event];
112}
113
114static u64 amd_pmu_raw_event(u64 hw_event)
115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
121
122#define K7_EVNTSEL_MASK \
123 (K7_EVNTSEL_EVENT_MASK | \
124 K7_EVNTSEL_UNIT_MASK | \
125 K7_EVNTSEL_EDGE_MASK | \
126 K7_EVNTSEL_INV_MASK | \
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130}
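amd_pmu_raw_event() above whitelists only the event, unit, edge, invert and counter-mask fields; anything else a user smuggles into a raw config (for instance the enable or interrupt bits) is stripped. A quick stand-alone check:

#include <stdio.h>
#include <stdint.h>

#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK \
	(K7_EVNTSEL_EVENT_MASK | K7_EVNTSEL_UNIT_MASK | \
	 K7_EVNTSEL_EDGE_MASK | K7_EVNTSEL_INV_MASK | K7_EVNTSEL_REG_MASK)

int main(void)
{
	uint64_t raw = ~0ULL;	/* user passes all-ones */

	/* bits 16/17 (USR/OS), 20 (INT) and 22 (EN) do not survive */
	printf("filtered config: 0x%llx\n",
	       (unsigned long long)(raw & K7_EVNTSEL_MASK));
	return 0;
}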
131
132/*
133 * AMD64 events are detected based on their event codes.
134 */
135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
136{
137 return (hwc->config & 0xe0) == 0xe0;
138}
139
140static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
141 struct perf_event *event)
142{
143 struct hw_perf_event *hwc = &event->hw;
144 struct amd_nb *nb = cpuc->amd_nb;
145 int i;
146
147 /*
148 * only care about NB events
149 */
150 if (!(nb && amd_is_nb_event(hwc)))
151 return;
152
153 /*
 154 * need to scan the whole list because the event may not have
155 * been assigned during scheduling
156 *
157 * no race condition possible because event can only
158 * be removed on one CPU at a time AND PMU is disabled
159 * when we come here
160 */
161 for (i = 0; i < x86_pmu.num_events; i++) {
162 if (nb->owners[i] == event) {
163 cmpxchg(nb->owners+i, event, NULL);
164 break;
165 }
166 }
167}
168
169 /*
170 * AMD64 NorthBridge events need special treatment because
171 * counter access needs to be synchronized across all cores
172 * of a package. Refer to BKDG section 3.12
173 *
 174 * NB events are events measuring L3 cache and HyperTransport
 175 * traffic. They are identified by an event code >= 0xe00.
 176 * They measure events on the NorthBridge, which is shared
177 * by all cores on a package. NB events are counted on a
178 * shared set of counters. When a NB event is programmed
179 * in a counter, the data actually comes from a shared
180 * counter. Thus, access to those counters needs to be
181 * synchronized.
182 *
183 * We implement the synchronization such that no two cores
184 * can be measuring NB events using the same counters. Thus,
185 * we maintain a per-NB allocation table. The available slot
186 * is propagated using the event_constraint structure.
187 *
188 * We provide only one choice for each NB event based on
189 * the fact that only NB events have restrictions. Consequently,
190 * if a counter is available, there is a guarantee the NB event
191 * will be assigned to it. If no slot is available, an empty
192 * constraint is returned and scheduling will eventually fail
193 * for this event.
194 *
 195 * Note that all cores attached to the same NB compete for the same
196 * counters to host NB events, this is why we use atomic ops. Some
197 * multi-chip CPUs may have more than one NB.
198 *
199 * Given that resources are allocated (cmpxchg), they must be
200 * eventually freed for others to use. This is accomplished by
201 * calling amd_put_event_constraints().
202 *
203 * Non NB events are not impacted by this restriction.
204 */
205static struct event_constraint *
206amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
207{
208 struct hw_perf_event *hwc = &event->hw;
209 struct amd_nb *nb = cpuc->amd_nb;
210 struct perf_event *old = NULL;
211 int max = x86_pmu.num_events;
212 int i, j, k = -1;
213
214 /*
215 * if not NB event or no NB, then no constraints
216 */
217 if (!(nb && amd_is_nb_event(hwc)))
218 return &unconstrained;
219
220 /*
221 * detect if already present, if so reuse
222 *
223 * cannot merge with actual allocation
224 * because of possible holes
225 *
226 * event can already be present yet not assigned (in hwc->idx)
227 * because of successive calls to x86_schedule_events() from
228 * hw_perf_group_sched_in() without hw_perf_enable()
229 */
230 for (i = 0; i < max; i++) {
231 /*
232 * keep track of first free slot
233 */
234 if (k == -1 && !nb->owners[i])
235 k = i;
236
237 /* already present, reuse */
238 if (nb->owners[i] == event)
239 goto done;
240 }
241 /*
242 * not present, so grab a new slot
243 * starting either at:
244 */
245 if (hwc->idx != -1) {
246 /* previous assignment */
247 i = hwc->idx;
248 } else if (k != -1) {
249 /* start from free slot found */
250 i = k;
251 } else {
252 /*
253 * event not found, no slot found in
254 * first pass, try again from the
255 * beginning
256 */
257 i = 0;
258 }
259 j = i;
260 do {
261 old = cmpxchg(nb->owners+i, NULL, event);
262 if (!old)
263 break;
264 if (++i == max)
265 i = 0;
266 } while (i != j);
267done:
268 if (!old)
269 return &nb->event_constraints[i];
270
271 return &emptyconstraint;
272}
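The claim loop above is lock-free: each core scans the shared owners[] table from a starting hint (the previous index, or the first free slot seen), claims a slot with cmpxchg(), and wraps around once before giving up. A simplified user-space sketch using the GCC __sync builtin; it omits the "already owned, reuse" fast path:

#include <stdio.h>
#include <stddef.h>

#define MAX_SLOTS 4

static void *owners[MAX_SLOTS];

static int claim_slot(void *event, int hint)
{
	int i = (hint >= 0 && hint < MAX_SLOTS) ? hint : 0;
	int j = i;

	do {
		/* atomically: if (owners[i] == NULL) owners[i] = event */
		if (__sync_val_compare_and_swap(&owners[i], NULL, event) == NULL)
			return i;
		if (++i == MAX_SLOTS)
			i = 0;
	} while (i != j);

	return -1;	/* table full: the empty constraint in the kernel */
}

int main(void)
{
	int ev1, ev2;

	printf("ev1 -> slot %d\n", claim_slot(&ev1, 2));	/* slot 2 */
	printf("ev2 -> slot %d\n", claim_slot(&ev2, 2));	/* taken, gets 3 */
	return 0;
}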
273
274static __initconst struct x86_pmu amd_pmu = {
275 .name = "AMD",
276 .handle_irq = x86_pmu_handle_irq,
277 .disable_all = x86_pmu_disable_all,
278 .enable_all = x86_pmu_enable_all,
279 .enable = x86_pmu_enable_event,
280 .disable = x86_pmu_disable_event,
281 .eventsel = MSR_K7_EVNTSEL0,
282 .perfctr = MSR_K7_PERFCTR0,
283 .event_map = amd_pmu_event_map,
284 .raw_event = amd_pmu_raw_event,
285 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
286 .num_events = 4,
287 .event_bits = 48,
288 .event_mask = (1ULL << 48) - 1,
289 .apic = 1,
290 /* use highest bit to detect overflow */
291 .max_period = (1ULL << 47) - 1,
292 .get_event_constraints = amd_get_event_constraints,
293 .put_event_constraints = amd_put_event_constraints
294};
295
296static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
297{
298 struct amd_nb *nb;
299 int i;
300
301 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
302 if (!nb)
303 return NULL;
304
305 memset(nb, 0, sizeof(*nb));
306 nb->nb_id = nb_id;
307
308 /*
309 * initialize all possible NB constraints
310 */
311 for (i = 0; i < x86_pmu.num_events; i++) {
312 set_bit(i, nb->event_constraints[i].idxmsk);
313 nb->event_constraints[i].weight = 1;
314 }
315 return nb;
316}
317
318static void amd_pmu_cpu_online(int cpu)
319{
320 struct cpu_hw_events *cpu1, *cpu2;
321 struct amd_nb *nb = NULL;
322 int i, nb_id;
323
324 if (boot_cpu_data.x86_max_cores < 2)
325 return;
326
327 /*
328 * function may be called too early in the
329 * boot process, in which case nb_id is bogus
330 */
331 nb_id = amd_get_nb_id(cpu);
332 if (nb_id == BAD_APICID)
333 return;
334
335 cpu1 = &per_cpu(cpu_hw_events, cpu);
336 cpu1->amd_nb = NULL;
337
338 raw_spin_lock(&amd_nb_lock);
339
340 for_each_online_cpu(i) {
341 cpu2 = &per_cpu(cpu_hw_events, i);
342 nb = cpu2->amd_nb;
343 if (!nb)
344 continue;
345 if (nb->nb_id == nb_id)
346 goto found;
347 }
348
349 nb = amd_alloc_nb(cpu, nb_id);
350 if (!nb) {
351 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
352 raw_spin_unlock(&amd_nb_lock);
353 return;
354 }
355found:
356 nb->refcnt++;
357 cpu1->amd_nb = nb;
358
359 raw_spin_unlock(&amd_nb_lock);
360}
361
362static void amd_pmu_cpu_offline(int cpu)
363{
364 struct cpu_hw_events *cpuhw;
365
366 if (boot_cpu_data.x86_max_cores < 2)
367 return;
368
369 cpuhw = &per_cpu(cpu_hw_events, cpu);
370
371 raw_spin_lock(&amd_nb_lock);
372
373 if (--cpuhw->amd_nb->refcnt == 0)
374 kfree(cpuhw->amd_nb);
375
376 cpuhw->amd_nb = NULL;
377
378 raw_spin_unlock(&amd_nb_lock);
379}
380
381static __init int amd_pmu_init(void)
382{
383 /* Performance-monitoring supported from K7 and later: */
384 if (boot_cpu_data.x86 < 6)
385 return -ENODEV;
386
387 x86_pmu = amd_pmu;
388
389 /* Events are common for all AMDs */
390 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
391 sizeof(hw_cache_event_ids));
392
393 /*
394 * explicitly initialize the boot cpu, other cpus will get
395 * the cpu hotplug callbacks from smp_init()
396 */
397 amd_pmu_cpu_online(smp_processor_id());
398 return 0;
399}
400
401#else /* CONFIG_CPU_SUP_AMD */
402
403static int amd_pmu_init(void)
404{
405 return 0;
406}
407
408static void amd_pmu_cpu_online(int cpu)
409{
410}
411
412static void amd_pmu_cpu_offline(int cpu)
413{
414}
415
416#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..cf6590cf4a5f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,971 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Intel PerfMon v3. Used on Core2 and later.
5 */
6static const u64 intel_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
15};
16
17static struct event_constraint intel_core_event_constraints[] =
18{
19 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
20 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
21 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
22 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
23 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
24 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
25 EVENT_CONSTRAINT_END
26};
27
28static struct event_constraint intel_core2_event_constraints[] =
29{
30 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
31 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
32 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
33 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
34 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
35 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
36 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
37 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
38 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
39 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
40 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
41 EVENT_CONSTRAINT_END
42};
43
44static struct event_constraint intel_nehalem_event_constraints[] =
45{
46 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
47 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
48 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
49 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
50 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
51 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
52 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
53 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
54 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
55 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
56 EVENT_CONSTRAINT_END
57};
58
59static struct event_constraint intel_westmere_event_constraints[] =
60{
61 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
62 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
63 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
64 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
65 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
66 EVENT_CONSTRAINT_END
67};
68
69static struct event_constraint intel_gen_event_constraints[] =
70{
71 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
72 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
73 EVENT_CONSTRAINT_END
74};
75
76static u64 intel_pmu_event_map(int hw_event)
77{
78 return intel_perfmon_event_map[hw_event];
79}
80
81static __initconst u64 westmere_hw_cache_event_ids
82 [PERF_COUNT_HW_CACHE_MAX]
83 [PERF_COUNT_HW_CACHE_OP_MAX]
84 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
85{
86 [ C(L1D) ] = {
87 [ C(OP_READ) ] = {
88 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
89 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
90 },
91 [ C(OP_WRITE) ] = {
 92 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
93 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
94 },
95 [ C(OP_PREFETCH) ] = {
96 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
97 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
98 },
99 },
100 [ C(L1I ) ] = {
101 [ C(OP_READ) ] = {
102 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
103 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
104 },
105 [ C(OP_WRITE) ] = {
106 [ C(RESULT_ACCESS) ] = -1,
107 [ C(RESULT_MISS) ] = -1,
108 },
109 [ C(OP_PREFETCH) ] = {
110 [ C(RESULT_ACCESS) ] = 0x0,
111 [ C(RESULT_MISS) ] = 0x0,
112 },
113 },
114 [ C(LL ) ] = {
115 [ C(OP_READ) ] = {
116 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
117 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
118 },
119 [ C(OP_WRITE) ] = {
120 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
121 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
122 },
123 [ C(OP_PREFETCH) ] = {
124 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
125 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
126 },
127 },
128 [ C(DTLB) ] = {
129 [ C(OP_READ) ] = {
130 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
131 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
132 },
133 [ C(OP_WRITE) ] = {
 134 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
135 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
136 },
137 [ C(OP_PREFETCH) ] = {
138 [ C(RESULT_ACCESS) ] = 0x0,
139 [ C(RESULT_MISS) ] = 0x0,
140 },
141 },
142 [ C(ITLB) ] = {
143 [ C(OP_READ) ] = {
144 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
145 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
146 },
147 [ C(OP_WRITE) ] = {
148 [ C(RESULT_ACCESS) ] = -1,
149 [ C(RESULT_MISS) ] = -1,
150 },
151 [ C(OP_PREFETCH) ] = {
152 [ C(RESULT_ACCESS) ] = -1,
153 [ C(RESULT_MISS) ] = -1,
154 },
155 },
156 [ C(BPU ) ] = {
157 [ C(OP_READ) ] = {
158 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
159 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
160 },
161 [ C(OP_WRITE) ] = {
162 [ C(RESULT_ACCESS) ] = -1,
163 [ C(RESULT_MISS) ] = -1,
164 },
165 [ C(OP_PREFETCH) ] = {
166 [ C(RESULT_ACCESS) ] = -1,
167 [ C(RESULT_MISS) ] = -1,
168 },
169 },
170};
171
172static __initconst u64 nehalem_hw_cache_event_ids
173 [PERF_COUNT_HW_CACHE_MAX]
174 [PERF_COUNT_HW_CACHE_OP_MAX]
175 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
176{
177 [ C(L1D) ] = {
178 [ C(OP_READ) ] = {
179 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
180 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
181 },
182 [ C(OP_WRITE) ] = {
183 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
184 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
185 },
186 [ C(OP_PREFETCH) ] = {
187 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
188 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
189 },
190 },
191 [ C(L1I ) ] = {
192 [ C(OP_READ) ] = {
193 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
194 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
195 },
196 [ C(OP_WRITE) ] = {
197 [ C(RESULT_ACCESS) ] = -1,
198 [ C(RESULT_MISS) ] = -1,
199 },
200 [ C(OP_PREFETCH) ] = {
201 [ C(RESULT_ACCESS) ] = 0x0,
202 [ C(RESULT_MISS) ] = 0x0,
203 },
204 },
205 [ C(LL ) ] = {
206 [ C(OP_READ) ] = {
207 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
208 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
209 },
210 [ C(OP_WRITE) ] = {
211 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
212 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
213 },
214 [ C(OP_PREFETCH) ] = {
215 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
216 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
217 },
218 },
219 [ C(DTLB) ] = {
220 [ C(OP_READ) ] = {
221 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
222 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
223 },
224 [ C(OP_WRITE) ] = {
225 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
226 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
227 },
228 [ C(OP_PREFETCH) ] = {
229 [ C(RESULT_ACCESS) ] = 0x0,
230 [ C(RESULT_MISS) ] = 0x0,
231 },
232 },
233 [ C(ITLB) ] = {
234 [ C(OP_READ) ] = {
235 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
236 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
237 },
238 [ C(OP_WRITE) ] = {
239 [ C(RESULT_ACCESS) ] = -1,
240 [ C(RESULT_MISS) ] = -1,
241 },
242 [ C(OP_PREFETCH) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 },
247 [ C(BPU ) ] = {
248 [ C(OP_READ) ] = {
249 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
250 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
251 },
252 [ C(OP_WRITE) ] = {
253 [ C(RESULT_ACCESS) ] = -1,
254 [ C(RESULT_MISS) ] = -1,
255 },
256 [ C(OP_PREFETCH) ] = {
257 [ C(RESULT_ACCESS) ] = -1,
258 [ C(RESULT_MISS) ] = -1,
259 },
260 },
261};
262
263static __initconst u64 core2_hw_cache_event_ids
264 [PERF_COUNT_HW_CACHE_MAX]
265 [PERF_COUNT_HW_CACHE_OP_MAX]
266 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
267{
268 [ C(L1D) ] = {
269 [ C(OP_READ) ] = {
270 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
271 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
272 },
273 [ C(OP_WRITE) ] = {
274 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
275 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
276 },
277 [ C(OP_PREFETCH) ] = {
278 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
279 [ C(RESULT_MISS) ] = 0,
280 },
281 },
282 [ C(L1I ) ] = {
283 [ C(OP_READ) ] = {
284 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
285 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
286 },
287 [ C(OP_WRITE) ] = {
288 [ C(RESULT_ACCESS) ] = -1,
289 [ C(RESULT_MISS) ] = -1,
290 },
291 [ C(OP_PREFETCH) ] = {
292 [ C(RESULT_ACCESS) ] = 0,
293 [ C(RESULT_MISS) ] = 0,
294 },
295 },
296 [ C(LL ) ] = {
297 [ C(OP_READ) ] = {
298 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
299 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
300 },
301 [ C(OP_WRITE) ] = {
302 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
303 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
304 },
305 [ C(OP_PREFETCH) ] = {
306 [ C(RESULT_ACCESS) ] = 0,
307 [ C(RESULT_MISS) ] = 0,
308 },
309 },
310 [ C(DTLB) ] = {
311 [ C(OP_READ) ] = {
312 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
313 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
317 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
318 },
319 [ C(OP_PREFETCH) ] = {
320 [ C(RESULT_ACCESS) ] = 0,
321 [ C(RESULT_MISS) ] = 0,
322 },
323 },
324 [ C(ITLB) ] = {
325 [ C(OP_READ) ] = {
326 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
327 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
328 },
329 [ C(OP_WRITE) ] = {
330 [ C(RESULT_ACCESS) ] = -1,
331 [ C(RESULT_MISS) ] = -1,
332 },
333 [ C(OP_PREFETCH) ] = {
334 [ C(RESULT_ACCESS) ] = -1,
335 [ C(RESULT_MISS) ] = -1,
336 },
337 },
338 [ C(BPU ) ] = {
339 [ C(OP_READ) ] = {
340 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
341 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
342 },
343 [ C(OP_WRITE) ] = {
344 [ C(RESULT_ACCESS) ] = -1,
345 [ C(RESULT_MISS) ] = -1,
346 },
347 [ C(OP_PREFETCH) ] = {
348 [ C(RESULT_ACCESS) ] = -1,
349 [ C(RESULT_MISS) ] = -1,
350 },
351 },
352};
353
354static __initconst u64 atom_hw_cache_event_ids
355 [PERF_COUNT_HW_CACHE_MAX]
356 [PERF_COUNT_HW_CACHE_OP_MAX]
357 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
358{
359 [ C(L1D) ] = {
360 [ C(OP_READ) ] = {
361 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
362 [ C(RESULT_MISS) ] = 0,
363 },
364 [ C(OP_WRITE) ] = {
365 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
366 [ C(RESULT_MISS) ] = 0,
367 },
368 [ C(OP_PREFETCH) ] = {
369 [ C(RESULT_ACCESS) ] = 0x0,
370 [ C(RESULT_MISS) ] = 0,
371 },
372 },
373 [ C(L1I ) ] = {
374 [ C(OP_READ) ] = {
375 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
376 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
377 },
378 [ C(OP_WRITE) ] = {
379 [ C(RESULT_ACCESS) ] = -1,
380 [ C(RESULT_MISS) ] = -1,
381 },
382 [ C(OP_PREFETCH) ] = {
383 [ C(RESULT_ACCESS) ] = 0,
384 [ C(RESULT_MISS) ] = 0,
385 },
386 },
387 [ C(LL ) ] = {
388 [ C(OP_READ) ] = {
389 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
390 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
391 },
392 [ C(OP_WRITE) ] = {
393 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
394 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
395 },
396 [ C(OP_PREFETCH) ] = {
397 [ C(RESULT_ACCESS) ] = 0,
398 [ C(RESULT_MISS) ] = 0,
399 },
400 },
401 [ C(DTLB) ] = {
402 [ C(OP_READ) ] = {
403 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
404 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
405 },
406 [ C(OP_WRITE) ] = {
407 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
408 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
409 },
410 [ C(OP_PREFETCH) ] = {
411 [ C(RESULT_ACCESS) ] = 0,
412 [ C(RESULT_MISS) ] = 0,
413 },
414 },
415 [ C(ITLB) ] = {
416 [ C(OP_READ) ] = {
417 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
418 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
419 },
420 [ C(OP_WRITE) ] = {
421 [ C(RESULT_ACCESS) ] = -1,
422 [ C(RESULT_MISS) ] = -1,
423 },
424 [ C(OP_PREFETCH) ] = {
425 [ C(RESULT_ACCESS) ] = -1,
426 [ C(RESULT_MISS) ] = -1,
427 },
428 },
429 [ C(BPU ) ] = {
430 [ C(OP_READ) ] = {
431 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
432 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
433 },
434 [ C(OP_WRITE) ] = {
435 [ C(RESULT_ACCESS) ] = -1,
436 [ C(RESULT_MISS) ] = -1,
437 },
438 [ C(OP_PREFETCH) ] = {
439 [ C(RESULT_ACCESS) ] = -1,
440 [ C(RESULT_MISS) ] = -1,
441 },
442 },
443};
444
445static u64 intel_pmu_raw_event(u64 hw_event)
446{
447#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
448#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
449#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
450#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
451#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
452
453#define CORE_EVNTSEL_MASK \
454 (INTEL_ARCH_EVTSEL_MASK | \
455 INTEL_ARCH_UNIT_MASK | \
456 INTEL_ARCH_EDGE_MASK | \
457 INTEL_ARCH_INV_MASK | \
458 INTEL_ARCH_CNT_MASK)
459
460 return hw_event & CORE_EVNTSEL_MASK;
461}
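/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * intel_pmu_raw_event() keeps only the architecturally defined EVTSEL
 * fields of a user-supplied raw config: event select, unit mask, edge,
 * invert and counter mask. Privilege/interrupt enable bits are stripped
 * here and applied separately by the core code. The exact INTEL_ARCH_*
 * mask values live in a header outside this file and are assumed below:
 */
#if 0
        u64 raw = 0x1038401c0ULL;   /* cmask 3, inv, edge, umask 1, event 0xc0,
                                       plus a stray bit 32 */
        u64 cfg = intel_pmu_raw_event(raw);     /* == 0x038401c0 */
#endif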
462
463static void intel_pmu_enable_bts(u64 config)
464{
465 unsigned long debugctlmsr;
466
467 debugctlmsr = get_debugctlmsr();
468
469 debugctlmsr |= X86_DEBUGCTL_TR;
470 debugctlmsr |= X86_DEBUGCTL_BTS;
471 debugctlmsr |= X86_DEBUGCTL_BTINT;
472
473 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
474 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
475
476 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
477 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
478
479 update_debugctlmsr(debugctlmsr);
480}
481
482static void intel_pmu_disable_bts(void)
483{
484 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
485 unsigned long debugctlmsr;
486
487 if (!cpuc->ds)
488 return;
489
490 debugctlmsr = get_debugctlmsr();
491
492 debugctlmsr &=
493 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
494 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
495
496 update_debugctlmsr(debugctlmsr);
497}
498
499static void intel_pmu_disable_all(void)
500{
501 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
502
503 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
504
505 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
506 intel_pmu_disable_bts();
507}
508
509static void intel_pmu_enable_all(void)
510{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
512
513 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
514
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
516 struct perf_event *event =
517 cpuc->events[X86_PMC_IDX_FIXED_BTS];
518
519 if (WARN_ON_ONCE(!event))
520 return;
521
522 intel_pmu_enable_bts(event->hw.config);
523 }
524}
525
526static inline u64 intel_pmu_get_status(void)
527{
528 u64 status;
529
530 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
531
532 return status;
533}
534
535static inline void intel_pmu_ack_status(u64 ack)
536{
537 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
538}
539
540static inline void
541intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
542{
543 int idx = __idx - X86_PMC_IDX_FIXED;
544 u64 ctrl_val, mask;
545
546 mask = 0xfULL << (idx * 4);
547
548 rdmsrl(hwc->config_base, ctrl_val);
549 ctrl_val &= ~mask;
550 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
551}
552
553static void intel_pmu_drain_bts_buffer(void)
554{
555 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
556 struct debug_store *ds = cpuc->ds;
557 struct bts_record {
558 u64 from;
559 u64 to;
560 u64 flags;
561 };
562 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
563 struct bts_record *at, *top;
564 struct perf_output_handle handle;
565 struct perf_event_header header;
566 struct perf_sample_data data;
567 struct pt_regs regs;
568
569 if (!event)
570 return;
571
572 if (!ds)
573 return;
574
575 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
576 top = (struct bts_record *)(unsigned long)ds->bts_index;
577
578 if (top <= at)
579 return;
580
581 ds->bts_index = ds->bts_buffer_base;
582
583
584 data.period = event->hw.last_period;
585 data.addr = 0;
586 data.raw = NULL;
587 regs.ip = 0;
588
589 /*
590 * Prepare a generic sample, i.e. fill in the invariant fields.
591 * We will overwrite the from and to address before we output
592 * the sample.
593 */
594 perf_prepare_sample(&header, &data, event, &regs);
595
596 if (perf_output_begin(&handle, event,
597 header.size * (top - at), 1, 1))
598 return;
599
600 for (; at < top; at++) {
601 data.ip = at->from;
602 data.addr = at->to;
603
604 perf_output_sample(&handle, &header, &data, event);
605 }
606
607 perf_output_end(&handle);
608
609 /* There's new data available. */
610 event->hw.interrupts++;
611 event->pending_kill = POLL_IN;
612}
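/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * A BTS record is three 64-bit words (from, to, flags), i.e. 24 bytes,
 * so the drain loop above emits one sample per 24 bytes between
 * bts_buffer_base and bts_index, reusing a single prepared header and
 * rewriting only the ip and addr fields per record:
 */
#if 0
        long nr_records = (ds->bts_index - ds->bts_buffer_base)
                          / (3 * sizeof(u64));
#endif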
613
614static inline void
615intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
616{
617 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
618 intel_pmu_disable_bts();
619 intel_pmu_drain_bts_buffer();
620 return;
621 }
622
623 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
624 intel_pmu_disable_fixed(hwc, idx);
625 return;
626 }
627
628 x86_pmu_disable_event(hwc, idx);
629}
630
631static inline void
632intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
633{
634 int idx = __idx - X86_PMC_IDX_FIXED;
635 u64 ctrl_val, bits, mask;
636 int err;
637
638 /*
639 * Enable IRQ generation (0x8),
640 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
641 * if requested:
642 */
643 bits = 0x8ULL;
644 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
645 bits |= 0x2;
646 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
647 bits |= 0x1;
648
649 /*
650 * ANY bit is supported in v3 and up
651 */
652 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
653 bits |= 0x4;
654
655 bits <<= (idx * 4);
656 mask = 0xfULL << (idx * 4);
657
658 rdmsrl(hwc->config_base, ctrl_val);
659 ctrl_val &= ~mask;
660 ctrl_val |= bits;
661 err = checking_wrmsrl(hwc->config_base, ctrl_val);
662}
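/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL holds one 4-bit control nibble per
 * fixed counter: bit 0 enables ring-0 counting, bit 1 ring-3, bit 2 the
 * ANY-thread qualifier (v3+), bit 3 the PMI. Enabling fixed counter 1
 * for user+kernel with an interrupt therefore ORs 0xb into bits 4-7:
 */
#if 0
        u64 bits = 0x8ULL | 0x2 | 0x1;  /* PMI + ring-3 + ring-0 */
        u64 ctrl = bits << (1 * 4);     /* == 0xb0 for idx 1 */
#endif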
663
664static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
665{
666 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
667 if (!__get_cpu_var(cpu_hw_events).enabled)
668 return;
669
670 intel_pmu_enable_bts(hwc->config);
671 return;
672 }
673
674 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
675 intel_pmu_enable_fixed(hwc, idx);
676 return;
677 }
678
679 __x86_pmu_enable_event(hwc, idx);
680}
681
682/*
683 * Save and restart an expired event. Called by NMI contexts,
684 * so it has to be careful about preempting normal event ops:
685 */
686static int intel_pmu_save_and_restart(struct perf_event *event)
687{
688 struct hw_perf_event *hwc = &event->hw;
689 int idx = hwc->idx;
690 int ret;
691
692 x86_perf_event_update(event, hwc, idx);
693 ret = x86_perf_event_set_period(event, hwc, idx);
694
695 return ret;
696}
697
698static void intel_pmu_reset(void)
699{
700 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
701 unsigned long flags;
702 int idx;
703
704 if (!x86_pmu.num_events)
705 return;
706
707 local_irq_save(flags);
708
709 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
710
711 for (idx = 0; idx < x86_pmu.num_events; idx++) {
712 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
713 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
714 }
715 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
716 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
717 }
718 if (ds)
719 ds->bts_index = ds->bts_buffer_base;
720
721 local_irq_restore(flags);
722}
723
724/*
725 * This handler is triggered by the local APIC, so the APIC IRQ handling
726 * rules apply:
727 */
728static int intel_pmu_handle_irq(struct pt_regs *regs)
729{
730 struct perf_sample_data data;
731 struct cpu_hw_events *cpuc;
732 int bit, loops;
733 u64 ack, status;
734
735 data.addr = 0;
736 data.raw = NULL;
737
738 cpuc = &__get_cpu_var(cpu_hw_events);
739
740 perf_disable();
741 intel_pmu_drain_bts_buffer();
742 status = intel_pmu_get_status();
743 if (!status) {
744 perf_enable();
745 return 0;
746 }
747
748 loops = 0;
749again:
750 if (++loops > 100) {
751 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
752 perf_event_print_debug();
753 intel_pmu_reset();
754 perf_enable();
755 return 1;
756 }
757
758 inc_irq_stat(apic_perf_irqs);
759 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit];
762
763 clear_bit(bit, (unsigned long *) &status);
764 if (!test_bit(bit, cpuc->active_mask))
765 continue;
766
767 if (!intel_pmu_save_and_restart(event))
768 continue;
769
770 data.period = event->hw.last_period;
771
772 if (perf_event_overflow(event, 1, &data, regs))
773 intel_pmu_disable_event(&event->hw, bit);
774 }
775
776 intel_pmu_ack_status(ack);
777
778 /*
779 * Repeat if there is more work to be done:
780 */
781 status = intel_pmu_get_status();
782 if (status)
783 goto again;
784
785 perf_enable();
786
787 return 1;
788}
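/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * MSR_CORE_PERF_GLOBAL_STATUS is a bitmask of overflowed counters, and
 * writing that same mask to GLOBAL_OVF_CTRL acknowledges them. The
 * handler re-reads the status afterwards, so an overflow that arrives
 * while processing is picked up on the next "again" pass instead of
 * requiring a fresh NMI:
 */
#if 0
        u64 status = intel_pmu_get_status();    /* overflow bitmask */
        intel_pmu_ack_status(status);           /* clear those bits */
#endif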
789
790static struct event_constraint bts_constraint =
791 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
792
793static struct event_constraint *
794intel_special_constraints(struct perf_event *event)
795{
796 unsigned int hw_event;
797
798 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
799
800 if (unlikely((hw_event ==
801 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
802 (event->hw.sample_period == 1))) {
803
804 return &bts_constraint;
805 }
806 return NULL;
807}
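/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * The BTS constraint fires for exactly one configuration: the
 * architectural branch-instructions event sampled with period 1. Such
 * an event is scheduled onto the pseudo-counter X86_PMC_IDX_FIXED_BTS
 * rather than a real PMC. A hypothetical attribute setup that would
 * trigger it:
 */
#if 0
        struct perf_event_attr attr = {
                .type          = PERF_TYPE_HARDWARE,
                .config        = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
                .sample_period = 1,     /* -> bts_constraint */
        };
#endif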
808
809static struct event_constraint *
810intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
811{
812 struct event_constraint *c;
813
814 c = intel_special_constraints(event);
815 if (c)
816 return c;
817
818 return x86_get_event_constraints(cpuc, event);
819}
820
821static __initconst struct x86_pmu core_pmu = {
822 .name = "core",
823 .handle_irq = x86_pmu_handle_irq,
824 .disable_all = x86_pmu_disable_all,
825 .enable_all = x86_pmu_enable_all,
826 .enable = x86_pmu_enable_event,
827 .disable = x86_pmu_disable_event,
828 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
829 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
830 .event_map = intel_pmu_event_map,
831 .raw_event = intel_pmu_raw_event,
832 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
833 .apic = 1,
834 /*
835 * Intel PMCs cannot be accessed sanely above 32 bit width,
836 * so we install an artificial 1<<31 period regardless of
837 * the generic event period:
838 */
839 .max_period = (1ULL << 31) - 1,
840 .get_event_constraints = intel_get_event_constraints,
841 .event_constraints = intel_core_event_constraints,
842};
843
844static __initconst struct x86_pmu intel_pmu = {
845 .name = "Intel",
846 .handle_irq = intel_pmu_handle_irq,
847 .disable_all = intel_pmu_disable_all,
848 .enable_all = intel_pmu_enable_all,
849 .enable = intel_pmu_enable_event,
850 .disable = intel_pmu_disable_event,
851 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
852 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
853 .event_map = intel_pmu_event_map,
854 .raw_event = intel_pmu_raw_event,
855 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
856 .apic = 1,
857 /*
858 * Intel PMCs cannot be accessed sanely above 32 bit width,
859 * so we install an artificial 1<<31 period regardless of
860 * the generic event period:
861 */
862 .max_period = (1ULL << 31) - 1,
863 .enable_bts = intel_pmu_enable_bts,
864 .disable_bts = intel_pmu_disable_bts,
865 .get_event_constraints = intel_get_event_constraints
866};
867
868static __init int intel_pmu_init(void)
869{
870 union cpuid10_edx edx;
871 union cpuid10_eax eax;
872 unsigned int unused;
873 unsigned int ebx;
874 int version;
875
876 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
877 /* check for P6 processor family */
878 if (boot_cpu_data.x86 == 6) {
879 return p6_pmu_init();
880 } else {
881 return -ENODEV;
882 }
883 }
884
885 /*
886 * Check whether the Architectural PerfMon supports
887 * Branch Misses Retired hw_event or not.
888 */
889 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
890 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
891 return -ENODEV;
892
893 version = eax.split.version_id;
894 if (version < 2)
895 x86_pmu = core_pmu;
896 else
897 x86_pmu = intel_pmu;
898
899 x86_pmu.version = version;
900 x86_pmu.num_events = eax.split.num_events;
901 x86_pmu.event_bits = eax.split.bit_width;
902 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
903
904 /*
905 * Quirk: v2 perfmon does not report fixed-purpose events, so
906 * assume at least 3 events:
907 */
908 if (version > 1)
909 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
910
911 /*
912 * Install the hw-cache-events table:
913 */
914 switch (boot_cpu_data.x86_model) {
915 case 14: /* 65 nm core solo/duo, "Yonah" */
916 pr_cont("Core events, ");
917 break;
918
919 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
920 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
921 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
922 case 29: /* six-core 45 nm xeon "Dunnington" */
923 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
924 sizeof(hw_cache_event_ids));
925
926 x86_pmu.event_constraints = intel_core2_event_constraints;
927 pr_cont("Core2 events, ");
928 break;
929
930 case 26: /* 45 nm nehalem, "Bloomfield" */
931 case 30: /* 45 nm nehalem, "Lynnfield" */
932 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
933 sizeof(hw_cache_event_ids));
934
935 x86_pmu.event_constraints = intel_nehalem_event_constraints;
936 pr_cont("Nehalem/Corei7 events, ");
937 break;
938 case 28:
939 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids));
941
942 x86_pmu.event_constraints = intel_gen_event_constraints;
943 pr_cont("Atom events, ");
944 break;
945
946 case 37: /* 32 nm nehalem, "Clarkdale" */
947 case 44: /* 32 nm nehalem, "Gulftown" */
948 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
949 sizeof(hw_cache_event_ids));
950
951 x86_pmu.event_constraints = intel_westmere_event_constraints;
952 pr_cont("Westmere events, ");
953 break;
954 default:
955 /*
956 * default constraints for v2 and up
957 */
958 x86_pmu.event_constraints = intel_gen_event_constraints;
959 pr_cont("generic architected perfmon, ");
960 }
961 return 0;
962}
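/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * CPUID leaf 0xA packs the capabilities read above into EAX: bits 7:0
 * are the version, 15:8 the number of generic counters, 23:16 the
 * counter bit width, and 31:24 the length of the event-availability
 * mask. Decoding a hypothetical value by hand:
 */
#if 0
        unsigned int eax = 0x07300403;          /* example leaf-0xA EAX */
        int version    = eax & 0xff;            /* 3  */
        int num_events = (eax >> 8) & 0xff;     /* 4  */
        int bit_width  = (eax >> 16) & 0xff;    /* 48 */
        int mask_len   = (eax >> 24) & 0xff;    /* 7  */
#endif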
963
964#else /* CONFIG_CPU_SUP_INTEL */
965
966static int intel_pmu_init(void)
967{
968 return 0;
969}
970
971#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..1ca5ba078afd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,157 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Not sure about some of these
5 */
6static const u64 p6_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
15};
16
17static u64 p6_pmu_event_map(int hw_event)
18{
19 return p6_perfmon_event_map[hw_event];
20}
21
22/*
23 * Event setting that is specified not to count anything.
24 * We use this to effectively disable a counter.
25 *
26 * L2_RQSTS with 0 MESI unit mask.
27 */
28#define P6_NOP_EVENT 0x0000002EULL
29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] =
49{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
51 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
52 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
53 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
54 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
55 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
56 EVENT_CONSTRAINT_END
57};
58
59static void p6_pmu_disable_all(void)
60{
61 u64 val;
62
63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val);
67}
68
69static void p6_pmu_enable_all(void)
70{
71 unsigned long val;
72
73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val);
77}
78
79static inline void
80p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 u64 val = P6_NOP_EVENT;
84
85 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
87
88 (void)checking_wrmsrl(hwc->config_base + idx, val);
89}
90
91static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
92{
93 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
94 u64 val;
95
96 val = hwc->config;
97 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
99
100 (void)checking_wrmsrl(hwc->config_base + idx, val);
101}
102
103static __initconst struct x86_pmu p6_pmu = {
104 .name = "p6",
105 .handle_irq = x86_pmu_handle_irq,
106 .disable_all = p6_pmu_disable_all,
107 .enable_all = p6_pmu_enable_all,
108 .enable = p6_pmu_enable_event,
109 .disable = p6_pmu_disable_event,
110 .eventsel = MSR_P6_EVNTSEL0,
111 .perfctr = MSR_P6_PERFCTR0,
112 .event_map = p6_pmu_event_map,
113 .raw_event = p6_pmu_raw_event,
114 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
115 .apic = 1,
116 .max_period = (1ULL << 31) - 1,
117 .version = 0,
118 .num_events = 2,
119 /*
120 * Events have 40 bits implemented. However they are designed such
121 * that bits [32-39] are sign extensions of bit 31. As such the
 122 * effective width of an event for P6-like PMU is 32 bits only.
123 *
124 * See IA-32 Intel Architecture Software developer manual Vol 3B
125 */
126 .event_bits = 32,
127 .event_mask = (1ULL << 32) - 1,
128 .get_event_constraints = x86_get_event_constraints,
129 .event_constraints = p6_event_constraints,
130};
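/*
 * [Editor's note: illustrative sketch, not part of the original file.]
 * Because bits 32-39 of a P6 counter mirror bit 31, only 32 bits are
 * usable, which is what event_bits/event_mask above encode: a counter
 * primed with -period modulo 2^32 overflows after "period" increments.
 */
#if 0
        u64 period = 100000;
        u64 left   = (u64)(-(s64)period) & ((1ULL << 32) - 1);
#endif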
131
132static __init int p6_pmu_init(void)
133{
134 switch (boot_cpu_data.x86_model) {
135 case 1:
136 case 3: /* Pentium Pro */
137 case 5:
138 case 6: /* Pentium II */
139 case 7:
140 case 8:
141 case 11: /* Pentium III */
142 case 9:
143 case 13:
144 /* Pentium M */
145 break;
146 default:
147 pr_cont("unsupported p6 CPU model %d ",
148 boot_cpu_data.x86_model);
149 return -ENODEV;
150 }
151
152 x86_pmu = p6_pmu;
153
154 return 0;
155}
156
157#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719afb..74f4e85a5727 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 
        return !test_bit(counter, perfctr_nmi_owner);
 }
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
-       unsigned int counter;
-
-       counter = nmi_perfctr_msr_to_bit(msr);
-       BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-       return !test_bit(counter, perfctr_nmi_owner);
-}
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
 
 int reserve_perfctr_nmi(unsigned int msr)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 7ef24a796992..83e5e628de73 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -187,7 +187,8 @@ static int __init cpuid_init(void)
        int i, err = 0;
        i = 0;
 
-       if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) {
+       if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS,
+                             "cpu/cpuid", &cpuid_fops)) {
                printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
                       CPUID_MAJOR);
                err = -EBUSY;
@@ -216,7 +217,7 @@ out_class:
        }
        class_destroy(cpuid_class);
 out_chrdev:
-       unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+       __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 out:
        return err;
 }
@@ -228,7 +229,7 @@ static void __exit cpuid_exit(void)
        for_each_online_cpu(cpu)
                cpuid_device_destroy(cpu);
        class_destroy(cpuid_class);
-       unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+       __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
        unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
 }
 
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index ef42a038f1a6..1c47390dd0e5 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -265,13 +265,13 @@ struct ds_context {
        int cpu;
 };
 
-static DEFINE_PER_CPU(struct ds_context *, cpu_context);
+static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
 
 
 static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
 {
        struct ds_context **p_context =
-               (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
+               (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
        struct ds_context *context = NULL;
        struct ds_context *new_context = NULL;
 
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b8ce165dde5d..6d817554780a 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -109,6 +109,32 @@ print_context_stack(struct thread_info *tinfo,
        }
        return bp;
 }
+EXPORT_SYMBOL_GPL(print_context_stack);
+
+unsigned long
+print_context_stack_bp(struct thread_info *tinfo,
+                      unsigned long *stack, unsigned long bp,
+                      const struct stacktrace_ops *ops, void *data,
+                      unsigned long *end, int *graph)
+{
+       struct stack_frame *frame = (struct stack_frame *)bp;
+       unsigned long *ret_addr = &frame->return_address;
+
+       while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
+               unsigned long addr = *ret_addr;
+
+               if (!__kernel_text_address(addr))
+                       break;
+
+               ops->address(data, addr, 1);
+               frame = frame->next_frame;
+               ret_addr = &frame->return_address;
+               print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+       }
+
+       return (unsigned long)frame;
+}
+EXPORT_SYMBOL_GPL(print_context_stack_bp);
 
 
 static void
@@ -141,10 +167,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops print_trace_ops = {
        .warning = print_trace_warning,
        .warning_symbol = print_trace_warning_symbol,
        .stack = print_trace_stack,
        .address = print_trace_address,
+       .walk_stack = print_context_stack,
 };
 
 void
@@ -188,7 +215,7 @@ void dump_stack(void)
 }
 EXPORT_SYMBOL(dump_stack);
 
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
@@ -207,11 +234,11 @@ unsigned __kprobes long oops_begin(void)
        /* racy, but better than risking deadlock. */
        raw_local_irq_save(flags);
        cpu = smp_processor_id();
-       if (!__raw_spin_trylock(&die_lock)) {
+       if (!arch_spin_trylock(&die_lock)) {
                if (cpu == die_owner)
                        /* nested oops. should stop eventually */;
                else
-                       __raw_spin_lock(&die_lock);
+                       arch_spin_lock(&die_lock);
        }
        die_nest_count++;
        die_owner = cpu;
@@ -231,7 +258,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
        die_nest_count--;
        if (!die_nest_count)
                /* Nest count reaches zero, release the lock. */
-               __raw_spin_unlock(&die_lock);
+               arch_spin_unlock(&die_lock);
        raw_local_irq_restore(flags);
        oops_exit();
 
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 81086c227ab7..4fd1420faffa 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -14,12 +14,6 @@
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
-extern unsigned long
-print_context_stack(struct thread_info *tinfo,
-                   unsigned long *stack, unsigned long bp,
-                   const struct stacktrace_ops *ops, void *data,
-                   unsigned long *end, int *graph);
-
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                   unsigned long *stack, unsigned long bp, char *log_lvl);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e0ed4c7abb62..11540a189d93 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
 
 #include "dumpstack.h"
 
-/* Just a stub for now */
-int x86_is_stack_id(int id, char *name)
-{
-       return 0;
-}
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp,
@@ -58,7 +53,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 
                context = (struct thread_info *)
                        ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-               bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph);
+               bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph);
 
                stack = (unsigned long *)context->previous_esp;
                if (!stack)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 8e740934bd1f..dce99abb4496 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = {
 #endif
 };
 
-int x86_is_stack_id(int id, char *name)
-{
-       return x86_stack_ids[id - 1] == name;
-}
-
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                                         unsigned *usedp, char **idp)
 {
@@ -103,6 +98,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
        return NULL;
 }
 
+static inline int
+in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
+            unsigned long *irq_stack_end)
+{
+       return (stack >= irq_stack && stack < irq_stack_end);
+}
+
+/*
+ * We are returning from the irq stack and go to the previous one.
+ * If the previous stack is also in the irq stack, then bp in the first
+ * frame of the irq stack points to the previous, interrupted one.
+ * Otherwise we have another level of indirection: We first save
+ * the bp of the previous stack, then we switch the stack to the irq one
+ * and save a new bp that links to the previous one.
+ * (See save_args())
+ */
+static inline unsigned long
+fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
+                 unsigned long *irq_stack, unsigned long *irq_stack_end)
+{
+#ifdef CONFIG_FRAME_POINTER
+       struct stack_frame *frame = (struct stack_frame *)bp;
+
+       if (!in_irq_stack(stack, irq_stack, irq_stack_end))
+               return (unsigned long)frame->next_frame;
+#endif
+       return bp;
+}
+
 /*
  * x86-64 can have up to three kernel stacks:
  * process stack
@@ -159,8 +183,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        if (ops->stack(data, id) < 0)
                                break;
 
-                       bp = print_context_stack(tinfo, stack, bp, ops,
-                                                data, estack_end, &graph);
+                       bp = ops->walk_stack(tinfo, stack, bp, ops,
+                                            data, estack_end, &graph);
                        ops->stack(data, "<EOE>");
                        /*
                         * We link to the next stack via the
@@ -175,7 +199,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                irq_stack = irq_stack_end -
                        (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
 
-               if (stack >= irq_stack && stack < irq_stack_end) {
+               if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
                        if (ops->stack(data, "IRQ") < 0)
                                break;
                        bp = print_context_stack(tinfo, stack, bp,
@@ -186,6 +210,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                         * pointer (index -1 to end) in the IRQ stack:
                         */
                        stack = (unsigned long *) (irq_stack_end[-1]);
+                       bp = fixup_bp_irq_link(bp, stack, irq_stack,
+                                              irq_stack_end);
                        irq_stack_end = NULL;
                        ops->stack(data, "EOI");
                        continue;
@@ -260,6 +286,7 @@ void show_registers(struct pt_regs *regs)
 
        sp = regs->sp;
        printk("CPU %d ", cpu);
+       print_modules();
        __show_regs(regs, 1);
        printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
                cur->comm, cur->pid, task_thread_info(cur), cur);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d17d482a04f4..a966b753e496 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -517,11 +517,19 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
                             int checktype)
 {
        int i;
+       u64 end;
        u64 real_removed_size = 0;
 
        if (size > (ULLONG_MAX - start))
                size = ULLONG_MAX - start;
 
+       end = start + size;
+       printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
+              (unsigned long long) start,
+              (unsigned long long) end);
+       e820_print_type(old_type);
+       printk(KERN_CONT "\n");
+
        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
                u64 final_start, final_end;
@@ -724,7 +732,7 @@ core_initcall(e820_mark_nvs_memory);
 /*
  * Early reserved memory areas.
  */
-#define MAX_EARLY_RES 20
+#define MAX_EARLY_RES 32
 
 struct early_res {
        u64 start, end;
@@ -732,7 +740,16 @@ struct early_res {
        char overlap_ok;
 };
 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
-       { 0, PAGE_SIZE, "BIOS data page" },     /* BIOS data page */
+       { 0, PAGE_SIZE, "BIOS data page", 1 },  /* BIOS data page */
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
+       /*
+        * But first pinch a few for the stack/trampoline stuff
+        * FIXME: Don't need the extra page at 4K, but need to fix
+        * trampoline before removing it. (see the GDT stuff)
+        */
+       { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
+
        {}
 };
 
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index cdcfb122f256..c2fa9b8b497e 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -362,7 +362,7 @@ void __init efi_init(void)
                printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
        early_iounmap(tmp, 2);
 
-       printk(KERN_INFO "EFI v%u.%.02u by %s \n",
+       printk(KERN_INFO "EFI v%u.%.02u by %s\n",
               efi.systab->hdr.revision >> 16,
               efi.systab->hdr.revision & 0xffff, vendor);
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 50b9c220e121..44a8e0dc6737 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -725,22 +725,61 @@ END(syscall_badsys)
 /*
  * System calls that need a pt_regs pointer.
  */
-#define PTREGSCALL(name) \
+#define PTREGSCALL0(name) \
        ALIGN; \
 ptregs_##name: \
        leal 4(%esp),%eax; \
        jmp sys_##name;
 
-PTREGSCALL(iopl)
-PTREGSCALL(fork)
-PTREGSCALL(clone)
-PTREGSCALL(vfork)
-PTREGSCALL(execve)
-PTREGSCALL(sigaltstack)
-PTREGSCALL(sigreturn)
-PTREGSCALL(rt_sigreturn)
-PTREGSCALL(vm86)
-PTREGSCALL(vm86old)
+#define PTREGSCALL1(name) \
+       ALIGN; \
+ptregs_##name: \
+       leal 4(%esp),%edx; \
+       movl (PT_EBX+4)(%esp),%eax; \
+       jmp sys_##name;
+
+#define PTREGSCALL2(name) \
+       ALIGN; \
+ptregs_##name: \
+       leal 4(%esp),%ecx; \
+       movl (PT_ECX+4)(%esp),%edx; \
+       movl (PT_EBX+4)(%esp),%eax; \
+       jmp sys_##name;
+
+#define PTREGSCALL3(name) \
+       ALIGN; \
+ptregs_##name: \
+       leal 4(%esp),%eax; \
+       pushl %eax; \
+       movl PT_EDX(%eax),%ecx; \
+       movl PT_ECX(%eax),%edx; \
+       movl PT_EBX(%eax),%eax; \
+       call sys_##name; \
+       addl $4,%esp; \
+       ret
+
+PTREGSCALL1(iopl)
+PTREGSCALL0(fork)
+PTREGSCALL0(vfork)
+PTREGSCALL3(execve)
+PTREGSCALL2(sigaltstack)
+PTREGSCALL0(sigreturn)
+PTREGSCALL0(rt_sigreturn)
+PTREGSCALL2(vm86)
+PTREGSCALL1(vm86old)
+
+/* Clone is an oddball. The 4th arg is in %edi */
+       ALIGN;
+ptregs_clone:
+       leal 4(%esp),%eax
+       pushl %eax
+       pushl PT_EDI(%eax)
+       movl PT_EDX(%eax),%ecx
+       movl PT_ECX(%eax),%edx
+       movl PT_EBX(%eax),%eax
+       call sys_clone
+       addl $8,%esp
+       ret
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -1008,12 +1047,8 @@ END(spurious_interrupt_bug)
 ENTRY(kernel_thread_helper)
        pushl $0                # fake return address for unwinder
        CFI_STARTPROC
-       movl %edx,%eax
-       push %edx
-       CFI_ADJUST_CFA_OFFSET 4
-       call *%ebx
-       push %eax
-       CFI_ADJUST_CFA_OFFSET 4
+       movl %edi,%eax
+       call *%esi
        call do_exit
        ud2                     # padding for call trace
        CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 63bca794c8f9..0697ff139837 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1076,10 +1076,10 @@ ENTRY(\sym)
        TRACE_IRQS_OFF
        movq %rsp,%rdi          /* pt_regs pointer */
        xorl %esi,%esi          /* no error code */
-       PER_CPU(init_tss, %rbp)
-       subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+       PER_CPU(init_tss, %r12)
+       subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
        call \do_sym
-       addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+       addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
        jmp paranoid_exit       /* %ebx: no swapgs flag */
        CFI_ENDPROC
 END(\sym)
@@ -1166,63 +1166,20 @@ bad_gs:
        jmp 2b
        .previous
 
-/*
- * Create a kernel thread.
- *
- * C extern interface:
- *     extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- *     rdi: fn, rsi: arg, rdx: flags
- */
-ENTRY(kernel_thread)
-       CFI_STARTPROC
-       FAKE_STACK_FRAME $child_rip
-       SAVE_ALL
-
-       # rdi: flags, rsi: usp, rdx: will be &pt_regs
-       movq %rdx,%rdi
-       orq kernel_thread_flags(%rip),%rdi
-       movq $-1, %rsi
-       movq %rsp, %rdx
-
-       xorl %r8d,%r8d
-       xorl %r9d,%r9d
-
-       # clone now
-       call do_fork
-       movq %rax,RAX(%rsp)
-       xorl %edi,%edi
-
-       /*
-        * It isn't worth to check for reschedule here,
-        * so internally to the x86_64 port you can rely on kernel_thread()
-        * not to reschedule the child before returning, this avoids the need
-        * of hacks for example to fork off the per-CPU idle tasks.
-        * [Hopefully no generic code relies on the reschedule -AK]
-        */
-       RESTORE_ALL
-       UNFAKE_STACK_FRAME
-       ret
-       CFI_ENDPROC
-END(kernel_thread)
-
-ENTRY(child_rip)
+ENTRY(kernel_thread_helper)
        pushq $0                # fake return address
        CFI_STARTPROC
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
-       movq %rdi, %rax
-       movq %rsi, %rdi
-       call *%rax
+       call *%rsi
        # exit
        mov %eax, %edi
        call do_exit
        ud2                     # padding for call trace
        CFI_ENDPROC
-END(child_rip)
+END(kernel_thread_helper)
 
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..cd37469b54ee 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -30,14 +30,32 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+/*
+ * modifying_code is set to notify NMIs that they need to use
+ * memory barriers when entering or exiting. But we don't want
+ * to burden NMIs with unnecessary memory barriers when code
+ * modification is not being done (which is most of the time).
+ *
+ * A mutex is already held when ftrace_arch_code_modify_prepare
+ * and post_process are called. No locks need to be taken here.
+ *
+ * Stop machine will make sure currently running NMIs are done
+ * and new NMIs will see the updated variable before we need
+ * to worry about NMIs doing memory barriers.
+ */
+static int modifying_code __read_mostly;
+static DEFINE_PER_CPU(int, save_modifying_code);
+
 int ftrace_arch_code_modify_prepare(void)
 {
        set_kernel_text_rw();
+       modifying_code = 1;
        return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
+       modifying_code = 0;
        set_kernel_text_ro();
        return 0;
 }
@@ -149,6 +167,11 @@ static void ftrace_mod_code(void)
 
 void ftrace_nmi_enter(void)
 {
+       __get_cpu_var(save_modifying_code) = modifying_code;
+
+       if (!__get_cpu_var(save_modifying_code))
+               return;
+
        if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
                smp_rmb();
                ftrace_mod_code();
@@ -160,6 +183,9 @@ void ftrace_nmi_enter(void)
 
 void ftrace_nmi_exit(void)
 {
+       if (!__get_cpu_var(save_modifying_code))
+               return;
+
        /* Finish all executions before clearing nmi_running */
        smp_mb();
        atomic_dec(&nmi_running);
@@ -484,13 +510,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
        }
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-       return (unsigned long)(&sys_call_table)[nr];
-}
-#endif
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
deleted file mode 100644
index 9b08e852fd1a..000000000000
--- a/arch/x86/kernel/geode_32.c
+++ /dev/null
@@ -1,196 +0,0 @@
1/*
2 * AMD Geode southbridge support code
3 * Copyright (C) 2006, Advanced Micro Devices, Inc.
4 * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public License
8 * as published by the Free Software Foundation.
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/ioport.h>
14#include <linux/io.h>
15#include <asm/msr.h>
16#include <asm/geode.h>
17
18static struct {
19 char *name;
20 u32 msr;
21 int size;
22 u32 base;
23} lbars[] = {
24 { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 },
25 { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 },
26 { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 },
27 { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 }
28};
29
30static void __init init_lbars(void)
31{
32 u32 lo, hi;
33 int i;
34
35 for (i = 0; i < ARRAY_SIZE(lbars); i++) {
36 rdmsr(lbars[i].msr, lo, hi);
37 if (hi & 0x01)
38 lbars[i].base = lo & 0x0000ffff;
39
40 if (lbars[i].base == 0)
41 printk(KERN_ERR "geode: Couldn't initialize '%s'\n",
42 lbars[i].name);
43 }
44}
45
46int geode_get_dev_base(unsigned int dev)
47{
48 BUG_ON(dev >= ARRAY_SIZE(lbars));
49 return lbars[dev].base;
50}
51EXPORT_SYMBOL_GPL(geode_get_dev_base);
52
53/* === GPIO API === */
54
55void geode_gpio_set(u32 gpio, unsigned int reg)
56{
57 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
58
59 if (!base)
60 return;
61
62 /* low bank register */
63 if (gpio & 0xFFFF)
64 outl(gpio & 0xFFFF, base + reg);
65 /* high bank register */
66 gpio >>= 16;
67 if (gpio)
68 outl(gpio, base + 0x80 + reg);
69}
70EXPORT_SYMBOL_GPL(geode_gpio_set);
71
72void geode_gpio_clear(u32 gpio, unsigned int reg)
73{
74 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
75
76 if (!base)
77 return;
78
79 /* low bank register */
80 if (gpio & 0xFFFF)
81 outl((gpio & 0xFFFF) << 16, base + reg);
82 /* high bank register */
83 gpio &= (0xFFFF << 16);
84 if (gpio)
85 outl(gpio, base + 0x80 + reg);
86}
87EXPORT_SYMBOL_GPL(geode_gpio_clear);
88
89int geode_gpio_isset(u32 gpio, unsigned int reg)
90{
91 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
92 u32 val;
93
94 if (!base)
95 return 0;
96
97 /* low bank register */
98 if (gpio & 0xFFFF) {
99 val = inl(base + reg) & (gpio & 0xFFFF);
100 if ((gpio & 0xFFFF) == val)
101 return 1;
102 }
103 /* high bank register */
104 gpio >>= 16;
105 if (gpio) {
106 val = inl(base + 0x80 + reg) & gpio;
107 if (gpio == val)
108 return 1;
109 }
110 return 0;
111}
112EXPORT_SYMBOL_GPL(geode_gpio_isset);
113
114void geode_gpio_set_irq(unsigned int group, unsigned int irq)
115{
116 u32 lo, hi;
117
118 if (group > 7 || irq > 15)
119 return;
120
121 rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
122
123 lo &= ~(0xF << (group * 4));
124 lo |= (irq & 0xF) << (group * 4);
125
126 wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
127}
128EXPORT_SYMBOL_GPL(geode_gpio_set_irq);
129
130void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
131{
132 u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
133 u32 offset, shift, val;
134
135 if (gpio >= 24)
136 offset = GPIO_MAP_W;
137 else if (gpio >= 16)
138 offset = GPIO_MAP_Z;
139 else if (gpio >= 8)
140 offset = GPIO_MAP_Y;
141 else
142 offset = GPIO_MAP_X;
143
144 shift = (gpio % 8) * 4;
145
146 val = inl(base + offset);
147
148 /* Clear whatever was there before */
149 val &= ~(0xF << shift);
150
151 /* And set the new value */
152
153 val |= ((pair & 7) << shift);
154
155 /* Set the PME bit if this is a PME event */
156
157 if (pme)
158 val |= (1 << (shift + 3));
159
160 outl(val, base + offset);
161}
162EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
163
164int geode_has_vsa2(void)
165{
166 static int has_vsa2 = -1;
167
168 if (has_vsa2 == -1) {
169 u16 val;
170
171 /*
172 * The VSA has virtual registers that we can query for a
173 * signature.
174 */
175 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
176 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
177
178 val = inw(VSA_VRC_DATA);
179 has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
180 }
181
182 return has_vsa2;
183}
184EXPORT_SYMBOL_GPL(geode_has_vsa2);
185
186static int __init geode_southbridge_init(void)
187{
188 if (!is_geode())
189 return -ENODEV;
190
191 init_lbars();
192 (void) mfgpt_timer_setup();
193 return 0;
194}
195
196postcore_initcall(geode_southbridge_init);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 4f8e2507e8f3..5051b94c9069 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void)
 
 void __init i386_start_kernel(void)
 {
-       reserve_trampoline_memory();
-
        reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0b06cd778fd9..b5a9896ca1e7 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 {
        copy_bootdata(__va(real_mode_data));
 
-       reserve_trampoline_memory();
-
        reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ba6e65884603..ad80a1c718c6 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -34,6 +34,8 @@
  */
 unsigned long hpet_address;
 u8 hpet_blockid; /* OS timer block num */
+u8 hpet_msi_disable;
+
 #ifdef CONFIG_PCI_MSI
 static unsigned long hpet_num_timers;
 #endif
@@ -596,6 +598,9 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
        unsigned int num_timers_used = 0;
        int i;
 
+       if (hpet_msi_disable)
+               return;
+
        if (boot_cpu_has(X86_FEATURE_ARAT))
                return;
        id = hpet_readl(HPET_ID);
@@ -928,6 +933,9 @@ static __init int hpet_late_init(void)
                hpet_reserve_platform_timers(hpet_readl(HPET_ID));
        hpet_print_config();
 
+       if (hpet_msi_disable)
+               return 0;
+
        if (boot_cpu_has(X86_FEATURE_ARAT))
                return 0;
 
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index d42f65ac4927..dca2802c666f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -212,25 +212,6 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
        return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
 
-/*
- * Store a breakpoint's encoded address, length, and type.
- */
-static int arch_store_info(struct perf_event *bp)
-{
-       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-       /*
-        * For kernel-addresses, either the address or symbol name can be
-        * specified.
-        */
-       if (info->name)
-               info->address = (unsigned long)
-                       kallsyms_lookup_name(info->name);
-       if (info->address)
-               return 0;
-
-       return -EINVAL;
-}
-
 int arch_bp_generic_fields(int x86_len, int x86_type,
                           int *gen_len, int *gen_type)
 {
@@ -362,11 +343,13 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
                return ret;
        }
 
-       if (bp->callback)
-               ret = arch_store_info(bp);
-
-       if (ret < 0)
-               return ret;
+       /*
+        * For kernel-addresses, either the address or symbol name can be
+        * specified.
+        */
+       if (info->name)
+               info->address = (unsigned long)
+                       kallsyms_lookup_name(info->name);
        /*
         * Check that the low-order bits of the address are appropriate
        * for the alignment implied by len.
@@ -503,8 +486,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
                rcu_read_lock();
 
                bp = per_cpu(bp_per_reg[i], cpu);
-               if (bp)
-                       rc = NOTIFY_DONE;
                /*
                 * Reset the 'i'th TRAP bit in dr6 to denote completion of
                 * exception handling
@@ -519,11 +500,17 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
                        break;
                }
 
-               (bp->callback)(bp, args->regs);
+               perf_bp_event(bp, args->regs);
 
                rcu_read_unlock();
        }
-       if (dr6 & (~DR_TRAP_BITS))
+       /*
+        * Further processing in do_debug() is needed for a) user-space
+        * breakpoints (to generate signals) and b) when the system has
+        * taken exception due to multiple causes
+        */
+       if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
+           (dr6 & (~DR_TRAP_BITS)))
                rc = NOTIFY_DONE;
 
        set_debugreg(dr7, 7);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f2f8540a7f3d..c01a2b846d47 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -164,6 +164,11 @@ int init_fpu(struct task_struct *tsk)
164 return 0; 164 return 0;
165} 165}
166 166
167/*
168 * The xstateregs_active() routine is the same as the fpregs_active() routine,
169 * as the "regset->n" for the xstate regset will be updated based on the feature
170 * capabilites supported by the xsave.
171 */
167int fpregs_active(struct task_struct *target, const struct user_regset *regset) 172int fpregs_active(struct task_struct *target, const struct user_regset *regset)
168{ 173{
169 return tsk_used_math(target) ? regset->n : 0; 174 return tsk_used_math(target) ? regset->n : 0;
@@ -204,8 +209,6 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
204 if (ret) 209 if (ret)
205 return ret; 210 return ret;
206 211
207 set_stopped_child_used_math(target);
208
209 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 212 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
210 &target->thread.xstate->fxsave, 0, -1); 213 &target->thread.xstate->fxsave, 0, -1);
211 214
@@ -224,6 +227,68 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
224 return ret; 227 return ret;
225} 228}
226 229
230int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
231 unsigned int pos, unsigned int count,
232 void *kbuf, void __user *ubuf)
233{
234 int ret;
235
236 if (!cpu_has_xsave)
237 return -ENODEV;
238
239 ret = init_fpu(target);
240 if (ret)
241 return ret;
242
243 /*
 244 * Copy the 48 bytes defined by the software first into the xstate
245 * memory layout in the thread struct, so that we can copy the entire
246 * xstateregs to the user using one user_regset_copyout().
247 */
248 memcpy(&target->thread.xstate->fxsave.sw_reserved,
249 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
250
251 /*
252 * Copy the xstate memory layout.
253 */
254 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
255 &target->thread.xstate->xsave, 0, -1);
256 return ret;
257}
258
259int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
260 unsigned int pos, unsigned int count,
261 const void *kbuf, const void __user *ubuf)
262{
263 int ret;
264 struct xsave_hdr_struct *xsave_hdr;
265
266 if (!cpu_has_xsave)
267 return -ENODEV;
268
269 ret = init_fpu(target);
270 if (ret)
271 return ret;
272
273 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
274 &target->thread.xstate->xsave, 0, -1);
275
276 /*
277 * mxcsr reserved bits must be masked to zero for security reasons.
278 */
279 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
280
281 xsave_hdr = &target->thread.xstate->xsave.xsave_hdr;
282
283 xsave_hdr->xstate_bv &= pcntxt_mask;
284 /*
285 * These bits must be zero.
286 */
287 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
288
289 return ret;
290}
291
227#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 292#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
228 293
229/* 294/*
@@ -404,8 +469,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
404 if (ret) 469 if (ret)
405 return ret; 470 return ret;
406 471
407 set_stopped_child_used_math(target);
408
409 if (!HAVE_HWFP) 472 if (!HAVE_HWFP)
410 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 473 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
411 474
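
The new xstateregs_get()/xstateregs_set() handlers export the full XSAVE area as a regset: the getter first stamps the 48-byte software signature (xstate_fx_sw_bytes) into fxsave.sw_reserved so a single copyout covers everything, and the setter masks mxcsr reserved bits and clears the header bits the hardware requires to be zero. On kernels where this regset is reachable from userspace via PTRACE_GETREGSET (an interface added somewhat later than this patch), fetching it looks roughly like the following; the note value is an assumption matching later elf.h:

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <sys/uio.h>

    #ifndef NT_X86_XSTATE
    #define NT_X86_XSTATE 0x202	/* regset note type; assumed value */
    #endif

    static int dump_xstate(pid_t pid)
    {
    	uint8_t buf[4096];	/* comfortably larger than early XSAVE layouts */
    	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

    	if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
    		return -1;
    	printf("xstate area: %zu bytes\n", iov.iov_len);	/* kernel trims iov_len */
    	return 0;
    }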
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 99c4d308f16b..8eec0ec59af2 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -103,9 +103,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
103 * on system-call entry - see also fork() and the signal handling 103 * on system-call entry - see also fork() and the signal handling
104 * code. 104 * code.
105 */ 105 */
106static int do_iopl(unsigned int level, struct pt_regs *regs) 106long sys_iopl(unsigned int level, struct pt_regs *regs)
107{ 107{
108 unsigned int old = (regs->flags >> 12) & 3; 108 unsigned int old = (regs->flags >> 12) & 3;
109 struct thread_struct *t = &current->thread;
109 110
110 if (level > 3) 111 if (level > 3)
111 return -EINVAL; 112 return -EINVAL;
@@ -115,29 +116,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
115 return -EPERM; 116 return -EPERM;
116 } 117 }
117 regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); 118 regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
118
119 return 0;
120}
121
122#ifdef CONFIG_X86_32
123long sys_iopl(struct pt_regs *regs)
124{
125 unsigned int level = regs->bx;
126 struct thread_struct *t = &current->thread;
127 int rc;
128
129 rc = do_iopl(level, regs);
130 if (rc < 0)
131 goto out;
132
133 t->iopl = level << 12; 119 t->iopl = level << 12;
134 set_iopl_mask(t->iopl); 120 set_iopl_mask(t->iopl);
135out: 121
136 return rc; 122 return 0;
137}
138#else
139asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
140{
141 return do_iopl(level, regs);
142} 123}
143#endif
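
Folding do_iopl() into a single sys_iopl(unsigned int level, struct pt_regs *) removes the 32/64-bit split and makes both variants persist the level in thread.iopl. From userspace the syscall behaves as before; a minimal exercise of it (needs CAP_SYS_RAWIO):

    #include <stdio.h>
    #include <sys/io.h>	/* glibc wrappers: iopl(), outb() */

    int main(void)
    {
    	if (iopl(3)) {			/* raise I/O privilege to level 3 */
    		perror("iopl");
    		return 1;
    	}
    	outb(0x80, 0x80);		/* harmless write to the POST diagnostic port */
    	return 0;
    }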
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 664bcb7384ac..91fd0c70a18a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v)
149 if (!desc) 149 if (!desc)
150 return 0; 150 return 0;
151 151
152 spin_lock_irqsave(&desc->lock, flags); 152 raw_spin_lock_irqsave(&desc->lock, flags);
153 for_each_online_cpu(j) 153 for_each_online_cpu(j)
154 any_count |= kstat_irqs_cpu(i, j); 154 any_count |= kstat_irqs_cpu(i, j);
155 action = desc->action; 155 action = desc->action;
@@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v)
170 170
171 seq_putc(p, '\n'); 171 seq_putc(p, '\n');
172out: 172out:
173 spin_unlock_irqrestore(&desc->lock, flags); 173 raw_spin_unlock_irqrestore(&desc->lock, flags);
174 return 0; 174 return 0;
175} 175}
176 176
@@ -294,12 +294,12 @@ void fixup_irqs(void)
294 continue; 294 continue;
295 295
 296 /* interrupts are disabled at this point */ 296 /* interrupts are disabled at this point */
297 spin_lock(&desc->lock); 297 raw_spin_lock(&desc->lock);
298 298
299 affinity = desc->affinity; 299 affinity = desc->affinity;
300 if (!irq_has_action(irq) || 300 if (!irq_has_action(irq) ||
301 cpumask_equal(affinity, cpu_online_mask)) { 301 cpumask_equal(affinity, cpu_online_mask)) {
302 spin_unlock(&desc->lock); 302 raw_spin_unlock(&desc->lock);
303 continue; 303 continue;
304 } 304 }
305 305
@@ -326,7 +326,7 @@ void fixup_irqs(void)
326 if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) 326 if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
327 desc->chip->unmask(irq); 327 desc->chip->unmask(irq);
328 328
329 spin_unlock(&desc->lock); 329 raw_spin_unlock(&desc->lock);
330 330
331 if (break_affinity && set_affinity) 331 if (break_affinity && set_affinity)
332 printk("Broke affinity for irq %i\n", irq); 332 printk("Broke affinity for irq %i\n", irq);
@@ -356,10 +356,10 @@ void fixup_irqs(void)
356 irq = __get_cpu_var(vector_irq)[vector]; 356 irq = __get_cpu_var(vector_irq)[vector];
357 357
358 desc = irq_to_desc(irq); 358 desc = irq_to_desc(irq);
359 spin_lock(&desc->lock); 359 raw_spin_lock(&desc->lock);
360 if (desc->chip->retrigger) 360 if (desc->chip->retrigger)
361 desc->chip->retrigger(irq); 361 desc->chip->retrigger(irq);
362 spin_unlock(&desc->lock); 362 raw_spin_unlock(&desc->lock);
363 } 363 }
364 } 364 }
365} 365}
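
The spin_lock_* to raw_spin_lock_* conversions above are part of the tree-wide split of spinlock_t into arch_spinlock_t/raw_spinlock_t, so that irq_desc->lock keeps spinning even on trees where ordinary spinlocks can sleep (PREEMPT_RT). The resulting idiom, with an illustrative lock of our own:

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_lock);	/* stays a true spinning lock on -rt */

    static void demo(void)
    {
    	unsigned long flags;

    	raw_spin_lock_irqsave(&demo_lock, flags);
    	/* critical section: must not sleep */
    	raw_spin_unlock_irqrestore(&demo_lock, flags);
    }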
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 20a5b3689463..bfba6019d762 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -42,6 +42,7 @@
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45#include <linux/hw_breakpoint.h>
45 46
46#include <asm/debugreg.h> 47#include <asm/debugreg.h>
47#include <asm/apicdef.h> 48#include <asm/apicdef.h>
@@ -86,9 +87,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
86 gdb_regs[GDB_DS] = regs->ds; 87 gdb_regs[GDB_DS] = regs->ds;
87 gdb_regs[GDB_ES] = regs->es; 88 gdb_regs[GDB_ES] = regs->es;
88 gdb_regs[GDB_CS] = regs->cs; 89 gdb_regs[GDB_CS] = regs->cs;
89 gdb_regs[GDB_SS] = __KERNEL_DS;
90 gdb_regs[GDB_FS] = 0xFFFF; 90 gdb_regs[GDB_FS] = 0xFFFF;
91 gdb_regs[GDB_GS] = 0xFFFF; 91 gdb_regs[GDB_GS] = 0xFFFF;
92 if (user_mode_vm(regs)) {
93 gdb_regs[GDB_SS] = regs->ss;
94 gdb_regs[GDB_SP] = regs->sp;
95 } else {
96 gdb_regs[GDB_SS] = __KERNEL_DS;
97 gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
98 }
92#else 99#else
93 gdb_regs[GDB_R8] = regs->r8; 100 gdb_regs[GDB_R8] = regs->r8;
94 gdb_regs[GDB_R9] = regs->r9; 101 gdb_regs[GDB_R9] = regs->r9;
@@ -101,8 +108,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
101 gdb_regs32[GDB_PS] = regs->flags; 108 gdb_regs32[GDB_PS] = regs->flags;
102 gdb_regs32[GDB_CS] = regs->cs; 109 gdb_regs32[GDB_CS] = regs->cs;
103 gdb_regs32[GDB_SS] = regs->ss; 110 gdb_regs32[GDB_SS] = regs->ss;
104#endif
105 gdb_regs[GDB_SP] = kernel_stack_pointer(regs); 111 gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
112#endif
106} 113}
107 114
108/** 115/**
@@ -198,41 +205,81 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
198 205
199static struct hw_breakpoint { 206static struct hw_breakpoint {
200 unsigned enabled; 207 unsigned enabled;
201 unsigned type;
202 unsigned len;
203 unsigned long addr; 208 unsigned long addr;
209 int len;
210 int type;
211 struct perf_event **pev;
204} breakinfo[4]; 212} breakinfo[4];
205 213
206static void kgdb_correct_hw_break(void) 214static void kgdb_correct_hw_break(void)
207{ 215{
208 unsigned long dr7;
209 int correctit = 0;
210 int breakbit;
211 int breakno; 216 int breakno;
212 217
213 get_debugreg(dr7, 7);
214 for (breakno = 0; breakno < 4; breakno++) { 218 for (breakno = 0; breakno < 4; breakno++) {
215 breakbit = 2 << (breakno << 1); 219 struct perf_event *bp;
216 if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { 220 struct arch_hw_breakpoint *info;
217 correctit = 1; 221 int val;
218 dr7 |= breakbit; 222 int cpu = raw_smp_processor_id();
219 dr7 &= ~(0xf0000 << (breakno << 2)); 223 if (!breakinfo[breakno].enabled)
220 dr7 |= ((breakinfo[breakno].len << 2) | 224 continue;
221 breakinfo[breakno].type) << 225 bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu);
222 ((breakno << 2) + 16); 226 info = counter_arch_bp(bp);
223 if (breakno >= 0 && breakno <= 3) 227 if (bp->attr.disabled != 1)
224 set_debugreg(breakinfo[breakno].addr, breakno); 228 continue;
225 229 bp->attr.bp_addr = breakinfo[breakno].addr;
226 } else { 230 bp->attr.bp_len = breakinfo[breakno].len;
227 if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { 231 bp->attr.bp_type = breakinfo[breakno].type;
228 correctit = 1; 232 info->address = breakinfo[breakno].addr;
229 dr7 &= ~breakbit; 233 info->len = breakinfo[breakno].len;
230 dr7 &= ~(0xf0000 << (breakno << 2)); 234 info->type = breakinfo[breakno].type;
231 } 235 val = arch_install_hw_breakpoint(bp);
232 } 236 if (!val)
237 bp->attr.disabled = 0;
233 } 238 }
234 if (correctit) 239 hw_breakpoint_restore();
235 set_debugreg(dr7, 7); 240}
241
242static int hw_break_reserve_slot(int breakno)
243{
244 int cpu;
245 int cnt = 0;
246 struct perf_event **pevent;
247
248 for_each_online_cpu(cpu) {
249 cnt++;
250 pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
251 if (dbg_reserve_bp_slot(*pevent))
252 goto fail;
253 }
254
255 return 0;
256
257fail:
258 for_each_online_cpu(cpu) {
259 cnt--;
260 if (!cnt)
261 break;
262 pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
263 dbg_release_bp_slot(*pevent);
264 }
265 return -1;
266}
267
268static int hw_break_release_slot(int breakno)
269{
270 struct perf_event **pevent;
271 int cpu;
272
273 for_each_online_cpu(cpu) {
274 pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
275 if (dbg_release_bp_slot(*pevent))
276 /*
 277 * The debugger is responsible for handling the retry on
278 * remove failure.
279 */
280 return -1;
281 }
282 return 0;
236} 283}
237 284
238static int 285static int
@@ -246,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
246 if (i == 4) 293 if (i == 4)
247 return -1; 294 return -1;
248 295
296 if (hw_break_release_slot(i)) {
297 printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr);
298 return -1;
299 }
249 breakinfo[i].enabled = 0; 300 breakinfo[i].enabled = 0;
250 301
251 return 0; 302 return 0;
@@ -254,15 +305,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
254static void kgdb_remove_all_hw_break(void) 305static void kgdb_remove_all_hw_break(void)
255{ 306{
256 int i; 307 int i;
308 int cpu = raw_smp_processor_id();
309 struct perf_event *bp;
257 310
258 for (i = 0; i < 4; i++) 311 for (i = 0; i < 4; i++) {
259 memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); 312 if (!breakinfo[i].enabled)
313 continue;
314 bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
315 if (bp->attr.disabled == 1)
316 continue;
317 arch_uninstall_hw_breakpoint(bp);
318 bp->attr.disabled = 1;
319 }
260} 320}
261 321
262static int 322static int
263kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) 323kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
264{ 324{
265 unsigned type;
266 int i; 325 int i;
267 326
268 for (i = 0; i < 4; i++) 327 for (i = 0; i < 4; i++)
@@ -273,27 +332,42 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
273 332
274 switch (bptype) { 333 switch (bptype) {
275 case BP_HARDWARE_BREAKPOINT: 334 case BP_HARDWARE_BREAKPOINT:
276 type = 0; 335 len = 1;
277 len = 1; 336 breakinfo[i].type = X86_BREAKPOINT_EXECUTE;
278 break; 337 break;
279 case BP_WRITE_WATCHPOINT: 338 case BP_WRITE_WATCHPOINT:
280 type = 1; 339 breakinfo[i].type = X86_BREAKPOINT_WRITE;
281 break; 340 break;
282 case BP_ACCESS_WATCHPOINT: 341 case BP_ACCESS_WATCHPOINT:
283 type = 3; 342 breakinfo[i].type = X86_BREAKPOINT_RW;
284 break; 343 break;
285 default: 344 default:
286 return -1; 345 return -1;
287 } 346 }
288 347 switch (len) {
289 if (len == 1 || len == 2 || len == 4) 348 case 1:
290 breakinfo[i].len = len - 1; 349 breakinfo[i].len = X86_BREAKPOINT_LEN_1;
291 else 350 break;
351 case 2:
352 breakinfo[i].len = X86_BREAKPOINT_LEN_2;
353 break;
354 case 4:
355 breakinfo[i].len = X86_BREAKPOINT_LEN_4;
356 break;
357#ifdef CONFIG_X86_64
358 case 8:
359 breakinfo[i].len = X86_BREAKPOINT_LEN_8;
360 break;
361#endif
362 default:
292 return -1; 363 return -1;
293 364 }
294 breakinfo[i].enabled = 1;
295 breakinfo[i].addr = addr; 365 breakinfo[i].addr = addr;
296 breakinfo[i].type = type; 366 if (hw_break_reserve_slot(i)) {
367 breakinfo[i].addr = 0;
368 return -1;
369 }
370 breakinfo[i].enabled = 1;
297 371
298 return 0; 372 return 0;
299} 373}
@@ -308,8 +382,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
308 */ 382 */
309void kgdb_disable_hw_debug(struct pt_regs *regs) 383void kgdb_disable_hw_debug(struct pt_regs *regs)
310{ 384{
385 int i;
386 int cpu = raw_smp_processor_id();
387 struct perf_event *bp;
388
311 /* Disable hardware debugging while we are in kgdb: */ 389 /* Disable hardware debugging while we are in kgdb: */
312 set_debugreg(0UL, 7); 390 set_debugreg(0UL, 7);
391 for (i = 0; i < 4; i++) {
392 if (!breakinfo[i].enabled)
393 continue;
394 bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
395 if (bp->attr.disabled == 1)
396 continue;
397 arch_uninstall_hw_breakpoint(bp);
398 bp->attr.disabled = 1;
399 }
313} 400}
314 401
315/** 402/**
@@ -373,7 +460,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
373 struct pt_regs *linux_regs) 460 struct pt_regs *linux_regs)
374{ 461{
375 unsigned long addr; 462 unsigned long addr;
376 unsigned long dr6;
377 char *ptr; 463 char *ptr;
378 int newPC; 464 int newPC;
379 465
@@ -395,25 +481,10 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
395 /* set the trace bit if we're stepping */ 481 /* set the trace bit if we're stepping */
396 if (remcomInBuffer[0] == 's') { 482 if (remcomInBuffer[0] == 's') {
397 linux_regs->flags |= X86_EFLAGS_TF; 483 linux_regs->flags |= X86_EFLAGS_TF;
398 kgdb_single_step = 1;
399 atomic_set(&kgdb_cpu_doing_single_step, 484 atomic_set(&kgdb_cpu_doing_single_step,
400 raw_smp_processor_id()); 485 raw_smp_processor_id());
401 } 486 }
402 487
403 get_debugreg(dr6, 6);
404 if (!(dr6 & 0x4000)) {
405 int breakno;
406
407 for (breakno = 0; breakno < 4; breakno++) {
408 if (dr6 & (1 << breakno) &&
409 breakinfo[breakno].type == 0) {
410 /* Set restore flag: */
411 linux_regs->flags |= X86_EFLAGS_RF;
412 break;
413 }
414 }
415 }
416 set_debugreg(0UL, 6);
417 kgdb_correct_hw_break(); 488 kgdb_correct_hw_break();
418 489
419 return 0; 490 return 0;
@@ -481,8 +552,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
481 break; 552 break;
482 553
483 case DIE_DEBUG: 554 case DIE_DEBUG:
484 if (atomic_read(&kgdb_cpu_doing_single_step) == 555 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
485 raw_smp_processor_id()) {
486 if (user_mode(regs)) 556 if (user_mode(regs))
487 return single_step_cont(regs, args); 557 return single_step_cont(regs, args);
488 break; 558 break;
@@ -535,7 +605,42 @@ static struct notifier_block kgdb_notifier = {
535 */ 605 */
536int kgdb_arch_init(void) 606int kgdb_arch_init(void)
537{ 607{
538 return register_die_notifier(&kgdb_notifier); 608 int i, cpu;
609 int ret;
610 struct perf_event_attr attr;
611 struct perf_event **pevent;
612
613 ret = register_die_notifier(&kgdb_notifier);
614 if (ret != 0)
615 return ret;
616 /*
 617 * Pre-allocate the hw breakpoint structures in the non-atomic
 618 * portion of kgdb because this operation requires mutexes to
619 * complete.
620 */
621 attr.bp_addr = (unsigned long)kgdb_arch_init;
622 attr.type = PERF_TYPE_BREAKPOINT;
623 attr.bp_len = HW_BREAKPOINT_LEN_1;
624 attr.bp_type = HW_BREAKPOINT_W;
625 attr.disabled = 1;
626 for (i = 0; i < 4; i++) {
627 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
628 if (IS_ERR(breakinfo[i].pev)) {
629 printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n");
630 breakinfo[i].pev = NULL;
631 kgdb_arch_exit();
632 return -1;
633 }
634 for_each_online_cpu(cpu) {
635 pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
636 pevent[0]->hw.sample_period = 1;
637 if (pevent[0]->destroy != NULL) {
638 pevent[0]->destroy = NULL;
639 release_bp_slot(*pevent);
640 }
641 }
642 }
643 return ret;
539} 644}
540 645
541/** 646/**
@@ -546,6 +651,13 @@ int kgdb_arch_init(void)
546 */ 651 */
547void kgdb_arch_exit(void) 652void kgdb_arch_exit(void)
548{ 653{
654 int i;
655 for (i = 0; i < 4; i++) {
656 if (breakinfo[i].pev) {
657 unregister_wide_hw_breakpoint(breakinfo[i].pev);
658 breakinfo[i].pev = NULL;
659 }
660 }
549 unregister_die_notifier(&kgdb_notifier); 661 unregister_die_notifier(&kgdb_notifier);
550} 662}
551 663
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1f3186ce213c..5de9f4a9c3fd 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
337 337
338int __kprobes arch_prepare_kprobe(struct kprobe *p) 338int __kprobes arch_prepare_kprobe(struct kprobe *p)
339{ 339{
340 if (alternatives_text_reserved(p->addr, p->addr))
341 return -EINVAL;
342
340 if (!can_probe((unsigned long)p->addr)) 343 if (!can_probe((unsigned long)p->addr))
341 return -EILSEQ; 344 return -EILSEQ;
342 /* insn: must be on special executable page on x86. */ 345 /* insn: must be on special executable page on x86. */
@@ -429,7 +432,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
430 struct kprobe_ctlblk *kcb) 433 struct kprobe_ctlblk *kcb)
431{ 434{
432#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) 435#if !defined(CONFIG_PREEMPT)
433 if (p->ainsn.boostable == 1 && !p->post_handler) { 436 if (p->ainsn.boostable == 1 && !p->post_handler) {
434 /* Boost up -- we can execute copied instructions directly */ 437 /* Boost up -- we can execute copied instructions directly */
435 reset_current_kprobe(); 438 reset_current_kprobe();
@@ -481,7 +484,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
481 484
482/* 485/*
483 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 486 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
484 * remain disabled thorough out this function. 487 * remain disabled throughout this function.
485 */ 488 */
486static int __kprobes kprobe_handler(struct pt_regs *regs) 489static int __kprobes kprobe_handler(struct pt_regs *regs)
487{ 490{
@@ -818,7 +821,7 @@ no_change:
818 821
819/* 822/*
820 * Interrupts are disabled on entry as trap1 is an interrupt gate and they 823 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
821 * remain disabled thoroughout this function. 824 * remain disabled throughout this function.
822 */ 825 */
823static int __kprobes post_kprobe_handler(struct pt_regs *regs) 826static int __kprobes post_kprobe_handler(struct pt_regs *regs)
824{ 827{
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
deleted file mode 100644
index 2a62d843f015..000000000000
--- a/arch/x86/kernel/mfgpt_32.c
+++ /dev/null
@@ -1,410 +0,0 @@
1/*
2 * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT)
3 *
4 * Copyright (C) 2006, Advanced Micro Devices, Inc.
5 * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of version 2 of the GNU General Public License
9 * as published by the Free Software Foundation.
10 *
 11 * The MFGPTs are documented in the AMD Geode CS5536 Companion Device Data Book.
12 */
13
14/*
15 * We are using the 32.768kHz input clock - it's the only one that has the
16 * ranges we find desirable. The following table lists the suitable
17 * divisors and the associated Hz, minimum interval and the maximum interval:
18 *
19 * Divisor Hz Min Delta (s) Max Delta (s)
20 * 1 32768 .00048828125 2.000
21 * 2 16384 .0009765625 4.000
22 * 4 8192 .001953125 8.000
23 * 8 4096 .00390625 16.000
24 * 16 2048 .0078125 32.000
25 * 32 1024 .015625 64.000
26 * 64 512 .03125 128.000
27 * 128 256 .0625 256.000
28 * 256 128 .125 512.000
29 */
30
31#include <linux/kernel.h>
32#include <linux/interrupt.h>
33#include <linux/module.h>
34#include <asm/geode.h>
35
36#define MFGPT_DEFAULT_IRQ 7
37
38static struct mfgpt_timer_t {
39 unsigned int avail:1;
40} mfgpt_timers[MFGPT_MAX_TIMERS];
41
42/* Selected from the table above */
43
44#define MFGPT_DIVISOR 16
45#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
46#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
47#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
48
49/* Allow for disabling of MFGPTs */
50static int disable;
51static int __init mfgpt_disable(char *s)
52{
53 disable = 1;
54 return 1;
55}
56__setup("nomfgpt", mfgpt_disable);
57
58/* Reset the MFGPT timers. This is required by some broken BIOSes which already
 59 * do the same and leave the system in an unstable state. At least TinyBIOS
 60 * 0.98 is affected (0.99 is OK with the MFGPT workaround left off).
61 */
62static int __init mfgpt_fix(char *s)
63{
64 u32 val, dummy;
65
 66 /* The following undocumented bit resets the MFGPT timers */
67 val = 0xFF; dummy = 0;
68 wrmsr(MSR_MFGPT_SETUP, val, dummy);
69 return 1;
70}
71__setup("mfgptfix", mfgpt_fix);
72
73/*
74 * Check whether any MFGPTs are available for the kernel to use. In most
75 * cases, firmware that uses AMD's VSA code will claim all timers during
76 * bootup; we certainly don't want to take them if they're already in use.
77 * In other cases (such as with VSAless OpenFirmware), the system firmware
78 * leaves timers available for us to use.
79 */
80
81
82static int timers = -1;
83
84static void geode_mfgpt_detect(void)
85{
86 int i;
87 u16 val;
88
89 timers = 0;
90
91 if (disable) {
92 printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
93 goto done;
94 }
95
96 if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
97 printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
98 goto done;
99 }
100
101 for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
102 val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
103 if (!(val & MFGPT_SETUP_SETUP)) {
104 mfgpt_timers[i].avail = 1;
105 timers++;
106 }
107 }
108
109done:
110 printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
111}
112
113int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
114{
115 u32 msr, mask, value, dummy;
116 int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
117
118 if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
119 return -EIO;
120
121 /*
122 * The register maps for these are described in sections 6.17.1.x of
123 * the AMD Geode CS5536 Companion Device Data Book.
124 */
125 switch (event) {
126 case MFGPT_EVENT_RESET:
127 /*
128 * XXX: According to the docs, we cannot reset timers above
129 * 6; that is, resets for 7 and 8 will be ignored. Is this
130 * a problem? -dilinger
131 */
132 msr = MSR_MFGPT_NR;
133 mask = 1 << (timer + 24);
134 break;
135
136 case MFGPT_EVENT_NMI:
137 msr = MSR_MFGPT_NR;
138 mask = 1 << (timer + shift);
139 break;
140
141 case MFGPT_EVENT_IRQ:
142 msr = MSR_MFGPT_IRQ;
143 mask = 1 << (timer + shift);
144 break;
145
146 default:
147 return -EIO;
148 }
149
150 rdmsr(msr, value, dummy);
151
152 if (enable)
153 value |= mask;
154 else
155 value &= ~mask;
156
157 wrmsr(msr, value, dummy);
158 return 0;
159}
160EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
161
162int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
163{
164 u32 zsel, lpc, dummy;
165 int shift;
166
167 if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
168 return -EIO;
169
170 /*
171 * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
172 * is using the same CMP of the timer's Siamese twin, the IRQ is set to
173 * 2, and we mustn't use nor change it.
174 * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
 175 * IRQ of the 1st. This can only happen if forcing an IRQ; calling this
176 * with *irq==0 is safe. Currently there _are_ no 2 drivers.
177 */
178 rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
179 shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
180 if (((zsel >> shift) & 0xF) == 2)
181 return -EIO;
182
183 /* Choose IRQ: if none supplied, keep IRQ already set or use default */
184 if (!*irq)
185 *irq = (zsel >> shift) & 0xF;
186 if (!*irq)
187 *irq = MFGPT_DEFAULT_IRQ;
188
189 /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
190 if (*irq < 1 || *irq == 2 || *irq > 15)
191 return -EIO;
192 rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
193 if (lpc & (1 << *irq))
194 return -EIO;
195
196 /* All chosen and checked - go for it */
197 if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
198 return -EIO;
199 if (enable) {
200 zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
201 wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
202 }
203
204 return 0;
205}
206
207static int mfgpt_get(int timer)
208{
209 mfgpt_timers[timer].avail = 0;
210 printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
211 return timer;
212}
213
214int geode_mfgpt_alloc_timer(int timer, int domain)
215{
216 int i;
217
218 if (timers == -1) {
219 /* timers haven't been detected yet */
220 geode_mfgpt_detect();
221 }
222
223 if (!timers)
224 return -1;
225
226 if (timer >= MFGPT_MAX_TIMERS)
227 return -1;
228
229 if (timer < 0) {
230 /* Try to find an available timer */
231 for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
232 if (mfgpt_timers[i].avail)
233 return mfgpt_get(i);
234
235 if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
236 break;
237 }
238 } else {
239 /* If they requested a specific timer, try to honor that */
240 if (mfgpt_timers[timer].avail)
241 return mfgpt_get(timer);
242 }
243
244 /* No timers available - too bad */
245 return -1;
246}
247EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
248
249
250#ifdef CONFIG_GEODE_MFGPT_TIMER
251
252/*
 253 * The MFGPT timers on the CS5536 provide us with suitable timers to use
 254 * as clock event sources - not as good as an HPET or APIC, but certainly
255 * better than the PIT. This isn't a general purpose MFGPT driver, but
256 * a simplified one designed specifically to act as a clock event source.
257 * For full details about the MFGPT, please consult the CS5536 data sheet.
258 */
259
260#include <linux/clocksource.h>
261#include <linux/clockchips.h>
262
263static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
264static u16 mfgpt_event_clock;
265
266static int irq;
267static int __init mfgpt_setup(char *str)
268{
269 get_option(&str, &irq);
270 return 1;
271}
272__setup("mfgpt_irq=", mfgpt_setup);
273
274static void mfgpt_disable_timer(u16 clock)
275{
276 /* avoid races by clearing CMP1 and CMP2 unconditionally */
277 geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
278 MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
279}
280
281static int mfgpt_next_event(unsigned long, struct clock_event_device *);
282static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *);
283
284static struct clock_event_device mfgpt_clockevent = {
285 .name = "mfgpt-timer",
286 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
287 .set_mode = mfgpt_set_mode,
288 .set_next_event = mfgpt_next_event,
289 .rating = 250,
290 .cpumask = cpu_all_mask,
291 .shift = 32
292};
293
294static void mfgpt_start_timer(u16 delta)
295{
296 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
297 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
298
299 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP,
300 MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2);
301}
302
303static void mfgpt_set_mode(enum clock_event_mode mode,
304 struct clock_event_device *evt)
305{
306 mfgpt_disable_timer(mfgpt_event_clock);
307
308 if (mode == CLOCK_EVT_MODE_PERIODIC)
309 mfgpt_start_timer(MFGPT_PERIODIC);
310
311 mfgpt_tick_mode = mode;
312}
313
314static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
315{
316 mfgpt_start_timer(delta);
317 return 0;
318}
319
320static irqreturn_t mfgpt_tick(int irq, void *dev_id)
321{
322 u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
323
324 /* See if the interrupt was for us */
325 if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
326 return IRQ_NONE;
327
328 /* Turn off the clock (and clear the event) */
329 mfgpt_disable_timer(mfgpt_event_clock);
330
331 if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN)
332 return IRQ_HANDLED;
333
334 /* Clear the counter */
335 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
336
337 /* Restart the clock in periodic mode */
338
339 if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) {
340 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP,
341 MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2);
342 }
343
344 mfgpt_clockevent.event_handler(&mfgpt_clockevent);
345 return IRQ_HANDLED;
346}
347
348static struct irqaction mfgptirq = {
349 .handler = mfgpt_tick,
350 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
351 .name = "mfgpt-timer"
352};
353
354int __init mfgpt_timer_setup(void)
355{
356 int timer, ret;
357 u16 val;
358
359 timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
360 if (timer < 0) {
361 printk(KERN_ERR
362 "mfgpt-timer: Could not allocate a MFPGT timer\n");
363 return -ENODEV;
364 }
365
366 mfgpt_event_clock = timer;
367
368 /* Set up the IRQ on the MFGPT side */
369 if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
370 printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
371 return -EIO;
372 }
373
374 /* And register it with the kernel */
375 ret = setup_irq(irq, &mfgptirq);
376
377 if (ret) {
378 printk(KERN_ERR
379 "mfgpt-timer: Unable to set up the interrupt.\n");
380 goto err;
381 }
382
383 /* Set the clock scale and enable the event mode for CMP2 */
384 val = MFGPT_SCALE | (3 << 8);
385
386 geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
387
388 /* Set up the clock event */
389 mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
390 mfgpt_clockevent.shift);
391 mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
392 &mfgpt_clockevent);
393 mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
394 &mfgpt_clockevent);
395
396 printk(KERN_INFO
397 "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
398 timer, irq);
399 clockevents_register_device(&mfgpt_clockevent);
400
401 return 0;
402
403err:
404 geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
405 printk(KERN_ERR
406 "mfgpt-timer: Unable to set up the MFGPT clock source\n");
407 return -EIO;
408}
409
410#endif
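
A note on the deleted driver's divisor table: with MFGPT_DIVISOR = 16 the counter ticks at 32768/16 = 2048 Hz, so one tick is about 488 µs and the 16-bit compare register bounds a one-shot at roughly 32 s, exactly as the table at the top of the file states. A quick check of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
    	const unsigned long hz = 32768 / 16;			/* MFGPT_HZ at divisor 16 */
    	printf("tick  = %lu ns\n", 1000000000UL / hz);		/* ~488281 ns */
    	printf("range = %lu ms\n", 0xFFFEUL * 1000 / hz);	/* ~31999 ms, matches the table */
    	return 0;
    }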
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 63123d902103..e1af7c055c7d 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,6 +13,9 @@
13 * Licensed under the terms of the GNU General Public 13 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 14 * License version 2. See file COPYING for details.
15 */ 15 */
16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
16#include <linux/firmware.h> 19#include <linux/firmware.h>
17#include <linux/pci_ids.h> 20#include <linux/pci_ids.h>
18#include <linux/uaccess.h> 21#include <linux/uaccess.h>
@@ -33,9 +36,6 @@ MODULE_LICENSE("GPL v2");
33#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 36#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
34#define UCODE_UCODE_TYPE 0x00000001 37#define UCODE_UCODE_TYPE 0x00000001
35 38
36const struct firmware *firmware;
37static int supported_cpu;
38
39struct equiv_cpu_entry { 39struct equiv_cpu_entry {
40 u32 installed_cpu; 40 u32 installed_cpu;
41 u32 fixed_errata_mask; 41 u32 fixed_errata_mask;
@@ -74,14 +74,17 @@ static struct equiv_cpu_entry *equiv_cpu_table;
74 74
75static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 75static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
76{ 76{
77 struct cpuinfo_x86 *c = &cpu_data(cpu);
77 u32 dummy; 78 u32 dummy;
78 79
79 if (!supported_cpu)
80 return -1;
81
82 memset(csig, 0, sizeof(*csig)); 80 memset(csig, 0, sizeof(*csig));
81 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
82 pr_warning("microcode: CPU%d: AMD CPU family 0x%x not "
83 "supported\n", cpu, c->x86);
84 return -1;
85 }
83 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); 86 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
84 pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); 87 pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
85 return 0; 88 return 0;
86} 89}
87 90
@@ -111,8 +114,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
111 114
112 /* ucode might be chipset specific -- currently we don't support this */ 115 /* ucode might be chipset specific -- currently we don't support this */
113 if (mc_header->nb_dev_id || mc_header->sb_dev_id) { 116 if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
114 pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " 117 pr_err("CPU%d: loading of chipset specific code not yet supported\n",
115 "specific code not yet supported\n", cpu); 118 cpu);
116 return 0; 119 return 0;
117 } 120 }
118 121
@@ -141,12 +144,12 @@ static int apply_microcode_amd(int cpu)
141 144
142 /* check current patch id and patch's id for match */ 145 /* check current patch id and patch's id for match */
143 if (rev != mc_amd->hdr.patch_id) { 146 if (rev != mc_amd->hdr.patch_id) {
144 pr_err("microcode: CPU%d: update failed " 147 pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
145 "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); 148 cpu, mc_amd->hdr.patch_id);
146 return -1; 149 return -1;
147 } 150 }
148 151
149 pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); 152 pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
150 uci->cpu_sig.rev = rev; 153 uci->cpu_sig.rev = rev;
151 154
152 return 0; 155 return 0;
@@ -169,15 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
169 return NULL; 172 return NULL;
170 173
171 if (section_hdr[0] != UCODE_UCODE_TYPE) { 174 if (section_hdr[0] != UCODE_UCODE_TYPE) {
172 pr_err("microcode: error: invalid type field in " 175 pr_err("error: invalid type field in container file section header\n");
173 "container file section header\n");
174 return NULL; 176 return NULL;
175 } 177 }
176 178
177 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); 179 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
178 180
179 if (total_size > size || total_size > UCODE_MAX_SIZE) { 181 if (total_size > size || total_size > UCODE_MAX_SIZE) {
180 pr_err("microcode: error: size mismatch\n"); 182 pr_err("error: size mismatch\n");
181 return NULL; 183 return NULL;
182 } 184 }
183 185
@@ -206,14 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf)
206 size = buf_pos[2]; 208 size = buf_pos[2];
207 209
208 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { 210 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
209 pr_err("microcode: error: invalid type field in " 211 pr_err("error: invalid type field in container file section header\n");
210 "container file section header\n");
211 return 0; 212 return 0;
212 } 213 }
213 214
214 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 215 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
215 if (!equiv_cpu_table) { 216 if (!equiv_cpu_table) {
216 pr_err("microcode: failed to allocate equivalent CPU table\n"); 217 pr_err("failed to allocate equivalent CPU table\n");
217 return 0; 218 return 0;
218 } 219 }
219 220
@@ -246,7 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
246 247
247 offset = install_equiv_cpu_table(ucode_ptr); 248 offset = install_equiv_cpu_table(ucode_ptr);
248 if (!offset) { 249 if (!offset) {
249 pr_err("microcode: failed to create equivalent cpu table\n"); 250 pr_err("failed to create equivalent cpu table\n");
250 return UCODE_ERROR; 251 return UCODE_ERROR;
251 } 252 }
252 253
@@ -277,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
277 if (!leftover) { 278 if (!leftover) {
278 vfree(uci->mc); 279 vfree(uci->mc);
279 uci->mc = new_mc; 280 uci->mc = new_mc;
280 pr_debug("microcode: CPU%d found a matching microcode " 281 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
281 "update with version 0x%x (current=0x%x)\n",
282 cpu, new_rev, uci->cpu_sig.rev); 282 cpu, new_rev, uci->cpu_sig.rev);
283 } else { 283 } else {
284 vfree(new_mc); 284 vfree(new_mc);
@@ -294,27 +294,32 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
294 294
295static enum ucode_state request_microcode_fw(int cpu, struct device *device) 295static enum ucode_state request_microcode_fw(int cpu, struct device *device)
296{ 296{
297 const char *fw_name = "amd-ucode/microcode_amd.bin";
298 const struct firmware *firmware;
297 enum ucode_state ret; 299 enum ucode_state ret;
298 300
299 if (firmware == NULL) 301 if (request_firmware(&firmware, fw_name, device)) {
302 printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
300 return UCODE_NFOUND; 303 return UCODE_NFOUND;
304 }
301 305
302 if (*(u32 *)firmware->data != UCODE_MAGIC) { 306 if (*(u32 *)firmware->data != UCODE_MAGIC) {
303 pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", 307 pr_err("invalid UCODE_MAGIC (0x%08x)\n",
304 *(u32 *)firmware->data); 308 *(u32 *)firmware->data);
305 return UCODE_ERROR; 309 return UCODE_ERROR;
306 } 310 }
307 311
308 ret = generic_load_microcode(cpu, firmware->data, firmware->size); 312 ret = generic_load_microcode(cpu, firmware->data, firmware->size);
309 313
314 release_firmware(firmware);
315
310 return ret; 316 return ret;
311} 317}
312 318
313static enum ucode_state 319static enum ucode_state
314request_microcode_user(int cpu, const void __user *buf, size_t size) 320request_microcode_user(int cpu, const void __user *buf, size_t size)
315{ 321{
316 pr_info("microcode: AMD microcode update via " 322 pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
317 "/dev/cpu/microcode not supported\n");
318 return UCODE_ERROR; 323 return UCODE_ERROR;
319} 324}
320 325
@@ -326,32 +331,7 @@ static void microcode_fini_cpu_amd(int cpu)
326 uci->mc = NULL; 331 uci->mc = NULL;
327} 332}
328 333
329void init_microcode_amd(struct device *device)
330{
331 const char *fw_name = "amd-ucode/microcode_amd.bin";
332 struct cpuinfo_x86 *c = &boot_cpu_data;
333
334 WARN_ON(c->x86_vendor != X86_VENDOR_AMD);
335
336 if (c->x86 < 0x10) {
337 pr_warning("microcode: AMD CPU family 0x%x not supported\n",
338 c->x86);
339 return;
340 }
341 supported_cpu = 1;
342
343 if (request_firmware(&firmware, fw_name, device))
344 pr_err("microcode: failed to load file %s\n", fw_name);
345}
346
347void fini_microcode_amd(void)
348{
349 release_firmware(firmware);
350}
351
352static struct microcode_ops microcode_amd_ops = { 334static struct microcode_ops microcode_amd_ops = {
353 .init = init_microcode_amd,
354 .fini = fini_microcode_amd,
355 .request_microcode_user = request_microcode_user, 335 .request_microcode_user = request_microcode_user,
356 .request_microcode_fw = request_microcode_fw, 336 .request_microcode_fw = request_microcode_fw,
357 .collect_cpu_info = collect_cpu_info_amd, 337 .collect_cpu_info = collect_cpu_info_amd,
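
The AMD loader now requests and releases the firmware blob inside request_microcode_fw() instead of pinning it for the driver's lifetime through the removed init/fini hooks, and the CPU-family check moves into collect_cpu_info_amd(). The underlying firmware-loading pattern, with the parsing step elided:

    #include <linux/firmware.h>

    static int load_blob(struct device *dev)
    {
    	const struct firmware *fw;
    	int ret;

    	ret = request_firmware(&fw, "amd-ucode/microcode_amd.bin", dev);
    	if (ret)
    		return ret;		/* not found, or the userspace helper failed */

    	/* ... validate magic, parse fw->data up to fw->size ... */

    	release_firmware(fw);		/* drop the in-kernel copy right after parsing */
    	return 0;
    }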
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index e68aae397869..cceb5bc3c3c2 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -70,6 +70,9 @@
70 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
71 * Thanks to Stuart Swales for pointing out this bug. 71 * Thanks to Stuart Swales for pointing out this bug.
72 */ 72 */
73
74#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
75
73#include <linux/platform_device.h> 76#include <linux/platform_device.h>
74#include <linux/miscdevice.h> 77#include <linux/miscdevice.h>
75#include <linux/capability.h> 78#include <linux/capability.h>
@@ -209,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
209 ssize_t ret = -EINVAL; 212 ssize_t ret = -EINVAL;
210 213
211 if ((len >> PAGE_SHIFT) > totalram_pages) { 214 if ((len >> PAGE_SHIFT) > totalram_pages) {
212 pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); 215 pr_err("too much data (max %ld pages)\n", totalram_pages);
213 return ret; 216 return ret;
214 } 217 }
215 218
@@ -244,7 +247,7 @@ static int __init microcode_dev_init(void)
244 247
245 error = misc_register(&microcode_dev); 248 error = misc_register(&microcode_dev);
246 if (error) { 249 if (error) {
247 pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); 250 pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
248 return error; 251 return error;
249 } 252 }
250 253
@@ -359,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu)
359 if (!uci->mc) 362 if (!uci->mc)
360 return UCODE_NFOUND; 363 return UCODE_NFOUND;
361 364
362 pr_debug("microcode: CPU%d updated upon resume\n", cpu); 365 pr_debug("CPU%d updated upon resume\n", cpu);
363 apply_microcode_on_target(cpu); 366 apply_microcode_on_target(cpu);
364 367
365 return UCODE_OK; 368 return UCODE_OK;
@@ -379,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu)
379 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); 382 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
380 383
381 if (ustate == UCODE_OK) { 384 if (ustate == UCODE_OK) {
382 pr_debug("microcode: CPU%d updated upon init\n", cpu); 385 pr_debug("CPU%d updated upon init\n", cpu);
383 apply_microcode_on_target(cpu); 386 apply_microcode_on_target(cpu);
384 } 387 }
385 388
@@ -391,7 +394,7 @@ static enum ucode_state microcode_update_cpu(int cpu)
391 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 394 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
392 enum ucode_state ustate; 395 enum ucode_state ustate;
393 396
394 if (uci->valid && uci->mc) 397 if (uci->valid)
395 ustate = microcode_resume_cpu(cpu); 398 ustate = microcode_resume_cpu(cpu);
396 else 399 else
397 ustate = microcode_init_cpu(cpu); 400 ustate = microcode_init_cpu(cpu);
@@ -406,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
406 if (!cpu_online(cpu)) 409 if (!cpu_online(cpu))
407 return 0; 410 return 0;
408 411
409 pr_debug("microcode: CPU%d added\n", cpu); 412 pr_debug("CPU%d added\n", cpu);
410 413
411 err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); 414 err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
412 if (err) 415 if (err)
@@ -425,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
425 if (!cpu_online(cpu)) 428 if (!cpu_online(cpu))
426 return 0; 429 return 0;
427 430
428 pr_debug("microcode: CPU%d removed\n", cpu); 431 pr_debug("CPU%d removed\n", cpu);
429 microcode_fini_cpu(cpu); 432 microcode_fini_cpu(cpu);
430 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 433 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
431 return 0; 434 return 0;
@@ -473,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
473 microcode_update_cpu(cpu); 476 microcode_update_cpu(cpu);
474 case CPU_DOWN_FAILED: 477 case CPU_DOWN_FAILED:
475 case CPU_DOWN_FAILED_FROZEN: 478 case CPU_DOWN_FAILED_FROZEN:
476 pr_debug("microcode: CPU%d added\n", cpu); 479 pr_debug("CPU%d added\n", cpu);
477 if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) 480 if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
478 pr_err("microcode: Failed to create group for CPU%d\n", cpu); 481 pr_err("Failed to create group for CPU%d\n", cpu);
479 break; 482 break;
480 case CPU_DOWN_PREPARE: 483 case CPU_DOWN_PREPARE:
481 case CPU_DOWN_PREPARE_FROZEN: 484 case CPU_DOWN_PREPARE_FROZEN:
482 /* Suspend is in progress, only remove the interface */ 485 /* Suspend is in progress, only remove the interface */
483 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 486 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
484 pr_debug("microcode: CPU%d removed\n", cpu); 487 pr_debug("CPU%d removed\n", cpu);
485 break; 488 break;
486 case CPU_DEAD: 489 case CPU_DEAD:
487 case CPU_UP_CANCELED_FROZEN: 490 case CPU_UP_CANCELED_FROZEN:
@@ -507,7 +510,7 @@ static int __init microcode_init(void)
507 microcode_ops = init_amd_microcode(); 510 microcode_ops = init_amd_microcode();
508 511
509 if (!microcode_ops) { 512 if (!microcode_ops) {
510 pr_err("microcode: no support for this CPU vendor\n"); 513 pr_err("no support for this CPU vendor\n");
511 return -ENODEV; 514 return -ENODEV;
512 } 515 }
513 516
@@ -518,9 +521,6 @@ static int __init microcode_init(void)
518 return PTR_ERR(microcode_pdev); 521 return PTR_ERR(microcode_pdev);
519 } 522 }
520 523
521 if (microcode_ops->init)
522 microcode_ops->init(&microcode_pdev->dev);
523
524 get_online_cpus(); 524 get_online_cpus();
525 mutex_lock(&microcode_mutex); 525 mutex_lock(&microcode_mutex);
526 526
@@ -541,8 +541,7 @@ static int __init microcode_init(void)
541 register_hotcpu_notifier(&mc_cpu_notifier); 541 register_hotcpu_notifier(&mc_cpu_notifier);
542 542
543 pr_info("Microcode Update Driver: v" MICROCODE_VERSION 543 pr_info("Microcode Update Driver: v" MICROCODE_VERSION
544 " <tigran@aivazian.fsnet.co.uk>," 544 " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
545 " Peter Oruba\n");
546 545
547 return 0; 546 return 0;
548} 547}
@@ -564,9 +563,6 @@ static void __exit microcode_exit(void)
564 563
565 platform_device_unregister(microcode_pdev); 564 platform_device_unregister(microcode_pdev);
566 565
567 if (microcode_ops->fini)
568 microcode_ops->fini();
569
570 microcode_ops = NULL; 566 microcode_ops = NULL;
571 567
572 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); 568 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
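
The repeated "microcode: " literals disappear from these files because pr_fmt() now prefixes every pr_*() call site at compile time. The macro must be defined before the first printk-related include to take effect; a self-contained illustration:

    /* Must come before any include that pulls in printk. */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

    #include <linux/kernel.h>

    static void demo(void)
    {
    	/* Emits "microcode: too much data" when built into microcode.ko. */
    	pr_err("too much data\n");
    }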
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0d334ddd0a96..85a343e28937 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,6 +70,9 @@
70 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
71 * Thanks to Stuart Swales for pointing out this bug. 71 * Thanks to Stuart Swales for pointing out this bug.
72 */ 72 */
73
74#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
75
73#include <linux/firmware.h> 76#include <linux/firmware.h>
74#include <linux/uaccess.h> 77#include <linux/uaccess.h>
75#include <linux/kernel.h> 78#include <linux/kernel.h>
@@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
146 149
147 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || 150 if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
148 cpu_has(c, X86_FEATURE_IA64)) { 151 cpu_has(c, X86_FEATURE_IA64)) {
149 printk(KERN_ERR "microcode: CPU%d not a capable Intel " 152 pr_err("CPU%d not a capable Intel processor\n", cpu_num);
150 "processor\n", cpu_num);
151 return -1; 153 return -1;
152 } 154 }
153 155
@@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
165 /* get the current revision from MSR 0x8B */ 167 /* get the current revision from MSR 0x8B */
166 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); 168 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
167 169
168 printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", 170 pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
169 cpu_num, csig->sig, csig->pf, csig->rev); 171 cpu_num, csig->sig, csig->pf, csig->rev);
170 172
171 return 0; 173 return 0;
172} 174}
@@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc)
194 data_size = get_datasize(mc_header); 196 data_size = get_datasize(mc_header);
195 197
196 if (data_size + MC_HEADER_SIZE > total_size) { 198 if (data_size + MC_HEADER_SIZE > total_size) {
197 printk(KERN_ERR "microcode: error! " 199 pr_err("error! Bad data size in microcode data file\n");
198 "Bad data size in microcode data file\n");
199 return -EINVAL; 200 return -EINVAL;
200 } 201 }
201 202
202 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { 203 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
203 printk(KERN_ERR "microcode: error! " 204 pr_err("error! Unknown microcode update format\n");
204 "Unknown microcode update format\n");
205 return -EINVAL; 205 return -EINVAL;
206 } 206 }
207 ext_table_size = total_size - (MC_HEADER_SIZE + data_size); 207 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
208 if (ext_table_size) { 208 if (ext_table_size) {
209 if ((ext_table_size < EXT_HEADER_SIZE) 209 if ((ext_table_size < EXT_HEADER_SIZE)
210 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { 210 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
211 printk(KERN_ERR "microcode: error! " 211 pr_err("error! Small exttable size in microcode data file\n");
212 "Small exttable size in microcode data file\n");
213 return -EINVAL; 212 return -EINVAL;
214 } 213 }
215 ext_header = mc + MC_HEADER_SIZE + data_size; 214 ext_header = mc + MC_HEADER_SIZE + data_size;
216 if (ext_table_size != exttable_size(ext_header)) { 215 if (ext_table_size != exttable_size(ext_header)) {
217 printk(KERN_ERR "microcode: error! " 216 pr_err("error! Bad exttable size in microcode data file\n");
218 "Bad exttable size in microcode data file\n");
219 return -EFAULT; 217 return -EFAULT;
220 } 218 }
221 ext_sigcount = ext_header->count; 219 ext_sigcount = ext_header->count;
@@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc)
230 while (i--) 228 while (i--)
231 ext_table_sum += ext_tablep[i]; 229 ext_table_sum += ext_tablep[i];
232 if (ext_table_sum) { 230 if (ext_table_sum) {
233 printk(KERN_WARNING "microcode: aborting, " 231 pr_warning("aborting, bad extended signature table checksum\n");
234 "bad extended signature table checksum\n");
235 return -EINVAL; 232 return -EINVAL;
236 } 233 }
237 } 234 }
@@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc)
242 while (i--) 239 while (i--)
243 orig_sum += ((int *)mc)[i]; 240 orig_sum += ((int *)mc)[i];
244 if (orig_sum) { 241 if (orig_sum) {
245 printk(KERN_ERR "microcode: aborting, bad checksum\n"); 242 pr_err("aborting, bad checksum\n");
246 return -EINVAL; 243 return -EINVAL;
247 } 244 }
248 if (!ext_table_size) 245 if (!ext_table_size)
@@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc)
255 - (mc_header->sig + mc_header->pf + mc_header->cksum) 252 - (mc_header->sig + mc_header->pf + mc_header->cksum)
256 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); 253 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
257 if (sum) { 254 if (sum) {
258 printk(KERN_ERR "microcode: aborting, bad checksum\n"); 255 pr_err("aborting, bad checksum\n");
259 return -EINVAL; 256 return -EINVAL;
260 } 257 }
261 } 258 }
@@ -327,13 +324,11 @@ static int apply_microcode(int cpu)
327 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); 324 rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
328 325
329 if (val[1] != mc_intel->hdr.rev) { 326 if (val[1] != mc_intel->hdr.rev) {
330 printk(KERN_ERR "microcode: CPU%d update " 327 pr_err("CPU%d update to revision 0x%x failed\n",
331 "to revision 0x%x failed\n", 328 cpu_num, mc_intel->hdr.rev);
332 cpu_num, mc_intel->hdr.rev);
333 return -1; 329 return -1;
334 } 330 }
335 printk(KERN_INFO "microcode: CPU%d updated to revision " 331 pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
336 "0x%x, date = %04x-%02x-%02x \n",
337 cpu_num, val[1], 332 cpu_num, val[1],
338 mc_intel->hdr.date & 0xffff, 333 mc_intel->hdr.date & 0xffff,
339 mc_intel->hdr.date >> 24, 334 mc_intel->hdr.date >> 24,
@@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
362 357
363 mc_size = get_totalsize(&mc_header); 358 mc_size = get_totalsize(&mc_header);
364 if (!mc_size || mc_size > leftover) { 359 if (!mc_size || mc_size > leftover) {
365 printk(KERN_ERR "microcode: error!" 360 pr_err("error! Bad data in microcode data file\n");
366 "Bad data in microcode data file\n");
367 break; 361 break;
368 } 362 }
369 363
@@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
405 vfree(uci->mc); 399 vfree(uci->mc);
406 uci->mc = (struct microcode_intel *)new_mc; 400 uci->mc = (struct microcode_intel *)new_mc;
407 401
408 pr_debug("microcode: CPU%d found a matching microcode update with" 402 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
409 " version 0x%x (current=0x%x)\n", 403 cpu, new_rev, uci->cpu_sig.rev);
410 cpu, new_rev, uci->cpu_sig.rev);
411out: 404out:
412 return state; 405 return state;
413} 406}
@@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
429 c->x86, c->x86_model, c->x86_mask); 422 c->x86, c->x86_model, c->x86_mask);
430 423
431 if (request_firmware(&firmware, name, device)) { 424 if (request_firmware(&firmware, name, device)) {
432 pr_debug("microcode: data file %s load failed\n", name); 425 pr_debug("data file %s load failed\n", name);
433 return UCODE_NFOUND; 426 return UCODE_NFOUND;
434 } 427 }
435 428
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 35a57c963df9..a2c1edd2d3ac 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
359 x86_init.mpparse.mpc_record(1); 359 x86_init.mpparse.mpc_record(1);
360 } 360 }
361 361
362#ifdef CONFIG_X86_BIGSMP
363 generic_bigsmp_probe();
364#endif
365
366 if (apic->setup_apic_routing)
367 apic->setup_apic_routing();
368
369 if (!num_processors) 362 if (!num_processors)
370 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 363 printk(KERN_ERR "MPTABLE: no processors registered!\n");
371 return num_processors; 364 return num_processors;
@@ -945,9 +938,6 @@ void __init early_reserve_e820_mpc_new(void)
945{ 938{
946 if (enable_update_mptable && alloc_mptable) { 939 if (enable_update_mptable && alloc_mptable) {
947 u64 startt = 0; 940 u64 startt = 0;
948#ifdef CONFIG_X86_TRAMPOLINE
949 startt = TRAMPOLINE_BASE;
950#endif
951 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); 941 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
952 } 942 }
953} 943}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 553449951b84..206735ac8cbd 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -172,11 +172,10 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
172 172
173static int msr_open(struct inode *inode, struct file *file) 173static int msr_open(struct inode *inode, struct file *file)
174{ 174{
175 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 175 unsigned int cpu;
176 struct cpuinfo_x86 *c = &cpu_data(cpu); 176 struct cpuinfo_x86 *c;
177 177
178 cpu = iminor(file->f_path.dentry->d_inode); 178 cpu = iminor(file->f_path.dentry->d_inode);
179
180 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) 179 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
181 return -ENXIO; /* No such CPU */ 180 return -ENXIO; /* No such CPU */
182 181
@@ -247,7 +246,7 @@ static int __init msr_init(void)
247 int i, err = 0; 246 int i, err = 0;
248 i = 0; 247 i = 0;
249 248
250 if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { 249 if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) {
251 printk(KERN_ERR "msr: unable to get major %d for msr\n", 250 printk(KERN_ERR "msr: unable to get major %d for msr\n",
252 MSR_MAJOR); 251 MSR_MAJOR);
253 err = -EBUSY; 252 err = -EBUSY;
@@ -275,7 +274,7 @@ out_class:
275 msr_device_destroy(i); 274 msr_device_destroy(i);
276 class_destroy(msr_class); 275 class_destroy(msr_class);
277out_chrdev: 276out_chrdev:
278 unregister_chrdev(MSR_MAJOR, "cpu/msr"); 277 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
279out: 278out:
280 return err; 279 return err;
281} 280}
@@ -286,7 +285,7 @@ static void __exit msr_exit(void)
286 for_each_online_cpu(cpu) 285 for_each_online_cpu(cpu)
287 msr_device_destroy(cpu); 286 msr_device_destroy(cpu);
288 class_destroy(msr_class); 287 class_destroy(msr_class);
289 unregister_chrdev(MSR_MAJOR, "cpu/msr"); 288 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
290 unregister_hotcpu_notifier(&msr_class_cpu_notifier); 289 unregister_hotcpu_notifier(&msr_class_cpu_notifier);
291} 290}
292 291
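The msr.c hunks above replace register_chrdev() with __register_chrdev(), which claims only minor numbers 0..NR_CPUS-1 under MSR_MAJOR rather than the device's full minor range. A minimal sketch of the pattern, assuming a stub file_operations (EXAMPLE_MAJOR and every "example" name below are illustrative, not part of the patch):

    #include <linux/fs.h>
    #include <linux/threads.h>

    #define EXAMPLE_MAJOR 202                       /* stand-in for MSR_MAJOR */

    static const struct file_operations example_fops;  /* stub for the sketch */

    static int __init example_init(void)
    {
            /* claim only minors 0..NR_CPUS-1 under this major */
            if (__register_chrdev(EXAMPLE_MAJOR, 0, NR_CPUS,
                                  "cpu/example", &example_fops))
                    return -EBUSY;
            return 0;
    }

    static void __exit example_exit(void)
    {
            /* baseminor and count must mirror the registration call */
            __unregister_chrdev(EXAMPLE_MAJOR, 0, NR_CPUS, "cpu/example");
    }

The unregister calls carry the same baseminor/count pair, which is why all three call sites in the diff change together.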
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 4006c522adc7..9d1d263f786f 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -212,7 +212,7 @@ static int __init olpc_init(void)
212 unsigned char *romsig; 212 unsigned char *romsig;
213 213
214 /* The ioremap check is dangerous; limit what we run it on */ 214 /* The ioremap check is dangerous; limit what we run it on */
215 if (!is_geode() || geode_has_vsa2()) 215 if (!is_geode() || cs5535_has_vsa2())
216 return 0; 216 return 0;
217 217
218 spin_lock_init(&ec_lock); 218 spin_lock_init(&ec_lock);
@@ -244,7 +244,7 @@ static int __init olpc_init(void)
244 (unsigned char *) &olpc_platform_info.ecver, 1); 244 (unsigned char *) &olpc_platform_info.ecver, 1);
245 245
246 /* check to see if the VSA exists */ 246 /* check to see if the VSA exists */
247 if (geode_has_vsa2()) 247 if (cs5535_has_vsa2())
248 olpc_platform_info.flags |= OLPC_F_VSA; 248 olpc_platform_info.flags |= OLPC_F_VSA;
249 249
250 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", 250 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 3a7c5a44082e..676b8c77a976 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,9 +8,9 @@
8#include <asm/paravirt.h> 8#include <asm/paravirt.h>
9 9
10static inline void 10static inline void
11default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) 11default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
12{ 12{
13 __raw_spin_lock(lock); 13 arch_spin_lock(lock);
14} 14}
15 15
16struct pv_lock_ops pv_lock_ops = { 16struct pv_lock_ops pv_lock_ops = {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index c563e4c8ff39..2bbde6078143 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -31,7 +31,7 @@
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/crash_dump.h> 32#include <linux/crash_dump.h>
33#include <linux/dma-mapping.h> 33#include <linux/dma-mapping.h>
34#include <linux/bitops.h> 34#include <linux/bitmap.h>
35#include <linux/pci_ids.h> 35#include <linux/pci_ids.h>
36#include <linux/pci.h> 36#include <linux/pci.h>
37#include <linux/delay.h> 37#include <linux/delay.h>
@@ -212,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
212 212
213 spin_lock_irqsave(&tbl->it_lock, flags); 213 spin_lock_irqsave(&tbl->it_lock, flags);
214 214
215 iommu_area_reserve(tbl->it_map, index, npages); 215 bitmap_set(tbl->it_map, index, npages);
216 216
217 spin_unlock_irqrestore(&tbl->it_lock, flags); 217 spin_unlock_irqrestore(&tbl->it_lock, flags);
218} 218}
@@ -303,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
303 303
304 spin_lock_irqsave(&tbl->it_lock, flags); 304 spin_lock_irqsave(&tbl->it_lock, flags);
305 305
306 iommu_area_free(tbl->it_map, entry, npages); 306 bitmap_clear(tbl->it_map, entry, npages);
307 307
308 spin_unlock_irqrestore(&tbl->it_lock, flags); 308 spin_unlock_irqrestore(&tbl->it_lock, flags);
309} 309}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index afcc58b69c7c..75e14e21f61a 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -124,8 +124,8 @@ void __init pci_iommu_alloc(void)
124 /* free the range so iommu could get some range less than 4G */ 124 /* free the range so iommu could get some range less than 4G */
125 dma32_free_bootmem(); 125 dma32_free_bootmem();
126#endif 126#endif
127 if (pci_swiotlb_init()) 127 if (pci_swiotlb_detect())
128 return; 128 goto out;
129 129
130 gart_iommu_hole_init(); 130 gart_iommu_hole_init();
131 131
@@ -135,6 +135,8 @@ void __init pci_iommu_alloc(void)
135 135
136 /* needs to be called after gart_iommu_hole_init */ 136 /* needs to be called after gart_iommu_hole_init */
137 amd_iommu_detect(); 137 amd_iommu_detect();
138out:
139 pci_swiotlb_init();
138} 140}
139 141
140void *dma_generic_alloc_coherent(struct device *dev, size_t size, 142void *dma_generic_alloc_coherent(struct device *dev, size_t size,
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index e6a0d402f171..34de53b46f87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -23,7 +23,7 @@
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/topology.h> 24#include <linux/topology.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/bitops.h> 26#include <linux/bitmap.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/scatterlist.h> 28#include <linux/scatterlist.h>
29#include <linux/iommu-helper.h> 29#include <linux/iommu-helper.h>
@@ -126,7 +126,7 @@ static void free_iommu(unsigned long offset, int size)
126 unsigned long flags; 126 unsigned long flags;
127 127
128 spin_lock_irqsave(&iommu_bitmap_lock, flags); 128 spin_lock_irqsave(&iommu_bitmap_lock, flags);
129 iommu_area_free(iommu_gart_bitmap, offset, size); 129 bitmap_clear(iommu_gart_bitmap, offset, size);
130 if (offset >= next_bit) 130 if (offset >= next_bit)
131 next_bit = offset + size; 131 next_bit = offset + size;
132 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 132 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void)
710 struct pci_dev *dev; 710 struct pci_dev *dev;
711 int i; 711 int i;
712 712
713 if (no_agp) 713 /* don't shut it down if AGP is installed */
714 if (!no_agp)
714 return; 715 return;
715 716
716 for (i = 0; i < num_k8_northbridges; i++) { 717 for (i = 0; i < num_k8_northbridges; i++) {
@@ -791,7 +792,7 @@ int __init gart_iommu_init(void)
791 * Out of IOMMU space handling. 792 * Out of IOMMU space handling.
792 * Reserve some invalid pages at the beginning of the GART. 793 * Reserve some invalid pages at the beginning of the GART.
793 */ 794 */
794 iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); 795 bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
795 796
796 pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", 797 pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
797 iommu_size >> 20); 798 iommu_size >> 20);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index e3c0a66b9e77..7d2829dde20e 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -43,12 +43,12 @@ static struct dma_map_ops swiotlb_dma_ops = {
43}; 43};
44 44
45/* 45/*
46 * pci_swiotlb_init - initialize swiotlb if necessary 46 * pci_swiotlb_detect - set swiotlb to 1 if necessary
47 * 47 *
48 * This returns non-zero if we are forced to use swiotlb (by the boot 48 * This returns non-zero if we are forced to use swiotlb (by the boot
49 * option). 49 * option).
50 */ 50 */
51int __init pci_swiotlb_init(void) 51int __init pci_swiotlb_detect(void)
52{ 52{
53 int use_swiotlb = swiotlb | swiotlb_force; 53 int use_swiotlb = swiotlb | swiotlb_force;
54 54
@@ -60,10 +60,13 @@ int __init pci_swiotlb_init(void)
60 if (swiotlb_force) 60 if (swiotlb_force)
61 swiotlb = 1; 61 swiotlb = 1;
62 62
63 return use_swiotlb;
64}
65
66void __init pci_swiotlb_init(void)
67{
63 if (swiotlb) { 68 if (swiotlb) {
64 swiotlb_init(0); 69 swiotlb_init(0);
65 dma_ops = &swiotlb_dma_ops; 70 dma_ops = &swiotlb_dma_ops;
66 } 71 }
67
68 return use_swiotlb;
69} 72}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5e2ba634ea15..02d678065d7d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,8 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <linux/user-return-notifier.h> 12#include <linux/user-return-notifier.h>
13#include <linux/dmi.h>
14#include <linux/utsname.h>
13#include <trace/events/power.h> 15#include <trace/events/power.h>
14#include <linux/hw_breakpoint.h> 16#include <linux/hw_breakpoint.h>
15#include <asm/system.h> 17#include <asm/system.h>
@@ -90,22 +92,36 @@ void exit_thread(void)
90 } 92 }
91} 93}
92 94
95void show_regs(struct pt_regs *regs)
96{
97 show_registers(regs);
98 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
99 regs->bp);
100}
101
102void show_regs_common(void)
103{
104 const char *board, *product;
105
106 board = dmi_get_system_info(DMI_BOARD_NAME);
107 if (!board)
108 board = "";
109 product = dmi_get_system_info(DMI_PRODUCT_NAME);
110 if (!product)
111 product = "";
112
113 printk(KERN_CONT "\n");
114 printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
115 current->pid, current->comm, print_tainted(),
116 init_utsname()->release,
117 (int)strcspn(init_utsname()->version, " "),
118 init_utsname()->version, board, product);
119}
120
93void flush_thread(void) 121void flush_thread(void)
94{ 122{
95 struct task_struct *tsk = current; 123 struct task_struct *tsk = current;
96 124
97#ifdef CONFIG_X86_64
98 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
99 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
100 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
101 clear_tsk_thread_flag(tsk, TIF_IA32);
102 } else {
103 set_tsk_thread_flag(tsk, TIF_IA32);
104 current_thread_info()->status |= TS_COMPAT;
105 }
106 }
107#endif
108
109 flush_ptrace_hw_breakpoint(tsk); 125 flush_ptrace_hw_breakpoint(tsk);
110 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 126 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
111 /* 127 /*
@@ -234,6 +250,78 @@ int sys_vfork(struct pt_regs *regs)
234 NULL, NULL); 250 NULL, NULL);
235} 251}
236 252
253long
254sys_clone(unsigned long clone_flags, unsigned long newsp,
255 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
256{
257 if (!newsp)
258 newsp = regs->sp;
259 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
260}
261
262/*
263 * This gets run with %si containing the
264 * function to call, and %di containing
265 * the "args".
266 */
267extern void kernel_thread_helper(void);
268
269/*
270 * Create a kernel thread
271 */
272int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
273{
274 struct pt_regs regs;
275
276 memset(&regs, 0, sizeof(regs));
277
278 regs.si = (unsigned long) fn;
279 regs.di = (unsigned long) arg;
280
281#ifdef CONFIG_X86_32
282 regs.ds = __USER_DS;
283 regs.es = __USER_DS;
284 regs.fs = __KERNEL_PERCPU;
285 regs.gs = __KERNEL_STACK_CANARY;
286#else
287 regs.ss = __KERNEL_DS;
288#endif
289
290 regs.orig_ax = -1;
291 regs.ip = (unsigned long) kernel_thread_helper;
292 regs.cs = __KERNEL_CS | get_kernel_rpl();
293 regs.flags = X86_EFLAGS_IF | 0x2;
294
295 /* Ok, create the new process.. */
296 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
297}
298EXPORT_SYMBOL(kernel_thread);
299
300/*
301 * sys_execve() executes a new program.
302 */
303long sys_execve(char __user *name, char __user * __user *argv,
304 char __user * __user *envp, struct pt_regs *regs)
305{
306 long error;
307 char *filename;
308
309 filename = getname(name);
310 error = PTR_ERR(filename);
311 if (IS_ERR(filename))
312 return error;
313 error = do_execve(filename, argv, envp, regs);
314
315#ifdef CONFIG_X86_32
316 if (error == 0) {
317 /* Make sure we don't return using sysenter.. */
318 set_thread_flag(TIF_IRET);
319 }
320#endif
321
322 putname(filename);
323 return error;
324}
237 325
238/* 326/*
239 * Idle related variables and functions 327 * Idle related variables and functions
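With kernel_thread() now shared between 32-bit and 64-bit, callers are unchanged: the function hand-builds a pt_regs frame so that ret_from_fork lands in kernel_thread_helper, which in turn calls fn(arg). A hedged usage sketch (the worker function and messages are illustrative):

    static int example_worker(void *arg)
    {
            pr_info("worker started, arg=%p\n", arg);
            return 0;
    }

    /* in some initialization path: */
    int pid = kernel_thread(example_worker, NULL, SIGCHLD);
    if (pid < 0)
            pr_err("kernel_thread failed: %d\n", pid);

kernel_thread() ORs in CLONE_VM | CLONE_UNTRACED itself, so callers pass only the extra flags (commonly SIGCHLD or 0).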
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 075580b35682..f6c62667e30c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -23,7 +23,6 @@
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/user.h> 24#include <linux/user.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/utsname.h>
27#include <linux/delay.h> 26#include <linux/delay.h>
28#include <linux/reboot.h> 27#include <linux/reboot.h>
29#include <linux/init.h> 28#include <linux/init.h>
@@ -35,7 +34,6 @@
35#include <linux/tick.h> 34#include <linux/tick.h>
36#include <linux/percpu.h> 35#include <linux/percpu.h>
37#include <linux/prctl.h> 36#include <linux/prctl.h>
38#include <linux/dmi.h>
39#include <linux/ftrace.h> 37#include <linux/ftrace.h>
40#include <linux/uaccess.h> 38#include <linux/uaccess.h>
41#include <linux/io.h> 39#include <linux/io.h>
@@ -128,7 +126,6 @@ void __show_regs(struct pt_regs *regs, int all)
128 unsigned long d0, d1, d2, d3, d6, d7; 126 unsigned long d0, d1, d2, d3, d6, d7;
129 unsigned long sp; 127 unsigned long sp;
130 unsigned short ss, gs; 128 unsigned short ss, gs;
131 const char *board;
132 129
133 if (user_mode_vm(regs)) { 130 if (user_mode_vm(regs)) {
134 sp = regs->sp; 131 sp = regs->sp;
@@ -140,27 +137,18 @@ void __show_regs(struct pt_regs *regs, int all)
140 savesegment(gs, gs); 137 savesegment(gs, gs);
141 } 138 }
142 139
143 printk("\n"); 140 show_regs_common();
144 141
145 board = dmi_get_system_info(DMI_PRODUCT_NAME); 142 printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
146 if (!board)
147 board = "";
148 printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
149 task_pid_nr(current), current->comm,
150 print_tainted(), init_utsname()->release,
151 (int)strcspn(init_utsname()->version, " "),
152 init_utsname()->version, board);
153
154 printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
155 (u16)regs->cs, regs->ip, regs->flags, 143 (u16)regs->cs, regs->ip, regs->flags,
156 smp_processor_id()); 144 smp_processor_id());
157 print_symbol("EIP is at %s\n", regs->ip); 145 print_symbol("EIP is at %s\n", regs->ip);
158 146
159 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", 147 printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
160 regs->ax, regs->bx, regs->cx, regs->dx); 148 regs->ax, regs->bx, regs->cx, regs->dx);
161 printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", 149 printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
162 regs->si, regs->di, regs->bp, sp); 150 regs->si, regs->di, regs->bp, sp);
163 printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", 151 printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
164 (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); 152 (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
165 153
166 if (!all) 154 if (!all)
@@ -170,61 +158,22 @@ void __show_regs(struct pt_regs *regs, int all)
170 cr2 = read_cr2(); 158 cr2 = read_cr2();
171 cr3 = read_cr3(); 159 cr3 = read_cr3();
172 cr4 = read_cr4_safe(); 160 cr4 = read_cr4_safe();
173 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", 161 printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
174 cr0, cr2, cr3, cr4); 162 cr0, cr2, cr3, cr4);
175 163
176 get_debugreg(d0, 0); 164 get_debugreg(d0, 0);
177 get_debugreg(d1, 1); 165 get_debugreg(d1, 1);
178 get_debugreg(d2, 2); 166 get_debugreg(d2, 2);
179 get_debugreg(d3, 3); 167 get_debugreg(d3, 3);
180 printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", 168 printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
181 d0, d1, d2, d3); 169 d0, d1, d2, d3);
182 170
183 get_debugreg(d6, 6); 171 get_debugreg(d6, 6);
184 get_debugreg(d7, 7); 172 get_debugreg(d7, 7);
185 printk("DR6: %08lx DR7: %08lx\n", 173 printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
186 d6, d7); 174 d6, d7);
187} 175}
188 176
189void show_regs(struct pt_regs *regs)
190{
191 show_registers(regs);
192 show_trace(NULL, regs, &regs->sp, regs->bp);
193}
194
195/*
196 * This gets run with %bx containing the
197 * function to call, and %dx containing
198 * the "args".
199 */
200extern void kernel_thread_helper(void);
201
202/*
203 * Create a kernel thread
204 */
205int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
206{
207 struct pt_regs regs;
208
209 memset(&regs, 0, sizeof(regs));
210
211 regs.bx = (unsigned long) fn;
212 regs.dx = (unsigned long) arg;
213
214 regs.ds = __USER_DS;
215 regs.es = __USER_DS;
216 regs.fs = __KERNEL_PERCPU;
217 regs.gs = __KERNEL_STACK_CANARY;
218 regs.orig_ax = -1;
219 regs.ip = (unsigned long) kernel_thread_helper;
220 regs.cs = __KERNEL_CS | get_kernel_rpl();
221 regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
222
223 /* Ok, create the new process.. */
224 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
225}
226EXPORT_SYMBOL(kernel_thread);
227
228void release_thread(struct task_struct *dead_task) 177void release_thread(struct task_struct *dead_task)
229{ 178{
230 BUG_ON(dead_task->mm); 179 BUG_ON(dead_task->mm);
@@ -436,46 +385,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
436 return prev_p; 385 return prev_p;
437} 386}
438 387
439int sys_clone(struct pt_regs *regs)
440{
441 unsigned long clone_flags;
442 unsigned long newsp;
443 int __user *parent_tidptr, *child_tidptr;
444
445 clone_flags = regs->bx;
446 newsp = regs->cx;
447 parent_tidptr = (int __user *)regs->dx;
448 child_tidptr = (int __user *)regs->di;
449 if (!newsp)
450 newsp = regs->sp;
451 return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
452}
453
454/*
455 * sys_execve() executes a new program.
456 */
457int sys_execve(struct pt_regs *regs)
458{
459 int error;
460 char *filename;
461
462 filename = getname((char __user *) regs->bx);
463 error = PTR_ERR(filename);
464 if (IS_ERR(filename))
465 goto out;
466 error = do_execve(filename,
467 (char __user * __user *) regs->cx,
468 (char __user * __user *) regs->dx,
469 regs);
470 if (error == 0) {
471 /* Make sure we don't return using sysenter.. */
472 set_thread_flag(TIF_IRET);
473 }
474 putname(filename);
475out:
476 return error;
477}
478
479#define top_esp (THREAD_SIZE - sizeof(unsigned long)) 388#define top_esp (THREAD_SIZE - sizeof(unsigned long))
480#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) 389#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
481 390
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c95c8f4e790a..dc9690b4c4cc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -26,7 +26,6 @@
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/user.h> 27#include <linux/user.h>
28#include <linux/interrupt.h> 28#include <linux/interrupt.h>
29#include <linux/utsname.h>
30#include <linux/delay.h> 29#include <linux/delay.h>
31#include <linux/module.h> 30#include <linux/module.h>
32#include <linux/ptrace.h> 31#include <linux/ptrace.h>
@@ -38,7 +37,6 @@
38#include <linux/uaccess.h> 37#include <linux/uaccess.h>
39#include <linux/io.h> 38#include <linux/io.h>
40#include <linux/ftrace.h> 39#include <linux/ftrace.h>
41#include <linux/dmi.h>
42 40
43#include <asm/pgtable.h> 41#include <asm/pgtable.h>
44#include <asm/system.h> 42#include <asm/system.h>
@@ -59,8 +57,6 @@ asmlinkage extern void ret_from_fork(void);
59DEFINE_PER_CPU(unsigned long, old_rsp); 57DEFINE_PER_CPU(unsigned long, old_rsp);
60static DEFINE_PER_CPU(unsigned char, is_idle); 58static DEFINE_PER_CPU(unsigned char, is_idle);
61 59
62unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
63
64static ATOMIC_NOTIFIER_HEAD(idle_notifier); 60static ATOMIC_NOTIFIER_HEAD(idle_notifier);
65 61
66void idle_notifier_register(struct notifier_block *n) 62void idle_notifier_register(struct notifier_block *n)
@@ -163,31 +159,21 @@ void __show_regs(struct pt_regs *regs, int all)
163 unsigned long d0, d1, d2, d3, d6, d7; 159 unsigned long d0, d1, d2, d3, d6, d7;
164 unsigned int fsindex, gsindex; 160 unsigned int fsindex, gsindex;
165 unsigned int ds, cs, es; 161 unsigned int ds, cs, es;
166 const char *board; 162
167 163 show_regs_common();
168 printk("\n"); 164 printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
169 print_modules();
170 board = dmi_get_system_info(DMI_PRODUCT_NAME);
171 if (!board)
172 board = "";
173 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
174 current->pid, current->comm, print_tainted(),
175 init_utsname()->release,
176 (int)strcspn(init_utsname()->version, " "),
177 init_utsname()->version, board);
178 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
179 printk_address(regs->ip, 1); 165 printk_address(regs->ip, 1);
180 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, 166 printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
181 regs->sp, regs->flags); 167 regs->sp, regs->flags);
182 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", 168 printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
183 regs->ax, regs->bx, regs->cx); 169 regs->ax, regs->bx, regs->cx);
184 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", 170 printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
185 regs->dx, regs->si, regs->di); 171 regs->dx, regs->si, regs->di);
186 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", 172 printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
187 regs->bp, regs->r8, regs->r9); 173 regs->bp, regs->r8, regs->r9);
188 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", 174 printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
189 regs->r10, regs->r11, regs->r12); 175 regs->r10, regs->r11, regs->r12);
190 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", 176 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
191 regs->r13, regs->r14, regs->r15); 177 regs->r13, regs->r14, regs->r15);
192 178
193 asm("movl %%ds,%0" : "=r" (ds)); 179 asm("movl %%ds,%0" : "=r" (ds));
@@ -208,27 +194,21 @@ void __show_regs(struct pt_regs *regs, int all)
208 cr3 = read_cr3(); 194 cr3 = read_cr3();
209 cr4 = read_cr4(); 195 cr4 = read_cr4();
210 196
211 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 197 printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
212 fs, fsindex, gs, gsindex, shadowgs); 198 fs, fsindex, gs, gsindex, shadowgs);
213 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, 199 printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
214 es, cr0); 200 es, cr0);
215 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, 201 printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
216 cr4); 202 cr4);
217 203
218 get_debugreg(d0, 0); 204 get_debugreg(d0, 0);
219 get_debugreg(d1, 1); 205 get_debugreg(d1, 1);
220 get_debugreg(d2, 2); 206 get_debugreg(d2, 2);
221 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); 207 printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
222 get_debugreg(d3, 3); 208 get_debugreg(d3, 3);
223 get_debugreg(d6, 6); 209 get_debugreg(d6, 6);
224 get_debugreg(d7, 7); 210 get_debugreg(d7, 7);
225 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 211 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
226}
227
228void show_regs(struct pt_regs *regs)
229{
230 show_registers(regs);
231 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
232} 212}
233 213
234void release_thread(struct task_struct *dead_task) 214void release_thread(struct task_struct *dead_task)
@@ -285,8 +265,9 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
285 *childregs = *regs; 265 *childregs = *regs;
286 266
287 childregs->ax = 0; 267 childregs->ax = 0;
288 childregs->sp = sp; 268 if (user_mode(regs))
289 if (sp == ~0UL) 269 childregs->sp = sp;
270 else
290 childregs->sp = (unsigned long)childregs; 271 childregs->sp = (unsigned long)childregs;
291 272
292 p->thread.sp = (unsigned long) childregs; 273 p->thread.sp = (unsigned long) childregs;
@@ -520,25 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
520 return prev_p; 501 return prev_p;
521} 502}
522 503
523/*
524 * sys_execve() executes a new program.
525 */
526asmlinkage
527long sys_execve(char __user *name, char __user * __user *argv,
528 char __user * __user *envp, struct pt_regs *regs)
529{
530 long error;
531 char *filename;
532
533 filename = getname(name);
534 error = PTR_ERR(filename);
535 if (IS_ERR(filename))
536 return error;
537 error = do_execve(filename, argv, envp, regs);
538 putname(filename);
539 return error;
540}
541
542void set_personality_64bit(void) 504void set_personality_64bit(void)
543{ 505{
544 /* inherit personality from parent */ 506 /* inherit personality from parent */
@@ -553,13 +515,16 @@ void set_personality_64bit(void)
553 current->personality &= ~READ_IMPLIES_EXEC; 515 current->personality &= ~READ_IMPLIES_EXEC;
554} 516}
555 517
556asmlinkage long 518void set_personality_ia32(void)
557sys_clone(unsigned long clone_flags, unsigned long newsp,
558 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
559{ 519{
560 if (!newsp) 520 /* inherit personality from parent */
561 newsp = regs->sp; 521
562 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); 522 /* Make sure to be in 32bit mode */
523 set_thread_flag(TIF_IA32);
524 current->personality |= force_personality32;
525
526 /* Prepare the first "return" to user space */
527 current_thread_info()->status |= TS_COMPAT;
563} 528}
564 529
565unsigned long get_wchan(struct task_struct *p) 530unsigned long get_wchan(struct task_struct *p)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 04d182a7cfdb..2d96aab82a48 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -48,6 +48,7 @@ enum x86_regset {
48 REGSET_FP, 48 REGSET_FP,
49 REGSET_XFP, 49 REGSET_XFP,
50 REGSET_IOPERM64 = REGSET_XFP, 50 REGSET_IOPERM64 = REGSET_XFP,
51 REGSET_XSTATE,
51 REGSET_TLS, 52 REGSET_TLS,
52 REGSET_IOPERM32, 53 REGSET_IOPERM32,
53}; 54};
@@ -140,30 +141,6 @@ static const int arg_offs_table[] = {
140#endif 141#endif
141}; 142};
142 143
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
148 * regs_get_argument_nth() returns @n th argument of a function call.
149 * Since usually the kernel stack will be changed right after function entry,
150 * you must use this at function entry. If the @n th entry is NOT in the
151 * kernel stack or pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
166
167/* 144/*
168 * does not yet catch signals sent when the child dies. 145 * does not yet catch signals sent when the child dies.
169 * in exit.c or in signal.c. 146 * in exit.c or in signal.c.
@@ -509,14 +486,14 @@ static int genregs_get(struct task_struct *target,
509{ 486{
510 if (kbuf) { 487 if (kbuf) {
511 unsigned long *k = kbuf; 488 unsigned long *k = kbuf;
512 while (count > 0) { 489 while (count >= sizeof(*k)) {
513 *k++ = getreg(target, pos); 490 *k++ = getreg(target, pos);
514 count -= sizeof(*k); 491 count -= sizeof(*k);
515 pos += sizeof(*k); 492 pos += sizeof(*k);
516 } 493 }
517 } else { 494 } else {
518 unsigned long __user *u = ubuf; 495 unsigned long __user *u = ubuf;
519 while (count > 0) { 496 while (count >= sizeof(*u)) {
520 if (__put_user(getreg(target, pos), u++)) 497 if (__put_user(getreg(target, pos), u++))
521 return -EFAULT; 498 return -EFAULT;
522 count -= sizeof(*u); 499 count -= sizeof(*u);
@@ -535,14 +512,14 @@ static int genregs_set(struct task_struct *target,
535 int ret = 0; 512 int ret = 0;
536 if (kbuf) { 513 if (kbuf) {
537 const unsigned long *k = kbuf; 514 const unsigned long *k = kbuf;
538 while (count > 0 && !ret) { 515 while (count >= sizeof(*k) && !ret) {
539 ret = putreg(target, pos, *k++); 516 ret = putreg(target, pos, *k++);
540 count -= sizeof(*k); 517 count -= sizeof(*k);
541 pos += sizeof(*k); 518 pos += sizeof(*k);
542 } 519 }
543 } else { 520 } else {
544 const unsigned long __user *u = ubuf; 521 const unsigned long __user *u = ubuf;
545 while (count > 0 && !ret) { 522 while (count >= sizeof(*u) && !ret) {
546 unsigned long word; 523 unsigned long word;
547 ret = __get_user(word, u++); 524 ret = __get_user(word, u++);
548 if (ret) 525 if (ret)
@@ -555,7 +532,9 @@ static int genregs_set(struct task_struct *target,
555 return ret; 532 return ret;
556} 533}
557 534
558static void ptrace_triggered(struct perf_event *bp, void *data) 535static void ptrace_triggered(struct perf_event *bp, int nmi,
536 struct perf_sample_data *data,
537 struct pt_regs *regs)
559{ 538{
560 int i; 539 int i;
561 struct thread_struct *thread = &(current->thread); 540 struct thread_struct *thread = &(current->thread);
@@ -593,13 +572,13 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
593 return dr7; 572 return dr7;
594} 573}
595 574
596static struct perf_event * 575static int
597ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, 576ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
598 struct task_struct *tsk, int disabled) 577 struct task_struct *tsk, int disabled)
599{ 578{
600 int err; 579 int err;
601 int gen_len, gen_type; 580 int gen_len, gen_type;
602 DEFINE_BREAKPOINT_ATTR(attr); 581 struct perf_event_attr attr;
603 582
604 /* 583 /*
605 * We should have at least an inactive breakpoint at this 584
@@ -607,18 +586,18 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
607 * written the address register first 586 * written the address register first
608 */ 587 */
609 if (!bp) 588 if (!bp)
610 return ERR_PTR(-EINVAL); 589 return -EINVAL;
611 590
612 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); 591 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
613 if (err) 592 if (err)
614 return ERR_PTR(err); 593 return err;
615 594
616 attr = bp->attr; 595 attr = bp->attr;
617 attr.bp_len = gen_len; 596 attr.bp_len = gen_len;
618 attr.bp_type = gen_type; 597 attr.bp_type = gen_type;
619 attr.disabled = disabled; 598 attr.disabled = disabled;
620 599
621 return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); 600 return modify_user_hw_breakpoint(bp, &attr);
622} 601}
623 602
624/* 603/*
@@ -656,28 +635,17 @@ restore:
656 if (!second_pass) 635 if (!second_pass)
657 continue; 636 continue;
658 637
659 thread->ptrace_bps[i] = NULL; 638 rc = ptrace_modify_breakpoint(bp, len, type,
660 bp = ptrace_modify_breakpoint(bp, len, type,
661 tsk, 1); 639 tsk, 1);
662 if (IS_ERR(bp)) { 640 if (rc)
663 rc = PTR_ERR(bp);
664 thread->ptrace_bps[i] = NULL;
665 break; 641 break;
666 }
667 thread->ptrace_bps[i] = bp;
668 } 642 }
669 continue; 643 continue;
670 } 644 }
671 645
672 bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); 646 rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
673 647 if (rc)
674 /* Incorrect bp, or we have a bug in bp API */
675 if (IS_ERR(bp)) {
676 rc = PTR_ERR(bp);
677 thread->ptrace_bps[i] = NULL;
678 break; 648 break;
679 }
680 thread->ptrace_bps[i] = bp;
681 } 649 }
682 /* 650 /*
683 * Make a second pass to free the remaining unused breakpoints 651 * Make a second pass to free the remaining unused breakpoints
@@ -711,7 +679,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
711 } else if (n == 6) { 679 } else if (n == 6) {
712 val = thread->debugreg6; 680 val = thread->debugreg6;
713 } else if (n == 7) { 681 } else if (n == 7) {
714 val = ptrace_get_dr7(thread->ptrace_bps); 682 val = thread->ptrace_dr7;
715 } 683 }
716 return val; 684 return val;
717} 685}
@@ -721,9 +689,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
721{ 689{
722 struct perf_event *bp; 690 struct perf_event *bp;
723 struct thread_struct *t = &tsk->thread; 691 struct thread_struct *t = &tsk->thread;
724 DEFINE_BREAKPOINT_ATTR(attr); 692 struct perf_event_attr attr;
725 693
726 if (!t->ptrace_bps[nr]) { 694 if (!t->ptrace_bps[nr]) {
695 hw_breakpoint_init(&attr);
727 /* 696 /*
728 * Put stub len and type to register (reserve) an inactive but 697 * Put stub len and type to register (reserve) an inactive but
729 * correct bp 698 * correct bp
@@ -734,26 +703,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
734 attr.disabled = 1; 703 attr.disabled = 1;
735 704
736 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); 705 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
706
707 /*
708 * CHECKME: the previous code returned -EIO if the addr wasn't
709 * a valid task virtual addr. The new one will return -EINVAL in
710 * this case.
711 * -EINVAL may be what we want for in-kernel breakpoint users,
712 * but -EIO looks better for ptrace, since we refuse a register
713 * write for the user. And anyway this is the previous
714 * behaviour.
715 */
716 if (IS_ERR(bp))
717 return PTR_ERR(bp);
718
719 t->ptrace_bps[nr] = bp;
737 } else { 720 } else {
721 int err;
722
738 bp = t->ptrace_bps[nr]; 723 bp = t->ptrace_bps[nr];
739 t->ptrace_bps[nr] = NULL;
740 724
741 attr = bp->attr; 725 attr = bp->attr;
742 attr.bp_addr = addr; 726 attr.bp_addr = addr;
743 bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); 727 err = modify_user_hw_breakpoint(bp, &attr);
728 if (err)
729 return err;
744 } 730 }
745 /*
746 * CHECKME: the previous code returned -EIO if the addr wasn't a
747 * valid task virtual addr. The new one will return -EINVAL in this
748 * case.
749 * -EINVAL may be what we want for in-kernel breakpoints users, but
750 * -EIO looks better for ptrace, since we refuse a register writing
751 * for the user. And anyway this is the previous behaviour.
752 */
753 if (IS_ERR(bp))
754 return PTR_ERR(bp);
755 731
756 t->ptrace_bps[nr] = bp;
757 732
758 return 0; 733 return 0;
759} 734}
@@ -780,8 +755,11 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
780 return rc; 755 return rc;
781 } 756 }
782 /* All that's left is DR7 */ 757 /* All that's left is DR7 */
783 if (n == 7) 758 if (n == 7) {
784 rc = ptrace_write_dr7(tsk, val); 759 rc = ptrace_write_dr7(tsk, val);
760 if (!rc)
761 thread->ptrace_dr7 = val;
762 }
785 763
786ret_path: 764ret_path:
787 return rc; 765 return rc;
@@ -1460,14 +1438,14 @@ static int genregs32_get(struct task_struct *target,
1460{ 1438{
1461 if (kbuf) { 1439 if (kbuf) {
1462 compat_ulong_t *k = kbuf; 1440 compat_ulong_t *k = kbuf;
1463 while (count > 0) { 1441 while (count >= sizeof(*k)) {
1464 getreg32(target, pos, k++); 1442 getreg32(target, pos, k++);
1465 count -= sizeof(*k); 1443 count -= sizeof(*k);
1466 pos += sizeof(*k); 1444 pos += sizeof(*k);
1467 } 1445 }
1468 } else { 1446 } else {
1469 compat_ulong_t __user *u = ubuf; 1447 compat_ulong_t __user *u = ubuf;
1470 while (count > 0) { 1448 while (count >= sizeof(*u)) {
1471 compat_ulong_t word; 1449 compat_ulong_t word;
1472 getreg32(target, pos, &word); 1450 getreg32(target, pos, &word);
1473 if (__put_user(word, u++)) 1451 if (__put_user(word, u++))
@@ -1488,14 +1466,14 @@ static int genregs32_set(struct task_struct *target,
1488 int ret = 0; 1466 int ret = 0;
1489 if (kbuf) { 1467 if (kbuf) {
1490 const compat_ulong_t *k = kbuf; 1468 const compat_ulong_t *k = kbuf;
1491 while (count > 0 && !ret) { 1469 while (count >= sizeof(*k) && !ret) {
1492 ret = putreg32(target, pos, *k++); 1470 ret = putreg32(target, pos, *k++);
1493 count -= sizeof(*k); 1471 count -= sizeof(*k);
1494 pos += sizeof(*k); 1472 pos += sizeof(*k);
1495 } 1473 }
1496 } else { 1474 } else {
1497 const compat_ulong_t __user *u = ubuf; 1475 const compat_ulong_t __user *u = ubuf;
1498 while (count > 0 && !ret) { 1476 while (count >= sizeof(*u) && !ret) {
1499 compat_ulong_t word; 1477 compat_ulong_t word;
1500 ret = __get_user(word, u++); 1478 ret = __get_user(word, u++);
1501 if (ret) 1479 if (ret)
@@ -1586,7 +1564,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1586 1564
1587#ifdef CONFIG_X86_64 1565#ifdef CONFIG_X86_64
1588 1566
1589static const struct user_regset x86_64_regsets[] = { 1567static struct user_regset x86_64_regsets[] __read_mostly = {
1590 [REGSET_GENERAL] = { 1568 [REGSET_GENERAL] = {
1591 .core_note_type = NT_PRSTATUS, 1569 .core_note_type = NT_PRSTATUS,
1592 .n = sizeof(struct user_regs_struct) / sizeof(long), 1570 .n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1599,6 +1577,12 @@ static const struct user_regset x86_64_regsets[] = {
1599 .size = sizeof(long), .align = sizeof(long), 1577 .size = sizeof(long), .align = sizeof(long),
1600 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1578 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1601 }, 1579 },
1580 [REGSET_XSTATE] = {
1581 .core_note_type = NT_X86_XSTATE,
1582 .size = sizeof(u64), .align = sizeof(u64),
1583 .active = xstateregs_active, .get = xstateregs_get,
1584 .set = xstateregs_set
1585 },
1602 [REGSET_IOPERM64] = { 1586 [REGSET_IOPERM64] = {
1603 .core_note_type = NT_386_IOPERM, 1587 .core_note_type = NT_386_IOPERM,
1604 .n = IO_BITMAP_LONGS, 1588 .n = IO_BITMAP_LONGS,
@@ -1624,7 +1608,7 @@ static const struct user_regset_view user_x86_64_view = {
1624#endif /* CONFIG_X86_64 */ 1608#endif /* CONFIG_X86_64 */
1625 1609
1626#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 1610#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1627static const struct user_regset x86_32_regsets[] = { 1611static struct user_regset x86_32_regsets[] __read_mostly = {
1628 [REGSET_GENERAL] = { 1612 [REGSET_GENERAL] = {
1629 .core_note_type = NT_PRSTATUS, 1613 .core_note_type = NT_PRSTATUS,
1630 .n = sizeof(struct user_regs_struct32) / sizeof(u32), 1614 .n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1643,6 +1627,12 @@ static const struct user_regset x86_32_regsets[] = {
1643 .size = sizeof(u32), .align = sizeof(u32), 1627 .size = sizeof(u32), .align = sizeof(u32),
1644 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1628 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1645 }, 1629 },
1630 [REGSET_XSTATE] = {
1631 .core_note_type = NT_X86_XSTATE,
1632 .size = sizeof(u64), .align = sizeof(u64),
1633 .active = xstateregs_active, .get = xstateregs_get,
1634 .set = xstateregs_set
1635 },
1646 [REGSET_TLS] = { 1636 [REGSET_TLS] = {
1647 .core_note_type = NT_386_TLS, 1637 .core_note_type = NT_386_TLS,
1648 .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, 1638 .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
@@ -1665,6 +1655,23 @@ static const struct user_regset_view user_x86_32_view = {
1665}; 1655};
1666#endif 1656#endif
1667 1657
1658/*
1659 * This represents bytes 464..511 in the memory layout exported through
1660 * the REGSET_XSTATE interface.
1661 */
1662u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
1663
1664void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
1665{
1666#ifdef CONFIG_X86_64
1667 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1668#endif
1669#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1670 x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1671#endif
1672 xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
1673}
1674
1668const struct user_regset_view *task_user_regset_view(struct task_struct *task) 1675const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1669{ 1676{
1670#ifdef CONFIG_IA32_EMULATION 1677#ifdef CONFIG_IA32_EMULATION
@@ -1678,21 +1685,33 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1678#endif 1685#endif
1679} 1686}
1680 1687
1681void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, 1688static void fill_sigtrap_info(struct task_struct *tsk,
1682 int error_code, int si_code) 1689 struct pt_regs *regs,
1690 int error_code, int si_code,
1691 struct siginfo *info)
1683{ 1692{
1684 struct siginfo info;
1685
1686 tsk->thread.trap_no = 1; 1693 tsk->thread.trap_no = 1;
1687 tsk->thread.error_code = error_code; 1694 tsk->thread.error_code = error_code;
1688 1695
1689 memset(&info, 0, sizeof(info)); 1696 memset(info, 0, sizeof(*info));
1690 info.si_signo = SIGTRAP; 1697 info->si_signo = SIGTRAP;
1691 info.si_code = si_code; 1698 info->si_code = si_code;
1699 info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
1700}
1692 1701
1693 /* User-mode ip? */ 1702void user_single_step_siginfo(struct task_struct *tsk,
1694 info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; 1703 struct pt_regs *regs,
1704 struct siginfo *info)
1705{
1706 fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
1707}
1695 1708
1709void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
1710 int error_code, int si_code)
1711{
1712 struct siginfo info;
1713
1714 fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
1696 /* Send us the fake SIGTRAP */ 1715 /* Send us the fake SIGTRAP */
1697 force_sig_info(SIGTRAP, &info, tsk); 1716 force_sig_info(SIGTRAP, &info, tsk);
1698} 1717}
@@ -1757,29 +1776,22 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
1757 1776
1758asmregparm void syscall_trace_leave(struct pt_regs *regs) 1777asmregparm void syscall_trace_leave(struct pt_regs *regs)
1759{ 1778{
1779 bool step;
1780
1760 if (unlikely(current->audit_context)) 1781 if (unlikely(current->audit_context))
1761 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); 1782 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1762 1783
1763 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) 1784 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1764 trace_sys_exit(regs, regs->ax); 1785 trace_sys_exit(regs, regs->ax);
1765 1786
1766 if (test_thread_flag(TIF_SYSCALL_TRACE))
1767 tracehook_report_syscall_exit(regs, 0);
1768
1769 /* 1787 /*
1770 * If TIF_SYSCALL_EMU is set, we only get here because of 1788 * If TIF_SYSCALL_EMU is set, we only get here because of
1771 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). 1789 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
1772 * We already reported this syscall instruction in 1790 * We already reported this syscall instruction in
1773 * syscall_trace_enter(), so don't do any more now. 1791 * syscall_trace_enter().
1774 */
1775 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1776 return;
1777
1778 /*
1779 * If we are single-stepping, synthesize a trap to follow the
1780 * system call instruction.
1781 */ 1792 */
1782 if (test_thread_flag(TIF_SINGLESTEP) && 1793 step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
1783 tracehook_consider_fatal_signal(current, SIGTRAP)) 1794 !test_thread_flag(TIF_SYSCALL_EMU);
1784 send_sigtrap(current, regs, 0, TRAP_BRKPT); 1795 if (step || test_thread_flag(TIF_SYSCALL_TRACE))
1796 tracehook_report_syscall_exit(regs, step);
1785} 1797}
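The breakpoint rework above changes modify_user_hw_breakpoint() to update the perf event in place and report success through an int, instead of returning a (possibly different) perf_event pointer that the caller had to re-store. A minimal sketch of the new calling convention, assuming bp was registered earlier:

    struct perf_event_attr attr;
    int err;

    attr = bp->attr;                /* start from the current settings */
    attr.bp_addr = addr;            /* change only what is needed */

    err = modify_user_hw_breakpoint(bp, &attr);
    if (err)
            return err;             /* bp stays registered either way */

Because bp is never replaced, thread->ptrace_bps[nr] no longer needs the NULL-then-restore dance the old code performed around each modification.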
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 18093d7498f0..12e9feaa2f7a 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -491,6 +491,19 @@ void force_hpet_resume(void)
491 break; 491 break;
492 } 492 }
493} 493}
494
495/*
496 * HPET MSI on some boards (ATI SB700/SB800) has a side effect on
497 * floppy DMA. Disable HPET MSI on such platforms.
498 */
499static void force_disable_hpet_msi(struct pci_dev *unused)
500{
501 hpet_msi_disable = 1;
502}
503
504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
505 force_disable_hpet_msi);
506
494#endif 507#endif
495 508
496#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) 509#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b97fc5b124e..704bddcdf64d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
203 DMI_MATCH(DMI_BOARD_NAME, "0T656F"), 203 DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
204 }, 204 },
205 }, 205 },
206 { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
207 .callback = set_bios_reboot,
208 .ident = "Dell OptiPlex 760",
209 .matches = {
210 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
211 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
212 DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
213 },
214 },
206 { /* Handle problems with rebooting on Dell 2400's */ 215 { /* Handle problems with rebooting on Dell 2400's */
207 .callback = set_bios_reboot, 216 .callback = set_bios_reboot,
208 .ident = "Dell PowerEdge 2400", 217 .ident = "Dell PowerEdge 2400",
@@ -259,6 +268,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
259 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), 268 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
260 }, 269 },
261 }, 270 },
271 { /* Handle problems with rebooting on ASUS P4S800 */
272 .callback = set_bios_reboot,
273 .ident = "ASUS P4S800",
274 .matches = {
275 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
276 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
277 },
278 },
262 { } 279 { }
263}; 280};
264 281
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 201eab63b05f..fda313ebbb03 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,7 +12,7 @@
12#include <linux/interrupt.h> 12#include <linux/interrupt.h>
13#include <asm/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15#include <asm/geode.h> 15#include <linux/cs5535.h>
16 16
17static void cs5530a_warm_reset(struct pci_dev *dev) 17static void cs5530a_warm_reset(struct pci_dev *dev)
18{ 18{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 946a311a25c9..cb42109a55b4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -73,6 +73,7 @@
73 73
74#include <asm/mtrr.h> 74#include <asm/mtrr.h>
75#include <asm/apic.h> 75#include <asm/apic.h>
76#include <asm/trampoline.h>
76#include <asm/e820.h> 77#include <asm/e820.h>
77#include <asm/mpspec.h> 78#include <asm/mpspec.h>
78#include <asm/setup.h> 79#include <asm/setup.h>
@@ -120,7 +121,9 @@
120unsigned long max_low_pfn_mapped; 121unsigned long max_low_pfn_mapped;
121unsigned long max_pfn_mapped; 122unsigned long max_pfn_mapped;
122 123
124#ifdef CONFIG_DMI
123RESERVE_BRK(dmi_alloc, 65536); 125RESERVE_BRK(dmi_alloc, 65536);
126#endif
124 127
125unsigned int boot_cpu_id __read_mostly; 128unsigned int boot_cpu_id __read_mostly;
126 129
@@ -641,23 +644,48 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
641 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), 644 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
642 }, 645 },
643 }, 646 },
644 {
645 /* 647 /*
646 * AMI BIOS with low memory corruption was found on Intel DG45ID board. 648 * AMI BIOS with low memory corruption was found on Intel DG45ID and
647 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will 649 * DG45FC boards.
650 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
648 * match only DMI_BOARD_NAME and see if there are more bad products 651
649 * with this vendor. 652 * with this vendor.
650 */ 653 */
654 {
651 .callback = dmi_low_memory_corruption, 655 .callback = dmi_low_memory_corruption,
652 .ident = "AMI BIOS", 656 .ident = "AMI BIOS",
653 .matches = { 657 .matches = {
654 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), 658 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
655 }, 659 },
656 }, 660 },
661 {
662 .callback = dmi_low_memory_corruption,
663 .ident = "AMI BIOS",
664 .matches = {
665 DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
666 },
667 },
657#endif 668#endif
658 {} 669 {}
659}; 670};
660 671
672static void __init trim_bios_range(void)
673{
674 /*
675 * A special case is the first 4Kb of memory;
676 * This is a BIOS owned area, not kernel ram, but generally
677 * not listed as such in the E820 table.
678 */
679 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
680 /*
681 * Special case: some BIOSes report the PC BIOS
682 * area (640KB->1MB) as RAM even though it is not.
683 * Take it out.
684 */
685 e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
686 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
687}
688
661/* 689/*
662 * Determine if we were loaded by an EFI loader. If so, then we have also been 690 * Determine if we were loaded by an EFI loader. If so, then we have also been
663 * passed the efi memmap, systab, etc., so we should use these data structures 691 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -821,7 +849,7 @@ void __init setup_arch(char **cmdline_p)
821 insert_resource(&iomem_resource, &data_resource); 849 insert_resource(&iomem_resource, &data_resource);
822 insert_resource(&iomem_resource, &bss_resource); 850 insert_resource(&iomem_resource, &bss_resource);
823 851
824 852 trim_bios_range();
825#ifdef CONFIG_X86_32 853#ifdef CONFIG_X86_32
826 if (ppro_with_ram_bug()) { 854 if (ppro_with_ram_bug()) {
827 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, 855 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
@@ -875,6 +903,13 @@ void __init setup_arch(char **cmdline_p)
875 903
876 reserve_brk(); 904 reserve_brk();
877 905
906 /*
907 * Find and reserve possible boot-time SMP configuration:
908 */
909 find_smp_config();
910
911 reserve_trampoline_memory();
912
878#ifdef CONFIG_ACPI_SLEEP 913#ifdef CONFIG_ACPI_SLEEP
879 /* 914 /*
880 * Reserve low memory region for sleep support. 915 * Reserve low memory region for sleep support.
@@ -921,11 +956,6 @@ void __init setup_arch(char **cmdline_p)
921 956
922 early_acpi_boot_init(); 957 early_acpi_boot_init();
923 958
924 /*
925 * Find and reserve possible boot-time SMP configuration:
926 */
927 find_smp_config();
928
929#ifdef CONFIG_ACPI_NUMA 959#ifdef CONFIG_ACPI_NUMA
930 /* 960 /*
931 * Parse SRAT to discover nodes. 961 * Parse SRAT to discover nodes.
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d559af913e1f..35abcb8b00e9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -1,3 +1,5 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
1#include <linux/kernel.h> 3#include <linux/kernel.h>
2#include <linux/module.h> 4#include <linux/module.h>
3#include <linux/init.h> 5#include <linux/init.h>
@@ -20,9 +22,9 @@
20#include <asm/stackprotector.h> 22#include <asm/stackprotector.h>
21 23
22#ifdef CONFIG_DEBUG_PER_CPU_MAPS 24#ifdef CONFIG_DEBUG_PER_CPU_MAPS
23# define DBG(x...) printk(KERN_DEBUG x) 25# define DBG(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
24#else 26#else
25# define DBG(x...) 27# define DBG(fmt, ...) do { if (0) pr_debug(fmt, ##__VA_ARGS__); } while (0)
26#endif 28#endif
27 29
28DEFINE_PER_CPU(int, cpu_number); 30DEFINE_PER_CPU(int, cpu_number);
@@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
116 } else { 118 } else {
117 ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), 119 ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
118 size, align, goal); 120 size, align, goal);
119 pr_debug("per cpu data for cpu%d %lu bytes on node%d at " 121 pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
120 "%016lx\n", cpu, size, node, __pa(ptr)); 122 cpu, size, node, __pa(ptr));
121 } 123 }
122 return ptr; 124 return ptr;
123#else 125#else
@@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void)
198 pcpu_cpu_distance, 200 pcpu_cpu_distance,
199 pcpu_fc_alloc, pcpu_fc_free); 201 pcpu_fc_alloc, pcpu_fc_free);
200 if (rc < 0) 202 if (rc < 0)
201 pr_warning("PERCPU: %s allocator failed (%d), " 203 pr_warning("%s allocator failed (%d), falling back to page size\n",
202 "falling back to page size\n",
203 pcpu_fc_names[pcpu_chosen_fc], rc); 204 pcpu_fc_names[pcpu_chosen_fc], rc);
204 } 205 }
205 if (rc < 0) 206 if (rc < 0)
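The pr_fmt() definition added at the top of setup_percpu.c is what lets the messages drop their hand-written "PERCPU: " prefixes: every pr_<level>() call in the file expands against pr_fmt(). A minimal sketch of the mechanism (the emitted prefix depends on KBUILD_MODNAME, here assumed to be "setup_percpu"):

    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt    /* must precede the includes */

    #include <linux/kernel.h>

    /* pr_warning("%s allocator failed (%d), ...\n", name, rc) now emits e.g.
     *   setup_percpu: embed allocator failed (-12), falling back to page size
     * because pr_warning(fmt, ...) is printk(KERN_WARNING pr_fmt(fmt), ...).
     */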
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 74fe6d86dc5d..4fd173cd8e57 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -545,22 +545,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
545} 545}
546#endif /* CONFIG_X86_32 */ 546#endif /* CONFIG_X86_32 */
547 547
548#ifdef CONFIG_X86_32 548long
549int sys_sigaltstack(struct pt_regs *regs)
550{
551 const stack_t __user *uss = (const stack_t __user *)regs->bx;
552 stack_t __user *uoss = (stack_t __user *)regs->cx;
553
554 return do_sigaltstack(uss, uoss, regs->sp);
555}
556#else /* !CONFIG_X86_32 */
557asmlinkage long
558sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, 549sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
559 struct pt_regs *regs) 550 struct pt_regs *regs)
560{ 551{
561 return do_sigaltstack(uss, uoss, regs->sp); 552 return do_sigaltstack(uss, uoss, regs->sp);
562} 553}
563#endif /* CONFIG_X86_32 */
564 554
565/* 555/*
566 * Do a signal return; undo the signal stack. 556 * Do a signal return; undo the signal stack.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 324f2a44c221..9b4401115ea1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -320,6 +320,7 @@ notrace static void __cpuinit start_secondary(void *unused)
320 unlock_vector_lock(); 320 unlock_vector_lock();
321 ipi_call_unlock(); 321 ipi_call_unlock();
322 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 322 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
323 x86_platform.nmi_init();
323 324
324 /* enable local interrupts */ 325 /* enable local interrupts */
325 local_irq_enable(); 326 local_irq_enable();
@@ -671,6 +672,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
671 complete(&c_idle->done); 672 complete(&c_idle->done);
672} 673}
673 674
675/* Reduce the number of lines printed when booting a system with a large CPU count */
676static void __cpuinit announce_cpu(int cpu, int apicid)
677{
678 static int current_node = -1;
679 int node = cpu_to_node(cpu);
680
681 if (system_state == SYSTEM_BOOTING) {
682 if (node != current_node) {
683 if (current_node > (-1))
684 pr_cont(" Ok.\n");
685 current_node = node;
686 pr_info("Booting Node %3d, Processors ", node);
687 }
688 pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
689 return;
690 } else
691 pr_info("Booting Node %d Processor %d APIC 0x%x\n",
692 node, cpu, apicid);
693}
694
674/* 695/*
675 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 696 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
676 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 697 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -687,7 +708,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
687 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 708 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
688 }; 709 };
689 710
690 INIT_WORK(&c_idle.work, do_fork_idle); 711 INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
691 712
692 alternatives_smp_switch(1); 713 alternatives_smp_switch(1);
693 714
@@ -713,6 +734,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
713 734
714 if (IS_ERR(c_idle.idle)) { 735 if (IS_ERR(c_idle.idle)) {
715 printk("failed fork for CPU %d\n", cpu); 736 printk("failed fork for CPU %d\n", cpu);
737 destroy_work_on_stack(&c_idle.work);
716 return PTR_ERR(c_idle.idle); 738 return PTR_ERR(c_idle.idle);
717 } 739 }
718 740
@@ -736,9 +758,8 @@ do_rest:
736 /* start_ip had better be page-aligned! */ 758 /* start_ip had better be page-aligned! */
737 start_ip = setup_trampoline(); 759 start_ip = setup_trampoline();
738 760
739 /* So we see what's up */ 761 /* So we see what's up */
740 printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", 762 announce_cpu(cpu, apicid);
741 cpu, apicid, start_ip);
742 763
743 /* 764 /*
744 * This grunge runs the startup process for 765 * This grunge runs the startup process for
@@ -787,21 +808,17 @@ do_rest:
 			udelay(100);
 	}
 
-	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
-		/* number CPUs logically, starting from 1 (BSP is 0) */
-		pr_debug("OK.\n");
-		printk(KERN_INFO "CPU%d: ", cpu);
-		print_cpu_info(&cpu_data(cpu));
-		pr_debug("CPU has booted.\n");
-	} else {
+	if (cpumask_test_cpu(cpu, cpu_callin_mask))
+		pr_debug("CPU%d: has booted.\n", cpu);
+	else {
 		boot_error = 1;
 		if (*((volatile unsigned char *)trampoline_base)
 				== 0xA5)
 			/* trampoline started but...? */
-			printk(KERN_ERR "Stuck ??\n");
+			pr_err("CPU%d: Stuck ??\n", cpu);
 		else
 			/* trampoline code not run */
-			printk(KERN_ERR "Not responding.\n");
+			pr_err("CPU%d: Not responding.\n", cpu);
 		if (apic->inquire_remote_apic)
 			apic->inquire_remote_apic(apicid);
 	}
@@ -831,6 +848,7 @@ do_rest:
 		smpboot_restore_warm_reset_vector();
 	}
 
+	destroy_work_on_stack(&c_idle.work);
 	return boot_error;
 }
 
@@ -1066,9 +1084,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_cpu_sibling_map(0);
 
 	enable_IR_x2apic();
-#ifdef CONFIG_X86_64
 	default_setup_apic_routing();
-#endif
 
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
@@ -1291,14 +1307,16 @@ void native_cpu_die(unsigned int cpu)
 	for (i = 0; i < 10; i++) {
 		/* They ack this in play_dead by setting CPU_DEAD */
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
-			printk(KERN_INFO "CPU %d is now offline\n", cpu);
+			if (system_state == SYSTEM_RUNNING)
+				pr_info("CPU %u is now offline\n", cpu);
+
 			if (1 == num_online_cpus())
 				alternatives_smp_switch(0);
 			return;
 		}
 		msleep(100);
 	}
-	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+	pr_err("CPU %u didn't die...\n", cpu);
 }
 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c3eb207181fe..922eefbb3f6c 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops save_stack_ops = {
 	.warning	= save_stack_warning,
 	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address,
+	.walk_stack	= print_context_stack,
 };
 
 static const struct stacktrace_ops save_stack_ops_nosched = {
 	.warning	= save_stack_warning,
 	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address_nosched,
+	.walk_stack	= print_context_stack,
 };
 
 /*
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 1884a8d12bfa..dee1ff7cba58 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,31 +24,6 @@
 
 #include <asm/syscalls.h>
 
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, unsigned long pgoff)
-{
-	int error = -EBADF;
-	struct file *file = NULL;
-	struct mm_struct *mm = current->mm;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-
-	down_write(&mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return error;
-}
-
 /*
  * Perform the select(nd, in, out, ex, tv) and mmap() system
  * calls. Linux/i386 didn't use to be able to handle more than
@@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
 	if (a.offset & ~PAGE_MASK)
 		goto out;
 
-	err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+	err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
 			a.fd, a.offset >> PAGE_SHIFT);
 out:
 	return err;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 45e00eb09c3a..8aa2057efd12 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, fd, unsigned long, off)
 {
 	long error;
-	struct file *file;
-
 	error = -EINVAL;
 	if (off & ~PAGE_MASK)
 		goto out;
 
-	error = -EBADF;
-	file = NULL;
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			goto out;
-	}
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
+	error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 out:
 	return error;
 }
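
Both deleted bodies above (the i386 sys_mmap2() and the x86-64 mmap path) collapse into the shared sys_mmap_pgoff() entry point, which carries the fget()/do_mmap_pgoff()/fput() dance once, in common mm code. A hedged sketch of the shape of that common helper, reconstructed from the removed arch copies rather than quoted from this series:

/* sketch only -- the real helper lives in common mm code, not arch/x86 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	struct file *file = NULL;
	unsigned long retval = -EBADF;

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
	if (!(flags & MAP_ANONYMOUS)) {
		file = fget(fd);
		if (!file)
			goto out;
	}

	down_write(&current->mm->mmap_sem);
	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
	up_write(&current->mm->mmap_sem);

	if (file)
		fput(file);
out:
	return retval;
}
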
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 70c2125d55b9..15228b5d3eb7 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -191,7 +191,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* reserved for streams2 */
 	.long ptregs_vfork	/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap2
+	.long sys_mmap_pgoff
 	.long sys_truncate64
 	.long sys_ftruncate64
 	.long sys_stat64	/* 195 */
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index cd022121cab6..c652ef62742d 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -12,21 +12,19 @@
 #endif
 
 /* ready for x86_64 and x86 */
-unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base;
 
 void __init reserve_trampoline_memory(void)
 {
-#ifdef CONFIG_X86_32
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
+	unsigned long mem;
+
 	/* Has to be in very low memory so we can execute real-mode AP code. */
-	reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
-		      "TRAMPOLINE");
+	mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
+	if (mem == -1L)
+		panic("Cannot allocate trampoline\n");
+
+	trampoline_base = __va(mem);
+	reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
 }
 
 /*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 33399176512a..1168e4454188 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	get_debugreg(dr6, 6);
 
+	/* Filter out all the reserved bits which are preset to 1 */
+	dr6 &= ~DR6_RESERVED;
+
 	/* Catch kmemcheck conditions first of all! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;
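
DR6's reserved bits read as 1 on real hardware, so masking them off up front lets the rest of do_debug() test individual cause bits cleanly. Assuming the companion debugreg.h change in this series defines the mask as the customary reserved-bit value, the filtering is equivalent to this sketch:

#include <asm/debugreg.h>

/* sketch: assumes DR6_RESERVED is the usual reserved-bit mask, 0xFFFF0FF0 */
static unsigned long read_dr6_causes(void)
{
	unsigned long dr6;

	get_debugreg(dr6, 6);
	return dr6 & ~DR6_RESERVED;	/* keep only B0-B3, BD, BS, BT */
}
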
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cd982f48e23e..23066ecf12fa 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason)
 {
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
+		sched_clock_stable = 0;
 		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
 		if (clocksource_tsc.mult)
@@ -805,7 +806,7 @@ static void __init check_system_tsc_reliable(void)
 	unsigned long res_low, res_high;
 
 	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
-	/* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
+	/* Geode_LX - the OLPC CPU has a very reliable TSC */
 	if (res_low & RTSC_SUSP)
 		tsc_clocksource_reliable = 1;
 #endif
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index eed156851f5d..0aa5fed8b9e6 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count;
  * we want to have the fastest, inlined, non-debug version
  * of a critical section, to be able to prove TSC time-warps:
  */
-static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 static __cpuinitdata cycles_t last_tsc;
 static __cpuinitdata cycles_t max_warp;
@@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void)
 		 * previous TSC that was measured (possibly on
 		 * another CPU) and update the previous TSC timestamp.
 		 */
-		__raw_spin_lock(&sync_lock);
+		arch_spin_lock(&sync_lock);
 		prev = last_tsc;
 		rdtsc_barrier();
 		now = get_cycles();
 		rdtsc_barrier();
 		last_tsc = now;
-		__raw_spin_unlock(&sync_lock);
+		arch_spin_unlock(&sync_lock);
 
 		/*
 		 * Be nice every now and then (and also check whether
@@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void)
 		 * we saw a time-warp of the TSC going backwards:
 		 */
 		if (unlikely(prev > now)) {
-			__raw_spin_lock(&sync_lock);
+			arch_spin_lock(&sync_lock);
 			max_warp = max(max_warp, prev - now);
 			nr_warps++;
-			__raw_spin_unlock(&sync_lock);
+			arch_spin_unlock(&sync_lock);
 		}
 	}
 	WARN(!(now-start),
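
The rename from raw_spinlock_t/__raw_spin_lock() to arch_spinlock_t/arch_spin_lock() frees the raw_ prefix for the new raw_spinlock layer; code that must bypass lockdep and the sleeping-lock machinery entirely, as this TSC warp probe does, now spells it like so (a minimal sketch):

#include <asm/spinlock.h>

static arch_spinlock_t probe_lock = __ARCH_SPIN_LOCK_UNLOCKED;

static void probe_critical_section(void)
{
	/* no lockdep, no preemption handling -- the caller must manage both */
	arch_spin_lock(&probe_lock);
	/* ... shortest possible critical section ... */
	arch_spin_unlock(&probe_lock);
}
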
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 61d805df4c91..ece73d8e3240 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -215,8 +215,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	unsigned long mmr_offset;
 	unsigned mmr_pnode;
 
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
+	if (set_desc_affinity(desc, mask, &dest))
 		return -1;
 
 	mmr_value = 0;
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 36afb98675a4..309c70fb7759 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -54,19 +54,19 @@ static int __init sgi_uv_sysfs_init(void)
 	if (!sgi_uv_kobj)
 		sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
 	if (!sgi_uv_kobj) {
-		printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n");
+		printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
 		return -EINVAL;
 	}
 
 	ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
 	if (ret) {
-		printk(KERN_WARNING "sysfs_create_file partition_id failed \n");
+		printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
 		return ret;
 	}
 
 	ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
 	if (ret) {
-		printk(KERN_WARNING "sysfs_create_file coherence_id failed \n");
+		printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
 		return ret;
 	}
 
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 3c84aa001c11..2b75ef638dbc 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -282,10 +282,21 @@ static int uv_rtc_unset_timer(int cpu, int force)
 
 /*
  * Read the RTC.
+ *
+ * Starting with HUB rev 2.0, the UV RTC register is replicated across all
+ * cachelines of its own page.  This allows faster simultaneous reads
+ * from a given socket.
  */
 static cycle_t uv_read_rtc(struct clocksource *cs)
 {
-	return (cycle_t)uv_read_local_mmr(UVH_RTC);
+	unsigned long offset;
+
+	if (uv_get_min_hub_revision_id() == 1)
+		offset = 0;
+	else
+		offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
+
+	return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
 }
 
 /*
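
With the usual 64-byte cachelines and 4 KiB pages, the offset computed above spreads concurrent readers across 64 distinct cachelines of the replicated register page: blade processor 0 reads at offset 0, processor 3 at 3 * 64 = 192, and processor 64 wraps back to 0 via the % PAGE_SIZE. The same computation in isolation, as a sketch:

#include <linux/cache.h>	/* L1_CACHE_BYTES */
#include <asm/page.h>		/* PAGE_SIZE */

/* sketch: per-reader offset into a register page replicated per cacheline */
static unsigned long rtc_read_offset(int blade_processor_id)
{
	return (blade_processor_id * L1_CACHE_BYTES) % PAGE_SIZE;
}
/* e.g. id 0 -> 0, id 3 -> 192, id 64 -> 0 again (64 * 64 == PAGE_SIZE) */
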
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 9c4e62539058..5ffb5622f793 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -197,9 +197,8 @@ out:
 static int do_vm86_irq_handling(int subfunction, int irqnumber);
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
 
-int sys_vm86old(struct pt_regs *regs)
+int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs)
 {
-	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
 					 * This remains on the stack until we
@@ -227,7 +226,7 @@ out:
 }
 
 
-int sys_vm86(struct pt_regs *regs)
+int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs)
 {
 	struct kernel_vm86_struct info; /* declare this _on top_,
 					 * this avoids wasting of stack space.
@@ -239,12 +238,12 @@ int sys_vm86(struct pt_regs *regs)
 	struct vm86plus_struct __user *v86;
 
 	tsk = current;
-	switch (regs->bx) {
+	switch (cmd) {
 	case VM86_REQUEST_IRQ:
 	case VM86_FREE_IRQ:
 	case VM86_GET_IRQ_BITS:
 	case VM86_GET_AND_RESET_IRQ:
-		ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
+		ret = do_vm86_irq_handling(cmd, (int)arg);
 		goto out;
 	case VM86_PLUS_INSTALL_CHECK:
 		/*
@@ -261,7 +260,7 @@ int sys_vm86(struct pt_regs *regs)
 	ret = -EPERM;
 	if (tsk->thread.saved_sp0)
 		goto out;
-	v86 = (struct vm86plus_struct __user *)regs->cx;
+	v86 = (struct vm86plus_struct __user *)arg;
 	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
 				       offsetof(struct kernel_vm86_struct, regs32) -
 				       sizeof(info.regs));
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f3f2104408d9..f92a0da608cb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -319,9 +319,7 @@ SECTIONS
 		__brk_limit = .;
 	}
 
-	.end : AT(ADDR(.end) - LOAD_OFFSET) {
-		_end = .;
-	}
+	_end = .;
 
 	STABS_DEBUG
 	DWARF_DEBUG
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a1029769b6f2..693920b22496 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -17,8 +17,6 @@
 EXPORT_SYMBOL(mcount);
 #endif
 
-EXPORT_SYMBOL(kernel_thread);
-
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
@@ -28,7 +26,8 @@ EXPORT_SYMBOL(__put_user_2);
 EXPORT_SYMBOL(__put_user_4);
 EXPORT_SYMBOL(__put_user_8);
 
-EXPORT_SYMBOL(copy_user_generic);
+EXPORT_SYMBOL(copy_user_generic_string);
+EXPORT_SYMBOL(copy_user_generic_unrolled);
 EXPORT_SYMBOL(__copy_user_nocache);
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
@@ -56,4 +55,6 @@ EXPORT_SYMBOL(__memcpy);
 
 EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(init_level4_pgt);
-EXPORT_SYMBOL(load_gs_index);
+#ifndef CONFIG_PARAVIRT
+EXPORT_SYMBOL(native_load_gs_index);
+#endif
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ccd179dec36e..ee5746c94628 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -76,10 +76,13 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
 	.setup_percpu_clockev		= setup_secondary_APIC_clock,
 };
 
+static void default_nmi_init(void) { };
+
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= native_calibrate_tsc,
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
 	.is_untracked_pat_range		= is_ISA_range,
+	.nmi_init			= default_nmi_init
 };
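
x86_platform is the runtime-patchable platform vtable; with the new hook defaulting to a no-op, a platform that needs NMI setup simply overrides the pointer before secondary CPUs come up (which is why start_secondary() above calls x86_platform.nmi_init()). A hedged sketch of an override -- the function and setup names here are stand-ins, not from this patch:

#include <asm/x86_init.h>

static void my_platform_nmi_init(void)	/* hypothetical platform hook */
{
	/* unmask / route platform NMI sources here */
}

static void __init my_platform_setup(void)	/* hypothetical */
{
	x86_platform.nmi_init = my_platform_nmi_init;	/* replaces default_nmi_init */
}
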
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index c5ee17e8c6d9..782c3a362ec6 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void)
 	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
 	xstate_size = ebx;
 
+	update_regset_xstate_info(xstate_size, pcntxt_mask);
 	prepare_fx_sw_frame();
 
 	setup_xstate_init();
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index fab7440c9bb2..15578f180e59 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -29,6 +29,8 @@
  * Based on QEMU and Xen.
  */
 
+#define pr_fmt(fmt) "pit: " fmt
+
 #include <linux/kvm_host.h>
 
 #include "irq.h"
@@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 
 static void destroy_pit_timer(struct kvm_timer *pt)
 {
-	pr_debug("pit: execute del timer!\n");
+	pr_debug("execute del timer!\n");
 	hrtimer_cancel(&pt->timer);
 }
 
@@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
 
-	pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+	pr_debug("create pit timer, interval is %llu nsec\n", interval);
 
 	/* TODO The new value only affected after the retriggered */
 	hrtimer_cancel(&pt->timer);
@@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 
 	WARN_ON(!mutex_is_locked(&ps->lock));
 
-	pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+	pr_debug("load_count val is %d, channel is %d\n", val, channel);
 
 	/*
 	 * The largest possible initial count is 0; this is equivalent
@@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
 	mutex_lock(&pit_state->lock);
 
 	if (val != 0)
-		pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+		pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n",
 			 (unsigned int)addr, len, val);
 
 	if (addr == 3) {
 		channel = val >> 6;
@@ -465,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this,
 		return -EOPNOTSUPP;
 
 	addr &= KVM_PIT_CHANNEL_MASK;
+	if (addr == 3)
+		return 0;
+
 	s = &pit_state->channels[addr];
 
 	mutex_lock(&pit_state->lock);
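
Defining pr_fmt() before any includes makes every pr_debug()/pr_err() in the file pick up the prefix at compile time, which is why the literal "pit: " strings can be dropped from each call site above. The mechanism, roughly: printk.h only provides a default pr_fmt when the file has not defined its own, and the pr_* macros wrap their format string with it. A sketch:

/* must come before <linux/kernel.h> (or anything that pulls it in) */
#define pr_fmt(fmt) "pit: " fmt

#include <linux/kernel.h>

static void demo(void)
{
	/* expands to printk(KERN_DEBUG "pit: " "load_count val is %d\n", 42) */
	pr_debug("load_count val is %d\n", 42);
}
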
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cd60c0bd1b32..ba8c045da782 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
+		if (trig_mode) {
+			apic_debug("level trig mode for vector %d", vector);
+			apic_set_vector(vector, apic->regs + APIC_TMR);
+		} else
+			apic_clear_vector(vector, apic->regs + APIC_TMR);
+
 		result = !apic_test_and_set_irr(vector, apic);
 		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 					  trig_mode, vector, !result);
@@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			break;
 		}
 
-		if (trig_mode) {
-			apic_debug("level trig mode for vector %d", vector);
-			apic_set_vector(vector, apic->regs + APIC_TMR);
-		} else
-			apic_clear_vector(vector, apic->regs + APIC_TMR);
-
 		kvm_vcpu_kick(vcpu);
 		break;
 
@@ -1150,6 +1151,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
 	start_apic_timer(apic);
+	apic->irr_pending = true;
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4c3e5b2314cb..89a49fb46a27 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
-		return page_size;
+		return PT_PAGE_TABLE_LEVEL;
 
 	down_read(&current->mm->mmap_sem);
 	vma = find_vma(current->mm, addr);
@@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 	if (host_level == PT_PAGE_TABLE_LEVEL)
 		return host_level;
 
-	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
-
+	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
 		if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
 			break;
-	}
 
 	return level - 1;
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a6017132fba8..ede2131a9225 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -150,7 +150,9 @@ walk:
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
+			goto not_present;
+
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
 		if (!is_present_gpte(pte))
@@ -455,8 +457,6 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	pt_element_t gpte;
-	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -470,10 +470,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
-			struct kvm_mmu_page *sp = page_header(__pa(sptep));
-
-			pte_gpa = (sp->gfn << PAGE_SHIFT);
-			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -492,18 +488,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	if (pte_gpa == -1)
-		return;
-	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
-				  sizeof(pt_element_t)))
-		return;
-	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
-		if (mmu_topup_memory_caches(vcpu))
-			return;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
-				  sizeof(pt_element_t), 0);
-	}
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3de0b37ec038..1d9b33843c80 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
 static int svm_hardware_enable(void *garbage)
 {
 
-	struct svm_cpu_data *svm_data;
+	struct svm_cpu_data *sd;
 	uint64_t efer;
 	struct descriptor_table gdt_descr;
 	struct desc_struct *gdt;
@@ -331,63 +331,61 @@ static int svm_hardware_enable(void *garbage)
 		       me);
 		return -EINVAL;
 	}
-	svm_data = per_cpu(svm_data, me);
+	sd = per_cpu(svm_data, me);
 
-	if (!svm_data) {
+	if (!sd) {
 		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
 		       me);
 		return -EINVAL;
 	}
 
-	svm_data->asid_generation = 1;
-	svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
-	svm_data->next_asid = svm_data->max_asid + 1;
+	sd->asid_generation = 1;
+	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
+	sd->next_asid = sd->max_asid + 1;
 
 	kvm_get_gdt(&gdt_descr);
 	gdt = (struct desc_struct *)gdt_descr.base;
-	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
+	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-	wrmsrl(MSR_VM_HSAVE_PA,
-	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 
 	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
 {
-	struct svm_cpu_data *svm_data
-		= per_cpu(svm_data, raw_smp_processor_id());
+	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
 
-	if (!svm_data)
+	if (!sd)
 		return;
 
 	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
-	__free_page(svm_data->save_area);
-	kfree(svm_data);
+	__free_page(sd->save_area);
+	kfree(sd);
 }
 
 static int svm_cpu_init(int cpu)
 {
-	struct svm_cpu_data *svm_data;
+	struct svm_cpu_data *sd;
 	int r;
 
-	svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
-	if (!svm_data)
+	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
+	if (!sd)
 		return -ENOMEM;
-	svm_data->cpu = cpu;
-	svm_data->save_area = alloc_page(GFP_KERNEL);
+	sd->cpu = cpu;
+	sd->save_area = alloc_page(GFP_KERNEL);
 	r = -ENOMEM;
-	if (!svm_data->save_area)
+	if (!sd->save_area)
 		goto err_1;
 
-	per_cpu(svm_data, cpu) = svm_data;
+	per_cpu(svm_data, cpu) = sd;
 
 	return 0;
 
 err_1:
-	kfree(svm_data);
+	kfree(sd);
 	return r;
 
 }
@@ -1092,16 +1090,16 @@ static void save_host_msrs(struct kvm_vcpu *vcpu)
 #endif
 }
 
-static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
+static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 {
-	if (svm_data->next_asid > svm_data->max_asid) {
-		++svm_data->asid_generation;
-		svm_data->next_asid = 1;
+	if (sd->next_asid > sd->max_asid) {
+		++sd->asid_generation;
+		sd->next_asid = 1;
 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}
 
-	svm->asid_generation = svm_data->asid_generation;
-	svm->vmcb->control.asid = svm_data->next_asid++;
+	svm->asid_generation = sd->asid_generation;
+	svm->vmcb->control.asid = sd->next_asid++;
 }
 
 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
@@ -2429,8 +2427,8 @@ static void reload_tss(struct kvm_vcpu *vcpu)
 {
 	int cpu = raw_smp_processor_id();
 
-	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
-	svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+	sd->tss_desc->type = 9; /* available 32/64-bit TSS */
 	load_TR_desc();
 }
 
@@ -2438,12 +2436,12 @@ static void pre_svm_run(struct vcpu_svm *svm)
 {
 	int cpu = raw_smp_processor_id();
 
-	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 
 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
 	/* FIXME: handle wraparound of asid_generation */
-	if (svm->asid_generation != svm_data->asid_generation)
-		new_asid(svm, svm_data);
+	if (svm->asid_generation != sd->asid_generation)
+		new_asid(svm, sd);
 }
 
 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d068966fb2a..a1e1bc9d412d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	static int version;
 	struct pvclock_wall_clock wc;
-	struct timespec now, sys, boot;
+	struct timespec boot;
 
 	if (!wall_clock)
 		return;
@@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	 * wall clock specified here.  guest system time equals host
 	 * system time for us, thus we must fill in host boot time here.
 	 */
-	now = current_kernel_time();
-	ktime_get_ts(&sys);
-	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+	getboottime(&boot);
 
 	wc.sec = boot.tv_sec;
 	wc.nsec = boot.tv_nsec;
@@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
 	ktime_get_ts(&ts);
+	monotonic_to_bootbased(&ts);
 	local_irq_restore(flags);
 
 	/* With all the info we got, fill in the values */
@@ -1913,7 +1912,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 
 	events->sipi_vector = vcpu->arch.sipi_vector;
 
-	events->flags = 0;
+	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
+			 | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
 
 	vcpu_put(vcpu);
 }
@@ -1921,7 +1921,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 					      struct kvm_vcpu_events *events)
 {
-	if (events->flags)
+	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
+			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
 		return -EINVAL;
 
 	vcpu_load(vcpu);
@@ -1938,10 +1939,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		kvm_pic_clear_isr_ack(vcpu->kvm);
 
 	vcpu->arch.nmi_injected = events->nmi.injected;
-	vcpu->arch.nmi_pending = events->nmi.pending;
+	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
+		vcpu->arch.nmi_pending = events->nmi.pending;
 	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
 
-	vcpu->arch.sipi_vector = events->sipi_vector;
+	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
+		vcpu->arch.sipi_vector = events->sipi_vector;
 
 	vcpu_put(vcpu);
 
@@ -5068,12 +5071,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 				       GFP_KERNEL);
 	if (!vcpu->arch.mce_banks) {
 		r = -ENOMEM;
-		goto fail_mmu_destroy;
+		goto fail_free_lapic;
 	}
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
 	return 0;
-
+fail_free_lapic:
+	kvm_free_lapic(vcpu);
 fail_mmu_destroy:
 	kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
@@ -5084,6 +5088,7 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
 	down_read(&vcpu->kvm->slots_lock);
 	kvm_mmu_destroy(vcpu);
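
On the userspace side, the valid-bits protocol introduced above means a VMM must set a flag for every field it wants the kernel to consume on KVM_SET_VCPU_EVENTS, and can ignore unknown fields on GET. A hedged sketch of the ioctl round trip (error handling elided; vcpu_fd is assumed to be an open KVM vCPU descriptor):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void poke_nmi_pending(int vcpu_fd, __u8 pending)
{
	struct kvm_vcpu_events ev;

	ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &ev);

	ev.nmi.pending = pending;
	/* only fields whose valid flag is set are written back */
	ev.flags = KVM_VCPUEVENT_VALID_NMI_PENDING;

	ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev);
}
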
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index a2d6472895fb..419386c24b82 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,7 +5,7 @@
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
 quiet_cmd_inat_tables = GEN $@
-      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
 
 $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
 	$(call cmd,inat_tables)
@@ -14,15 +14,15 @@ $(obj)/inat.o: $(obj)/inat-tables.c
 
 clean-files := inat-tables.c
 
-obj-$(CONFIG_SMP) := msr.o
+obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
-lib-y += insn.o inat.o
+lib-$(CONFIG_KPROBES) += insn.o inat.o
 
-obj-y += msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o
 
 ifeq ($(CONFIG_X86_32),y)
         obj-y += atomic64_32.o
@@ -34,9 +34,10 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
 endif
         lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
-        obj-y += io_64.o iomap_copy_64.o
+        obj-y += iomap_copy_64.o
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
+        lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
 endif
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
new file mode 100644
index 000000000000..a3c668875038
--- /dev/null
+++ b/arch/x86/lib/cache-smp.c
@@ -0,0 +1,19 @@
+#include <linux/smp.h>
+#include <linux/module.h>
+
+static void __wbinvd(void *dummy)
+{
+	wbinvd();
+}
+
+void wbinvd_on_cpu(int cpu)
+{
+	smp_call_function_single(cpu, __wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_cpu);
+
+int wbinvd_on_all_cpus(void)
+{
+	return on_each_cpu(__wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_all_cpus);
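
wbinvd is brutal on performance -- it writes back and invalidates the entire cache hierarchy of the executing CPU -- so these helpers exist to push it cross-CPU only when a driver truly needs it. A hedged usage sketch (the caller and its DMA scenario are hypothetical; the declarations are assumed to land in asm/smp.h alongside this series):

#include <asm/smp.h>	/* wbinvd_on_cpu(), wbinvd_on_all_cpus() */

static int flush_caches_for_dma(int target_cpu)	/* hypothetical caller */
{
	/* flush just the CPU known to have dirtied the buffer... */
	wbinvd_on_cpu(target_cpu);

	/* ...or, when ownership is unknown, every CPU (far more expensive) */
	return wbinvd_on_all_cpus();
}
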
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index cf889d4e076a..71100c98e337 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -90,12 +90,6 @@ ENTRY(_copy_from_user)
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
-ENTRY(copy_user_generic)
-	CFI_STARTPROC
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
-	CFI_ENDPROC
-ENDPROC(copy_user_generic)
-
 	.section .fixup,"ax"
 	/* must zero dest */
 ENTRY(bad_from_user)
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
deleted file mode 100644
index 3f1eb59b5f08..000000000000
--- a/arch/x86/lib/io_64.c
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <linux/string.h>
-#include <linux/module.h>
-#include <asm/io.h>
-
-void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
-{
-	__inline_memcpy((void *)dst, src, len);
-}
-EXPORT_SYMBOL(__memcpy_toio);
-
-void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
-{
-	__inline_memcpy(dst, (const void *)src, len);
-}
-EXPORT_SYMBOL(__memcpy_fromio);
-
-void memset_io(volatile void __iomem *a, int b, size_t c)
-{
-	/*
-	 * TODO: memset can mangle the IO patterns quite a bit.
-	 * perhaps it would be better to use a dumb one:
-	 */
-	memset((void *)a, b, c);
-}
-EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b57..f82e884928af 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
 /*
  * memcpy_c() - fast string ops (REP MOVSQ) based variant.
  *
- * Calls to this get patched into the kernel image via the
+ * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-	ALIGN
-memcpy_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c:
 	movq %rdi, %rax
 
 	movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
 	movl %edx, %ecx
 	rep movsb
 	ret
-	CFI_ENDPROC
-ENDPROC(memcpy_c)
+.Lmemcpy_e:
+	.previous
 
 ENTRY(__memcpy)
 ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
  * It is also a lot simpler. Use this when possible:
  */
 
-	.section .altinstr_replacement, "ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
-2:
-	.previous
-
 	.section .altinstructions, "a"
 	.align 8
 	.quad memcpy
-	.quad 1b
+	.quad .Lmemcpy_c
 	.byte X86_FEATURE_REP_GOOD
 
 	/*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte 2b - 1b
-	.byte 2b - 1b
+	.byte .Lmemcpy_e - .Lmemcpy_c
+	.byte .Lmemcpy_e - .Lmemcpy_c
 	.previous
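
Each .altinstructions record pairs an original instruction range with its replacement; at boot, apply_alternatives() copies the replacement over the original when the CPU advertises the feature bit, so moving the REP MOVSQ body into .altinstr_replacement lets it be patched directly over memcpy instead of being reached through a jmp. The record layout these .quad/.byte directives populate corresponds roughly to the following struct -- a sketch of the era's layout, not a quote from this patch:

/* sketch of struct alt_instr as the directives above lay it out */
struct alt_instr {
	u8 *instr;		/* .quad memcpy      - original start */
	u8 *replacement;	/* .quad .Lmemcpy_c  - replacement text */
	u8  cpuid;		/* .byte X86_FEATURE_REP_GOOD */
	u8  instrlen;		/* length of the original sequence */
	u8  replacementlen;	/* .byte .Lmemcpy_e - .Lmemcpy_c */
	u8  pad;
};
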
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd2..e88d3b81644a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
  *
  * rax   original destination
  */
-	ALIGN
-memset_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c:
 	movq %rdi,%r9
 	movl %edx,%r8d
 	andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
 	rep stosb
 	movq %r9,%rax
 	ret
-	CFI_ENDPROC
-ENDPROC(memset_c)
+.Lmemset_e:
+	.previous
 
 ENTRY(memset)
 ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
 
 #include <asm/cpufeature.h>
 
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memset_c - memset) - (2f - 1b)	/* offset */
-2:
-	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memset
-	.quad 1b
+	.quad .Lmemset_c
 	.byte X86_FEATURE_REP_GOOD
 	.byte .Lfinal - memset
-	.byte 2b - 1b
+	.byte .Lmemset_e - .Lmemset_c
 	.previous
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
new file mode 100644
index 000000000000..a6b1b86d2253
--- /dev/null
+++ b/arch/x86/lib/msr-smp.c
@@ -0,0 +1,204 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+
+static void __rdmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+	struct msr *reg;
+	int this_cpu = raw_smp_processor_id();
+
+	if (rv->msrs)
+		reg = per_cpu_ptr(rv->msrs, this_cpu);
+	else
+		reg = &rv->reg;
+
+	rdmsr(rv->msr_no, reg->l, reg->h);
+}
+
+static void __wrmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+	struct msr *reg;
+	int this_cpu = raw_smp_processor_id();
+
+	if (rv->msrs)
+		reg = per_cpu_ptr(rv->msrs, this_cpu);
+	else
+		reg = &rv->reg;
+
+	wrmsr(rv->msr_no, reg->l, reg->h);
+}
+
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+	*l = rv.reg.l;
+	*h = rv.reg.h;
+
+	return err;
+}
+EXPORT_SYMBOL(rdmsr_on_cpu);
+
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	rv.reg.l = l;
+	rv.reg.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+	return err;
+}
+EXPORT_SYMBOL(wrmsr_on_cpu);
+
+static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
+			    struct msr *msrs,
+			    void (*msr_func) (void *info))
+{
+	struct msr_info rv;
+	int this_cpu;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msrs   = msrs;
+	rv.msr_no = msr_no;
+
+	this_cpu = get_cpu();
+
+	if (cpumask_test_cpu(this_cpu, mask))
+		msr_func(&rv);
+
+	smp_call_function_many(mask, msr_func, &rv, 1);
+	put_cpu();
+}
+
+/* rdmsr on a bunch of CPUs
+ *
+ * @mask:       which CPUs
+ * @msr_no:     which MSR
+ * @msrs:       array of MSR values
+ *
+ */
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+{
+	__rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
+}
+EXPORT_SYMBOL(rdmsr_on_cpus);
+
+/*
+ * wrmsr on a bunch of CPUs
+ *
+ * @mask:       which CPUs
+ * @msr_no:     which MSR
+ * @msrs:       array of MSR values
+ *
+ */
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+{
+	__rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
+}
+EXPORT_SYMBOL(wrmsr_on_cpus);
+
+/* These "safe" variants are slower and should be used when the target MSR
+   may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+}
+
+static void __wrmsr_safe_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+	*l = rv.reg.l;
+	*h = rv.reg.h;
+
+	return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_on_cpu);
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.msr_no = msr_no;
+	rv.reg.l = l;
+	rv.reg.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+	return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_on_cpu);
+
+/*
+ * These variants are significantly slower, but allow control over
+ * the entire 32-bit GPR set.
+ */
+static void __rdmsr_safe_regs_on_cpu(void *info)
+{
+	struct msr_regs_info *rv = info;
+
+	rv->err = rdmsr_safe_regs(rv->regs);
+}
+
+static void __wrmsr_safe_regs_on_cpu(void *info)
+{
+	struct msr_regs_info *rv = info;
+
+	rv->err = wrmsr_safe_regs(rv->regs);
+}
+
+int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+{
+	int err;
+	struct msr_regs_info rv;
+
+	rv.regs = regs;
+	rv.err  = -EIO;
+	err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
+
+	return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
+
+int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+{
+	int err;
+	struct msr_regs_info rv;
+
+	rv.regs = regs;
+	rv.err  = -EIO;
+	err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
+
+	return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 41628b104b9e..8f8eebdca7d4 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,218 +1,23 @@
 #include <linux/module.h>
 #include <linux/preempt.h>
-#include <linux/smp.h>
 #include <asm/msr.h>
 
-struct msr_info {
-	u32 msr_no;
-	struct msr reg;
-	struct msr *msrs;
-	int off;
-	int err;
-};
-
-static void __rdmsr_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-	struct msr *reg;
-	int this_cpu = raw_smp_processor_id();
-
-	if (rv->msrs)
-		reg = &rv->msrs[this_cpu - rv->off];
-	else
-		reg = &rv->reg;
-
-	rdmsr(rv->msr_no, reg->l, reg->h);
-}
-
-static void __wrmsr_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-	struct msr *reg;
-	int this_cpu = raw_smp_processor_id();
-
-	if (rv->msrs)
-		reg = &rv->msrs[this_cpu - rv->off];
-	else
-		reg = &rv->reg;
-
-	wrmsr(rv->msr_no, reg->l, reg->h);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	int err;
-	struct msr_info rv;
-
-	memset(&rv, 0, sizeof(rv));
-
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
-	*l = rv.reg.l;
-	*h = rv.reg.h;
-
-	return err;
-}
-EXPORT_SYMBOL(rdmsr_on_cpu);
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	int err;
-	struct msr_info rv;
-
-	memset(&rv, 0, sizeof(rv));
-
-	rv.msr_no = msr_no;
-	rv.reg.l = l;
-	rv.reg.h = h;
-	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-
-	return err;
-}
-EXPORT_SYMBOL(wrmsr_on_cpu);
-
-static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
-			    struct msr *msrs,
-			    void (*msr_func) (void *info))
-{
-	struct msr_info rv;
-	int this_cpu;
-
-	memset(&rv, 0, sizeof(rv));
-
-	rv.off    = cpumask_first(mask);
-	rv.msrs   = msrs;
-	rv.msr_no = msr_no;
-
-	this_cpu = get_cpu();
-
-	if (cpumask_test_cpu(this_cpu, mask))
-		msr_func(&rv);
-
-	smp_call_function_many(mask, msr_func, &rv, 1);
-	put_cpu();
-}
-
-/* rdmsr on a bunch of CPUs
- *
- * @mask:       which CPUs
- * @msr_no:     which MSR
- * @msrs:       array of MSR values
- *
- */
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
-{
-	__rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
-}
-EXPORT_SYMBOL(rdmsr_on_cpus);
-
-/*
- * wrmsr on a bunch of CPUs
- *
- * @mask:       which CPUs
- * @msr_no:     which MSR
- * @msrs:       array of MSR values
- *
- */
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
-{
-	__rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
-}
-EXPORT_SYMBOL(wrmsr_on_cpus);
-
-/* These "safe" variants are slower and should be used when the target MSR
-   may not actually exist. */
-static void __rdmsr_safe_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
-}
-
-static void __wrmsr_safe_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	int err;
-	struct msr_info rv;
-
-	memset(&rv, 0, sizeof(rv));
-
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*l = rv.reg.l;
-	*h = rv.reg.h;
-
-	return err ? err : rv.err;
-}
-EXPORT_SYMBOL(rdmsr_safe_on_cpu);
-
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	int err;
-	struct msr_info rv;
-
-	memset(&rv, 0, sizeof(rv));
-
-	rv.msr_no = msr_no;
-	rv.reg.l = l;
-	rv.reg.h = h;
-	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
-
-	return err ? err : rv.err;
-}
-EXPORT_SYMBOL(wrmsr_safe_on_cpu);
-
-/*
- * These variants are significantly slower, but allows control over
- * the entire 32-bit GPR set.
- */
-struct msr_regs_info {
-	u32 *regs;
-	int err;
-};
-
-static void __rdmsr_safe_regs_on_cpu(void *info)
-{
-	struct msr_regs_info *rv = info;
-
-	rv->err = rdmsr_safe_regs(rv->regs);
-}
-
-static void __wrmsr_safe_regs_on_cpu(void *info)
-{
-	struct msr_regs_info *rv = info;
-
-	rv->err = wrmsr_safe_regs(rv->regs);
-}
-
-int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
-{
-	int err;
-	struct msr_regs_info rv;
-
-	rv.regs = regs;
-	rv.err  = -EIO;
-	err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
-
-	return err ? err : rv.err;
-}
-EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
-
-int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
-{
-	int err;
-	struct msr_regs_info rv;
-
-	rv.regs = regs;
-	rv.err  = -EIO;
-	err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
-
-	return err ? err : rv.err;
-}
-EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
+struct msr *msrs_alloc(void)
+{
+	struct msr *msrs = NULL;
+
+	msrs = alloc_percpu(struct msr);
+	if (!msrs) {
+		pr_warning("%s: error allocating msrs\n", __func__);
+		return NULL;
+	}
+
+	return msrs;
+}
+EXPORT_SYMBOL(msrs_alloc);
+
+void msrs_free(struct msr *msrs)
+{
+	free_percpu(msrs);
+}
+EXPORT_SYMBOL(msrs_free);
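
Together with the per-cpu rework in msr-smp.c above (per_cpu_ptr() replaces the old index-by-first-cpu array arithmetic), the new allocator pairs naturally with the *_on_cpus() readers. A hedged usage sketch -- the caller and the choice of MSR are hypothetical:

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <asm/msr.h>

/* sketch: read one MSR on every online CPU and log the values */
static void sample_msr_everywhere(u32 msr_no)
{
	struct msr *vals = msrs_alloc();	/* one struct msr per possible CPU */
	int cpu;

	if (!vals)
		return;

	rdmsr_on_cpus(cpu_online_mask, msr_no, vals);

	for_each_online_cpu(cpu) {
		struct msr *m = per_cpu_ptr(vals, cpu);
		pr_info("cpu%d: %#x:%#x\n", cpu, m->h, m->l);
	}

	msrs_free(vals);
}
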
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
new file mode 100644
index 000000000000..15acecf0d7aa
--- /dev/null
+++ b/arch/x86/lib/rwsem_64.S
@@ -0,0 +1,81 @@
1/*
2 * x86-64 rwsem wrappers
3 *
4 * This interfaces the inline asm code to the slow-path
5 * C routines. We need to save the call-clobbered regs
6 * that the asm does not mark as clobbered, and move the
7 * argument from %rax to %rdi.
8 *
9 * NOTE! We don't need to save %rax, because the functions
10 * will always return the semaphore pointer in %rax (which
11 * is also the input argument to these helpers)
12 *
13 * The following can clobber %rdx because the asm clobbers it:
14 * call_rwsem_down_write_failed
15 * call_rwsem_wake
16 * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
17 */
18
19#include <linux/linkage.h>
20#include <asm/rwlock.h>
21#include <asm/alternative-asm.h>
22#include <asm/frame.h>
23#include <asm/dwarf2.h>
24
25#define save_common_regs \
26 pushq %rdi; \
27 pushq %rsi; \
28 pushq %rcx; \
29 pushq %r8; \
30 pushq %r9; \
31 pushq %r10; \
32 pushq %r11
33
34#define restore_common_regs \
35 popq %r11; \
36 popq %r10; \
37 popq %r9; \
38 popq %r8; \
39 popq %rcx; \
40 popq %rsi; \
41 popq %rdi
42
43/* Fix up special calling conventions */
44ENTRY(call_rwsem_down_read_failed)
45 save_common_regs
46 pushq %rdx
47 movq %rax,%rdi
48 call rwsem_down_read_failed
49 popq %rdx
50 restore_common_regs
51 ret
52 ENDPROC(call_rwsem_down_read_failed)
53
54ENTRY(call_rwsem_down_write_failed)
55 save_common_regs
56 movq %rax,%rdi
57 call rwsem_down_write_failed
58 restore_common_regs
59 ret
60 ENDPROC(call_rwsem_down_write_failed)
61
62ENTRY(call_rwsem_wake)
63 decw %dx /* do nothing if still outstanding active readers */
64 jnz 1f
65 save_common_regs
66 movq %rax,%rdi
67 call rwsem_wake
68 restore_common_regs
691: ret
70 ENDPROC(call_rwsem_wake)
71
72/* Fix up special calling conventions */
73ENTRY(call_rwsem_downgrade_wake)
74 save_common_regs
75 pushq %rdx
76 movq %rax,%rdi
77 call rwsem_downgrade_wake
78 popq %rdx
79 restore_common_regs
80 ret
81 ENDPROC(call_rwsem_downgrade_wake)
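
These wrappers exist because the rwsem fast path is emitted as inline asm at every call site, with the semaphore pointer constrained to %rax and only memory and flags marked clobbered. A simplified sketch of such a fast path, modeled on the asm/rwsem.h pattern but not copied verbatim:

	static inline void __down_read(struct rw_semaphore *sem)
	{
		asm volatile("# beginning down_read\n\t"
			     LOCK_PREFIX "incq (%1)\n\t"
			     /* fast path: no contention if the count stays >= 0 */
			     "jns 1f\n\t"
			     "call call_rwsem_down_read_failed\n"
			     "1:\n\t"
			     "# ending down_read"
			     : "+m" (sem->count)
			     : "a" (sem)	/* argument pinned in %rax */
			     : "memory", "cc");	/* nothing else declared clobbered */
	}

Since gcc assumes every other call-clobbered register survives this "call", the wrapper must preserve them itself, which is exactly what save_common_regs/restore_common_regs do above.
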
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
18#else 18#else
19 /* 19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking 20 * With get_user_pages_fast, we walk down the pagetables without taking
21 * any locks. For this we would like to load the pointers atoimcally, 21 * any locks. For this we would like to load the pointers atomically,
22 * but that is not possible (without expensive cmpxchg8b) on PAE. What 22 * but that is not possible (without expensive cmpxchg8b) on PAE. What
23 * we do have is the guarantee that a pte will only either go from not 23 * we do have is the guarantee that a pte will only either go from not
24 * present to present, or present to not present or both -- it will not 24 * present to present, or present to not present or both -- it will not
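
The corrected comment describes the retry loop that follows it in gup.c: on PAE the two 32-bit halves of the pte are loaded separately and the low half re-checked, relying on the present/not-present transition guarantee. Roughly:

	pte_t pte;
retry:
	pte.pte_low = ptep->pte_low;
	smp_rmb();
	pte.pte_high = ptep->pte_high;
	smp_rmb();
	if (unlikely(pte.pte_low != ptep->pte_low))
		goto retry;	/* a half changed under us; reload both */
	return pte;
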
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d406c5239019..e71c5cbc8f35 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -266,16 +266,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
266 if (!after_bootmem) 266 if (!after_bootmem)
267 find_early_table_space(end, use_pse, use_gbpages); 267 find_early_table_space(end, use_pse, use_gbpages);
268 268
269#ifdef CONFIG_X86_32
270 for (i = 0; i < nr_range; i++)
271 kernel_physical_mapping_init(mr[i].start, mr[i].end,
272 mr[i].page_size_mask);
273 ret = end;
274#else /* CONFIG_X86_64 */
275 for (i = 0; i < nr_range; i++) 269 for (i = 0; i < nr_range; i++)
276 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 270 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
277 mr[i].page_size_mask); 271 mr[i].page_size_mask);
278#endif
279 272
280#ifdef CONFIG_X86_32 273#ifdef CONFIG_X86_32
281 early_ioremap_page_table_range_init(); 274 early_ioremap_page_table_range_init();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c973f8e2a6cf..2226f2c70ea3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
241 unsigned long page_size_mask) 241 unsigned long page_size_mask)
242{ 242{
243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M); 243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
244 unsigned long last_map_addr = end;
244 unsigned long start_pfn, end_pfn; 245 unsigned long start_pfn, end_pfn;
245 pgd_t *pgd_base = swapper_pg_dir; 246 pgd_t *pgd_base = swapper_pg_dir;
246 int pgd_idx, pmd_idx, pte_ofs; 247 int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
341 prot = PAGE_KERNEL_EXEC; 342 prot = PAGE_KERNEL_EXEC;
342 343
343 pages_4k++; 344 pages_4k++;
344 if (mapping_iter == 1) 345 if (mapping_iter == 1) {
345 set_pte(pte, pfn_pte(pfn, init_prot)); 346 set_pte(pte, pfn_pte(pfn, init_prot));
346 else 347 last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
348 } else
347 set_pte(pte, pfn_pte(pfn, prot)); 349 set_pte(pte, pfn_pte(pfn, prot));
348 } 350 }
349 } 351 }
@@ -368,7 +370,7 @@ repeat:
368 mapping_iter = 2; 370 mapping_iter = 2;
369 goto repeat; 371 goto repeat;
370 } 372 }
371 return 0; 373 return last_map_addr;
372} 374}
373 375
374pte_t *kmap_pte; 376pte_t *kmap_pte;
@@ -892,8 +894,7 @@ void __init mem_init(void)
892 reservedpages << (PAGE_SHIFT-10), 894 reservedpages << (PAGE_SHIFT-10),
893 datasize >> 10, 895 datasize >> 10,
894 initsize >> 10, 896 initsize >> 10,
895 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 897 totalhigh_pages << (PAGE_SHIFT-10));
896 );
897 898
898 printk(KERN_INFO "virtual kernel memory layout:\n" 899 printk(KERN_INFO "virtual kernel memory layout:\n"
899 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 900 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5198b9bb34ef..69ddfbd91135 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -49,6 +49,7 @@
49#include <asm/numa.h> 49#include <asm/numa.h>
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h> 51#include <asm/init.h>
52#include <linux/bootmem.h>
52 53
53static unsigned long dma_reserve __initdata; 54static unsigned long dma_reserve __initdata;
54 55
@@ -616,6 +617,21 @@ void __init paging_init(void)
616 */ 617 */
617#ifdef CONFIG_MEMORY_HOTPLUG 618#ifdef CONFIG_MEMORY_HOTPLUG
618/* 619/*
620 * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
621 * updating.
622 */
623static void update_end_of_memory_vars(u64 start, u64 size)
624{
625 unsigned long end_pfn = PFN_UP(start + size);
626
627 if (end_pfn > max_pfn) {
628 max_pfn = end_pfn;
629 max_low_pfn = end_pfn;
630 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
631 }
632}
633
634/*
619 * Memory is added always to NORMAL zone. This means you will never get 635 * Memory is added always to NORMAL zone. This means you will never get
620 * additional DMA/DMA32 memory. 636 * additional DMA/DMA32 memory.
621 */ 637 */
@@ -634,6 +650,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
634 ret = __add_pages(nid, zone, start_pfn, nr_pages); 650 ret = __add_pages(nid, zone, start_pfn, nr_pages);
635 WARN_ON_ONCE(ret); 651 WARN_ON_ONCE(ret);
636 652
653 /* update max_pfn, max_low_pfn and high_memory */
654 update_end_of_memory_vars(start, size);
655
637 return ret; 656 return ret;
638} 657}
639EXPORT_SYMBOL_GPL(arch_add_memory); 658EXPORT_SYMBOL_GPL(arch_add_memory);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c246d259822d..5eb1ba74a3a9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
24 24
25#include "physaddr.h" 25#include "physaddr.h"
26 26
27int page_is_ram(unsigned long pagenr)
28{
29 resource_size_t addr, end;
30 int i;
31
32 /*
33 * A special case is the first 4Kb of memory;
34 * This is a BIOS owned area, not kernel ram, but generally
35 * not listed as such in the E820 table.
36 */
37 if (pagenr == 0)
38 return 0;
39
40 /*
41 * Second special case: Some BIOSen report the PC BIOS
42 * area (640->1Mb) as ram even though it is not.
43 */
44 if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
45 pagenr < (BIOS_END >> PAGE_SHIFT))
46 return 0;
47
48 for (i = 0; i < e820.nr_map; i++) {
49 /*
50 * Not usable memory:
51 */
52 if (e820.map[i].type != E820_RAM)
53 continue;
54 addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
55 end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
56
57
58 if ((pagenr >= addr) && (pagenr < end))
59 return 1;
60 }
61 return 0;
62}
63
64/* 27/*
65 * Fix up the linear direct mapping of the kernel to avoid cache attribute 28 * Fix up the linear direct mapping of the kernel to avoid cache attribute
66 * conflicts. 29 * conflicts.
@@ -422,6 +385,10 @@ void __init early_ioremap_init(void)
422 * The boot-ioremap range spans multiple pmds, for which 385 * The boot-ioremap range spans multiple pmds, for which
423 * we are not prepared: 386 * we are not prepared:
424 */ 387 */
388#define __FIXADDR_TOP (-PAGE_SIZE)
389 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
390 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
391#undef __FIXADDR_TOP
425 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { 392 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
426 WARN_ON(1); 393 WARN_ON(1);
427 printk(KERN_WARNING "pmd %p != %p\n", 394 printk(KERN_WARNING "pmd %p != %p\n",
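
The temporary #define is needed because on 32-bit __FIXADDR_TOP is a variable (the fixmap top can move at runtime), so __fix_to_virt() is not a compile-time constant and cannot feed BUILD_BUG_ON() directly; pinning it to an arbitrary constant makes the PMD-alignment check evaluable at build time. For reference, the underlying macros from asm/fixmap.h are essentially:

	#define FIXADDR_TOP		((unsigned long)__FIXADDR_TOP)
	#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
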
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 4901d0dafda6..af3b6c8a436f 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -106,26 +106,25 @@ void kmemcheck_error_recall(void)
106 106
107 switch (e->type) { 107 switch (e->type) {
108 case KMEMCHECK_ERROR_INVALID_ACCESS: 108 case KMEMCHECK_ERROR_INVALID_ACCESS:
109 printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " 109 printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n",
110 "from %s memory (%p)\n",
111 8 * e->size, e->state < ARRAY_SIZE(desc) ? 110 8 * e->size, e->state < ARRAY_SIZE(desc) ?
112 desc[e->state] : "(invalid shadow state)", 111 desc[e->state] : "(invalid shadow state)",
113 (void *) e->address); 112 (void *) e->address);
114 113
115 printk(KERN_INFO); 114 printk(KERN_WARNING);
116 for (i = 0; i < SHADOW_COPY_SIZE; ++i) 115 for (i = 0; i < SHADOW_COPY_SIZE; ++i)
117 printk("%02x", e->memory_copy[i]); 116 printk(KERN_CONT "%02x", e->memory_copy[i]);
118 printk("\n"); 117 printk(KERN_CONT "\n");
119 118
120 printk(KERN_INFO); 119 printk(KERN_WARNING);
121 for (i = 0; i < SHADOW_COPY_SIZE; ++i) { 120 for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
122 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) 121 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
123 printk(" %c", short_desc[e->shadow_copy[i]]); 122 printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]);
124 else 123 else
125 printk(" ?"); 124 printk(KERN_CONT " ?");
126 } 125 }
127 printk("\n"); 126 printk(KERN_CONT "\n");
128 printk(KERN_INFO "%*c\n", 2 + 2 127 printk(KERN_WARNING "%*c\n", 2 + 2
129 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); 128 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
130 break; 129 break;
131 case KMEMCHECK_ERROR_BUG: 130 case KMEMCHECK_ERROR_BUG:
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
337 if (!shadow) 337 if (!shadow)
338 return true; 338 return true;
339 339
340 status = kmemcheck_shadow_test(shadow, size); 340 status = kmemcheck_shadow_test_all(shadow, size);
341 341
342 return status == KMEMCHECK_SHADOW_INITIALIZED; 342 return status == KMEMCHECK_SHADOW_INITIALIZED;
343} 343}
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
125 125
126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) 126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
127{ 127{
128#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
128 uint8_t *x; 129 uint8_t *x;
129 unsigned int i; 130 unsigned int i;
130 131
131 x = shadow; 132 x = shadow;
132 133
133#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
134 /* 134 /*
135 * Make sure _some_ bytes are initialized. Gcc frequently generates 135 * Make sure _some_ bytes are initialized. Gcc frequently generates
136 * code to access neighboring bytes. 136 * code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) 139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
140 return x[i]; 140 return x[i];
141 } 141 }
142
143 return x[0];
142#else 144#else
145 return kmemcheck_shadow_test_all(shadow, size);
146#endif
147}
148
149enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
150{
151 uint8_t *x;
152 unsigned int i;
153
154 x = shadow;
155
143 /* All bytes must be initialized. */ 156 /* All bytes must be initialized. */
144 for (i = 0; i < size; ++i) { 157 for (i = 0; i < size; ++i) {
145 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) 158 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
146 return x[i]; 159 return x[i];
147 } 160 }
148#endif
149 161
150 return x[0]; 162 return x[0];
151} 163}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
11void *kmemcheck_shadow_lookup(unsigned long address); 11void *kmemcheck_shadow_lookup(unsigned long address);
12 12
13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); 13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
14enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
15 unsigned int size);
14void kmemcheck_shadow_set(void *shadow, unsigned int size); 16void kmemcheck_shadow_set(void *shadow, unsigned int size);
15 17
16#endif 18#endif
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 11a4ad4d6253..536fb6823366 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -5,6 +5,8 @@
5 * 2008 Pekka Paalanen <pq@iki.fi> 5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
8#include <linux/list.h> 10#include <linux/list.h>
9#include <linux/rculist.h> 11#include <linux/rculist.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
@@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
136 pte_t *pte = lookup_address(f->page, &level); 138 pte_t *pte = lookup_address(f->page, &level);
137 139
138 if (!pte) { 140 if (!pte) {
139 pr_err("kmmio: no pte for page 0x%08lx\n", f->page); 141 pr_err("no pte for page 0x%08lx\n", f->page);
140 return -1; 142 return -1;
141 } 143 }
142 144
@@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
148 clear_pte_presence(pte, clear, &f->old_presence); 150 clear_pte_presence(pte, clear, &f->old_presence);
149 break; 151 break;
150 default: 152 default:
151 pr_err("kmmio: unexpected page level 0x%x.\n", level); 153 pr_err("unexpected page level 0x%x.\n", level);
152 return -1; 154 return -1;
153 } 155 }
154 156
@@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
170static int arm_kmmio_fault_page(struct kmmio_fault_page *f) 172static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
171{ 173{
172 int ret; 174 int ret;
173 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); 175 WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
174 if (f->armed) { 176 if (f->armed) {
175 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", 177 pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
176 f->page, f->count, !!f->old_presence); 178 f->page, f->count, !!f->old_presence);
177 } 179 }
178 ret = clear_page_presence(f, true); 180 ret = clear_page_presence(f, true);
179 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); 181 WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
182 f->page);
180 f->armed = true; 183 f->armed = true;
181 return ret; 184 return ret;
182} 185}
@@ -203,7 +206,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
203 */ 206 */
204/* 207/*
205 * Interrupts are disabled on entry as trap3 is an interrupt gate 208 * Interrupts are disabled on entry as trap3 is an interrupt gate
206 * and they remain disabled thorough out this function. 209 * and they remain disabled throughout this function.
207 */ 210 */
208int kmmio_handler(struct pt_regs *regs, unsigned long addr) 211int kmmio_handler(struct pt_regs *regs, unsigned long addr)
209{ 212{
@@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
240 * condition needs handling by do_page_fault(), the 243 * condition needs handling by do_page_fault(), the
241 * page really not being present is the most common. 244 * page really not being present is the most common.
242 */ 245 */
243 pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", 246 pr_debug("secondary hit for 0x%08lx CPU %d.\n",
244 addr, smp_processor_id()); 247 addr, smp_processor_id());
245 248
246 if (!faultpage->old_presence) 249 if (!faultpage->old_presence)
247 pr_info("kmmio: unexpected secondary hit for " 250 pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
248 "address 0x%08lx on CPU %d.\n", addr, 251 addr, smp_processor_id());
249 smp_processor_id());
250 } else { 252 } else {
251 /* 253 /*
252 * Prevent overwriting already in-flight context. 254 * Prevent overwriting already in-flight context.
253 * This should not happen, let's hope disarming at 255 * This should not happen, let's hope disarming at
254 * least prevents a panic. 256 * least prevents a panic.
255 */ 257 */
256 pr_emerg("kmmio: recursive probe hit on CPU %d, " 258 pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
257 "for address 0x%08lx. Ignoring.\n", 259 smp_processor_id(), addr);
258 smp_processor_id(), addr); 260 pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
259 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
260 ctx->addr);
261 disarm_kmmio_fault_page(faultpage); 261 disarm_kmmio_fault_page(faultpage);
262 } 262 }
263 goto no_kmmio_ctx; 263 goto no_kmmio_ctx;
@@ -302,7 +302,7 @@ no_kmmio:
302 302
303/* 303/*
304 * Interrupts are disabled on entry as trap1 is an interrupt gate 304 * Interrupts are disabled on entry as trap1 is an interrupt gate
305 * and they remain disabled thorough out this function. 305 * and they remain disabled throughout this function.
306 * This must always get called as the pair to kmmio_handler(). 306 * This must always get called as the pair to kmmio_handler().
307 */ 307 */
308static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) 308static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
@@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
316 * something external causing them (f.e. using a debugger while 316 * something external causing them (f.e. using a debugger while
317 * mmio tracing enabled), or erroneous behaviour 317 * mmio tracing enabled), or erroneous behaviour
318 */ 318 */
319 pr_warning("kmmio: unexpected debug trap on CPU %d.\n", 319 pr_warning("unexpected debug trap on CPU %d.\n",
320 smp_processor_id()); 320 smp_processor_id());
321 goto out; 321 goto out;
322 } 322 }
323 323
@@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
425 list_add_rcu(&p->list, &kmmio_probes); 425 list_add_rcu(&p->list, &kmmio_probes);
426 while (size < size_lim) { 426 while (size < size_lim) {
427 if (add_kmmio_fault_page(p->addr + size)) 427 if (add_kmmio_fault_page(p->addr + size))
428 pr_err("kmmio: Unable to set page fault.\n"); 428 pr_err("Unable to set page fault.\n");
429 size += PAGE_SIZE; 429 size += PAGE_SIZE;
430 } 430 }
431out: 431out:
@@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
490 * 2. remove_kmmio_fault_pages() 490 * 2. remove_kmmio_fault_pages()
491 * Remove the pages from kmmio_page_table. 491 * Remove the pages from kmmio_page_table.
492 * 3. rcu_free_kmmio_fault_pages() 492 * 3. rcu_free_kmmio_fault_pages()
493 * Actally free the kmmio_fault_page structs as with RCU. 493 * Actually free the kmmio_fault_page structs as with RCU.
494 */ 494 */
495void unregister_kmmio_probe(struct kmmio_probe *p) 495void unregister_kmmio_probe(struct kmmio_probe *p)
496{ 496{
@@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
511 511
512 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); 512 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
513 if (!drelease) { 513 if (!drelease) {
514 pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); 514 pr_crit("leaking kmmio_fault_page objects.\n");
515 return; 515 return;
516 } 516 }
517 drelease->release_list = release_list; 517 drelease->release_list = release_list;
@@ -538,14 +538,15 @@ static int
538kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) 538kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
539{ 539{
540 struct die_args *arg = args; 540 struct die_args *arg = args;
541 unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err);
541 542
542 if (val == DIE_DEBUG && (arg->err & DR_STEP)) 543 if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
543 if (post_kmmio_handler(arg->err, arg->regs) == 1) { 544 if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
544 /* 545 /*
545 * Reset the BS bit in dr6 (pointed by args->err) to 546 * Reset the BS bit in dr6 (pointed by args->err) to
546 * denote completion of processing 547 * denote completion of processing
547 */ 548 */
548 (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; 549 *dr6_p &= ~DR_STEP;
549 return NOTIFY_STOP; 550 return NOTIFY_STOP;
550 } 551 }
551 552
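
The pr_fmt() define at the top of the file is what lets every literal "kmmio: " prefix below be dropped: the pr_*() wrappers paste pr_fmt() around the format string at preprocessing time, essentially:

	/* from <linux/kernel.h>; a file may override pr_fmt before including it */
	#ifndef pr_fmt
	#define pr_fmt(fmt) fmt
	#endif

	#define pr_err(fmt, ...) \
		printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)

WARN_ONCE() does not go through these wrappers, which is why the two WARN_ONCE() strings in this hunk apply pr_fmt() by hand.
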
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191defc38a..1dab5194fd9d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
71 if (current->personality & ADDR_COMPAT_LAYOUT) 71 if (current->personality & ADDR_COMPAT_LAYOUT)
72 return 1; 72 return 1;
73 73
74 if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) 74 if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
75 return 1; 75 return 1;
76 76
77 return sysctl_legacy_va_layout; 77 return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
96 96
97static unsigned long mmap_base(void) 97static unsigned long mmap_base(void)
98{ 98{
99 unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; 99 unsigned long gap = rlimit(RLIMIT_STACK);
100 100
101 if (gap < MIN_GAP) 101 if (gap < MIN_GAP)
102 gap = MIN_GAP; 102 gap = MIN_GAP;
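
rlimit() replaces the open-coded signal->rlim dereferences; assuming it matches the generic helper introduced around the same time, it amounts to:

	static inline unsigned long task_rlimit(const struct task_struct *tsk,
						unsigned int limit)
	{
		return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
	}

	static inline unsigned long rlimit(unsigned int limit)
	{
		return task_rlimit(current, limit);
	}
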
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 132772a8ec57..34a3291ca103 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -19,6 +19,9 @@
19 * 19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi. 20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */ 21 */
22
23#define pr_fmt(fmt) "mmiotrace: " fmt
24
22#define DEBUG 1 25#define DEBUG 1
23 26
24#include <linux/module.h> 27#include <linux/module.h>
@@ -36,8 +39,6 @@
36 39
37#include "pf_in.h" 40#include "pf_in.h"
38 41
39#define NAME "mmiotrace: "
40
41struct trap_reason { 42struct trap_reason {
42 unsigned long addr; 43 unsigned long addr;
43 unsigned long ip; 44 unsigned long ip;
@@ -96,17 +97,18 @@ static void print_pte(unsigned long address)
96 pte_t *pte = lookup_address(address, &level); 97 pte_t *pte = lookup_address(address, &level);
97 98
98 if (!pte) { 99 if (!pte) {
99 pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", 100 pr_err("Error in %s: no pte for page 0x%08lx\n",
100 __func__, address); 101 __func__, address);
101 return; 102 return;
102 } 103 }
103 104
104 if (level == PG_LEVEL_2M) { 105 if (level == PG_LEVEL_2M) {
105 pr_emerg(NAME "4MB pages are not currently supported: " 106 pr_emerg("4MB pages are not currently supported: 0x%08lx\n",
106 "0x%08lx\n", address); 107 address);
107 BUG(); 108 BUG();
108 } 109 }
109 pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, 110 pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
111 address,
110 (unsigned long long)pte_val(*pte), 112 (unsigned long long)pte_val(*pte),
111 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); 113 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
112} 114}
@@ -118,22 +120,21 @@ static void print_pte(unsigned long address)
118static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) 120static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
119{ 121{
120 const struct trap_reason *my_reason = &get_cpu_var(pf_reason); 122 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
121 pr_emerg(NAME "unexpected fault for address: 0x%08lx, " 123 pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
122 "last fault for address: 0x%08lx\n", 124 addr, my_reason->addr);
123 addr, my_reason->addr);
124 print_pte(addr); 125 print_pte(addr);
125 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); 126 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
126 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); 127 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
127#ifdef __i386__ 128#ifdef __i386__
128 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", 129 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
129 regs->ax, regs->bx, regs->cx, regs->dx); 130 regs->ax, regs->bx, regs->cx, regs->dx);
130 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 131 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
131 regs->si, regs->di, regs->bp, regs->sp); 132 regs->si, regs->di, regs->bp, regs->sp);
132#else 133#else
133 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", 134 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
134 regs->ax, regs->cx, regs->dx); 135 regs->ax, regs->cx, regs->dx);
135 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", 136 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
136 regs->si, regs->di, regs->bp, regs->sp); 137 regs->si, regs->di, regs->bp, regs->sp);
137#endif 138#endif
138 put_cpu_var(pf_reason); 139 put_cpu_var(pf_reason);
139 BUG(); 140 BUG();
@@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition,
213 /* this should always return the active_trace count to 0 */ 214 /* this should always return the active_trace count to 0 */
214 my_reason->active_traces--; 215 my_reason->active_traces--;
215 if (my_reason->active_traces) { 216 if (my_reason->active_traces) {
216 pr_emerg(NAME "unexpected post handler"); 217 pr_emerg("unexpected post handler");
217 BUG(); 218 BUG();
218 } 219 }
219 220
@@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
244 }; 245 };
245 246
246 if (!trace) { 247 if (!trace) {
247 pr_err(NAME "kmalloc failed in ioremap\n"); 248 pr_err("kmalloc failed in ioremap\n");
248 return; 249 return;
249 } 250 }
250 251
@@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
282 if (!is_enabled()) /* recheck and proper locking in *_core() */ 283 if (!is_enabled()) /* recheck and proper locking in *_core() */
283 return; 284 return;
284 285
285 pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", 286 pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
286 (unsigned long long)offset, size, addr); 287 (unsigned long long)offset, size, addr);
287 if ((filter_offset) && (offset != filter_offset)) 288 if ((filter_offset) && (offset != filter_offset))
288 return; 289 return;
289 ioremap_trace_core(offset, size, addr); 290 ioremap_trace_core(offset, size, addr);
@@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr)
301 struct remap_trace *tmp; 302 struct remap_trace *tmp;
302 struct remap_trace *found_trace = NULL; 303 struct remap_trace *found_trace = NULL;
303 304
304 pr_debug(NAME "Unmapping %p.\n", addr); 305 pr_debug("Unmapping %p.\n", addr);
305 306
306 spin_lock_irq(&trace_lock); 307 spin_lock_irq(&trace_lock);
307 if (!is_enabled()) 308 if (!is_enabled())
@@ -363,9 +364,8 @@ static void clear_trace_list(void)
363 * Caller also ensures is_enabled() cannot change. 364 * Caller also ensures is_enabled() cannot change.
364 */ 365 */
365 list_for_each_entry(trace, &trace_list, list) { 366 list_for_each_entry(trace, &trace_list, list) {
366 pr_notice(NAME "purging non-iounmapped " 367 pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
367 "trace @0x%08lx, size 0x%lx.\n", 368 trace->probe.addr, trace->probe.len);
368 trace->probe.addr, trace->probe.len);
369 if (!nommiotrace) 369 if (!nommiotrace)
370 unregister_kmmio_probe(&trace->probe); 370 unregister_kmmio_probe(&trace->probe);
371 } 371 }
@@ -387,7 +387,7 @@ static void enter_uniprocessor(void)
387 387
388 if (downed_cpus == NULL && 388 if (downed_cpus == NULL &&
389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { 389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
390 pr_notice(NAME "Failed to allocate mask\n"); 390 pr_notice("Failed to allocate mask\n");
391 goto out; 391 goto out;
392 } 392 }
393 393
@@ -395,20 +395,19 @@ static void enter_uniprocessor(void)
395 cpumask_copy(downed_cpus, cpu_online_mask); 395 cpumask_copy(downed_cpus, cpu_online_mask);
396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); 396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
397 if (num_online_cpus() > 1) 397 if (num_online_cpus() > 1)
398 pr_notice(NAME "Disabling non-boot CPUs...\n"); 398 pr_notice("Disabling non-boot CPUs...\n");
399 put_online_cpus(); 399 put_online_cpus();
400 400
401 for_each_cpu(cpu, downed_cpus) { 401 for_each_cpu(cpu, downed_cpus) {
402 err = cpu_down(cpu); 402 err = cpu_down(cpu);
403 if (!err) 403 if (!err)
404 pr_info(NAME "CPU%d is down.\n", cpu); 404 pr_info("CPU%d is down.\n", cpu);
405 else 405 else
406 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); 406 pr_err("Error taking CPU%d down: %d\n", cpu, err);
407 } 407 }
408out: 408out:
409 if (num_online_cpus() > 1) 409 if (num_online_cpus() > 1)
410 pr_warning(NAME "multiple CPUs still online, " 410 pr_warning("multiple CPUs still online, may miss events.\n");
411 "may miss events.\n");
412} 411}
413 412
414/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, 413/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
@@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void)
420 419
421 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) 420 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
422 return; 421 return;
423 pr_notice(NAME "Re-enabling CPUs...\n"); 422 pr_notice("Re-enabling CPUs...\n");
424 for_each_cpu(cpu, downed_cpus) { 423 for_each_cpu(cpu, downed_cpus) {
425 err = cpu_up(cpu); 424 err = cpu_up(cpu);
426 if (!err) 425 if (!err)
427 pr_info(NAME "enabled CPU%d.\n", cpu); 426 pr_info("enabled CPU%d.\n", cpu);
428 else 427 else
429 pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); 428 pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
430 } 429 }
431} 430}
432 431
@@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void)
434static void enter_uniprocessor(void) 433static void enter_uniprocessor(void)
435{ 434{
436 if (num_online_cpus() > 1) 435 if (num_online_cpus() > 1)
437 pr_warning(NAME "multiple CPUs are online, may miss events. " 436 pr_warning("multiple CPUs are online, may miss events. "
438 "Suggest booting with maxcpus=1 kernel argument.\n"); 437 "Suggest booting with maxcpus=1 kernel argument.\n");
439} 438}
440 439
441static void leave_uniprocessor(void) 440static void leave_uniprocessor(void)
@@ -450,13 +449,13 @@ void enable_mmiotrace(void)
450 goto out; 449 goto out;
451 450
452 if (nommiotrace) 451 if (nommiotrace)
453 pr_info(NAME "MMIO tracing disabled.\n"); 452 pr_info("MMIO tracing disabled.\n");
454 kmmio_init(); 453 kmmio_init();
455 enter_uniprocessor(); 454 enter_uniprocessor();
456 spin_lock_irq(&trace_lock); 455 spin_lock_irq(&trace_lock);
457 atomic_inc(&mmiotrace_enabled); 456 atomic_inc(&mmiotrace_enabled);
458 spin_unlock_irq(&trace_lock); 457 spin_unlock_irq(&trace_lock);
459 pr_info(NAME "enabled.\n"); 458 pr_info("enabled.\n");
460out: 459out:
461 mutex_unlock(&mmiotrace_mutex); 460 mutex_unlock(&mmiotrace_mutex);
462} 461}
@@ -475,7 +474,7 @@ void disable_mmiotrace(void)
475 clear_trace_list(); /* guarantees: no more kmmio callbacks */ 474 clear_trace_list(); /* guarantees: no more kmmio callbacks */
476 leave_uniprocessor(); 475 leave_uniprocessor();
477 kmmio_cleanup(); 476 kmmio_cleanup();
478 pr_info(NAME "disabled.\n"); 477 pr_info("disabled.\n");
479out: 478out:
480 mutex_unlock(&mmiotrace_mutex); 479 mutex_unlock(&mmiotrace_mutex);
481} 480}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 83bbc70d11bb..3307ea8bd43a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -427,7 +427,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
427 * Calculate the number of big nodes that can be allocated as a result 427 * Calculate the number of big nodes that can be allocated as a result
428 * of consolidating the remainder. 428 * of consolidating the remainder.
429 */ 429 */
430 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / 430 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
431 FAKE_NODE_MIN_SIZE; 431 FAKE_NODE_MIN_SIZE;
432 432
433 size &= FAKE_NODE_MIN_HASH_MASK; 433 size &= FAKE_NODE_MIN_HASH_MASK;
@@ -502,77 +502,99 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
502} 502}
503 503
504/* 504/*
505 * Splits num_nodes nodes up equally starting at node_start. The return value 505 * Returns the end address of a node so that there is at least `size' amount of
506 * is the number of nodes split up and addr is adjusted to be at the end of the 506 * non-reserved memory or `max_addr' is reached.
507 * last node allocated.
508 */ 507 */
509static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, 508static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
510 int num_nodes)
511{ 509{
512 unsigned int big; 510 u64 end = start + size;
513 u64 size;
514 int i;
515
516 if (num_nodes <= 0)
517 return -1;
518 if (num_nodes > MAX_NUMNODES)
519 num_nodes = MAX_NUMNODES;
520 size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
521 num_nodes;
522 /*
523 * Calculate the number of big nodes that can be allocated as a result
524 * of consolidating the leftovers.
525 */
526 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
527 FAKE_NODE_MIN_SIZE;
528
529 /* Round down to nearest FAKE_NODE_MIN_SIZE. */
530 size &= FAKE_NODE_MIN_HASH_MASK;
531 if (!size) {
532 printk(KERN_ERR "Not enough memory for each node. "
533 "NUMA emulation disabled.\n");
534 return -1;
535 }
536
537 for (i = node_start; i < num_nodes + node_start; i++) {
538 u64 end = *addr + size;
539 511
540 if (i < big) 512 while (end - start - e820_hole_size(start, end) < size) {
541 end += FAKE_NODE_MIN_SIZE; 513 end += FAKE_NODE_MIN_SIZE;
542 /* 514 if (end > max_addr) {
543 * The final node can have the remaining system RAM. Other
544 * nodes receive roughly the same amount of available pages.
545 */
546 if (i == num_nodes + node_start - 1)
547 end = max_addr; 515 end = max_addr;
548 else
549 while (end - *addr - e820_hole_size(*addr, end) <
550 size) {
551 end += FAKE_NODE_MIN_SIZE;
552 if (end > max_addr) {
553 end = max_addr;
554 break;
555 }
556 }
557 if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
558 break; 516 break;
517 }
559 } 518 }
560 return i - node_start + 1; 519 return end;
561} 520}
562 521
563/* 522/*
564 * Splits the remaining system RAM into chunks of size. The remaining memory is 523 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
565 * always assigned to a final node and can be asymmetric. Returns the number of 524 * `addr' to `max_addr'. The return value is the number of nodes allocated.
566 * nodes split.
567 */ 525 */
568static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, 526static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
569 u64 size)
570{ 527{
571 int i = node_start; 528 nodemask_t physnode_mask = NODE_MASK_NONE;
572 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; 529 u64 min_size;
573 while (!setup_node_range(i++, addr, size, max_addr)) 530 int ret = 0;
574 ; 531 int i;
575 return i - node_start; 532
533 if (!size)
534 return -1;
535 /*
536 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
537 * increased accordingly if the requested size is too small. This
538 * creates a uniform distribution of node sizes across the entire
539 * machine (but not necessarily over physical nodes).
540 */
541 min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
542 MAX_NUMNODES;
543 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
544 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
545 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
546 FAKE_NODE_MIN_HASH_MASK;
547 if (size < min_size) {
548 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
549 size >> 20, min_size >> 20);
550 size = min_size;
551 }
552 size &= FAKE_NODE_MIN_HASH_MASK;
553
554 for (i = 0; i < MAX_NUMNODES; i++)
555 if (physnodes[i].start != physnodes[i].end)
556 node_set(i, physnode_mask);
557 /*
558 * Fill physical nodes with fake nodes of size until there is no memory
559 * left on any of them.
560 */
561 while (nodes_weight(physnode_mask)) {
562 for_each_node_mask(i, physnode_mask) {
563 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
564 u64 end;
565
566 end = find_end_of_node(physnodes[i].start,
567 physnodes[i].end, size);
568 /*
569 * If there won't be at least FAKE_NODE_MIN_SIZE of
570 * non-reserved memory in ZONE_DMA32 for the next node,
571 * this one must extend to the boundary.
572 */
573 if (end < dma32_end && dma32_end - end -
574 e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
575 end = dma32_end;
576
577 /*
578 * If there won't be enough non-reserved memory for the
579 * next node, this one must extend to the end of the
580 * physical node.
581 */
582 if (physnodes[i].end - end -
583 e820_hole_size(end, physnodes[i].end) < size)
584 end = physnodes[i].end;
585
586 /*
587 * Setup the fake node that will be allocated as bootmem
588 * later. If setup_node_range() returns non-zero, there
589 * is no more memory available on this physical node.
590 */
591 if (setup_node_range(ret++, &physnodes[i].start,
592 end - physnodes[i].start,
593 physnodes[i].end) < 0)
594 node_clear(i, physnode_mask);
595 }
596 }
597 return ret;
576} 598}
577 599
578/* 600/*
@@ -582,87 +604,32 @@ static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
582static int __init numa_emulation(unsigned long start_pfn, 604static int __init numa_emulation(unsigned long start_pfn,
583 unsigned long last_pfn, int acpi, int k8) 605 unsigned long last_pfn, int acpi, int k8)
584{ 606{
585 u64 size, addr = start_pfn << PAGE_SHIFT; 607 u64 addr = start_pfn << PAGE_SHIFT;
586 u64 max_addr = last_pfn << PAGE_SHIFT; 608 u64 max_addr = last_pfn << PAGE_SHIFT;
587 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
588 int num_phys_nodes; 609 int num_phys_nodes;
610 int num_nodes;
611 int i;
589 612
590 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); 613 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
591 /* 614 /*
592 * If the numa=fake command-line is just a single number N, split the 615 * If the numa=fake command-line contains a 'M' or 'G', it represents
593 * system RAM into N fake nodes. 616 * the fixed node size. Otherwise, if it is just a single number N,
617 * split the system RAM into N fake nodes.
594 */ 618 */
595 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { 619 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
596 long n = simple_strtol(cmdline, NULL, 0); 620 u64 size;
597
598 num_nodes = split_nodes_interleave(addr, max_addr,
599 num_phys_nodes, n);
600 if (num_nodes < 0)
601 return num_nodes;
602 goto out;
603 }
604 621
605 /* Parse the command line. */ 622 size = memparse(cmdline, &cmdline);
606 for (coeff_flag = 0; ; cmdline++) { 623 num_nodes = split_nodes_size_interleave(addr, max_addr, size);
607 if (*cmdline && isdigit(*cmdline)) { 624 } else {
608 num = num * 10 + *cmdline - '0'; 625 unsigned long n;
609 continue; 626
610 } 627 n = simple_strtoul(cmdline, NULL, 0);
611 if (*cmdline == '*') { 628 num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
612 if (num > 0)
613 coeff = num;
614 coeff_flag = 1;
615 }
616 if (!*cmdline || *cmdline == ',') {
617 if (!coeff_flag)
618 coeff = 1;
619 /*
620 * Round down to the nearest FAKE_NODE_MIN_SIZE.
621 * Command-line coefficients are in megabytes.
622 */
623 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
624 if (size)
625 for (i = 0; i < coeff; i++, num_nodes++)
626 if (setup_node_range(num_nodes, &addr,
627 size, max_addr) < 0)
628 goto done;
629 if (!*cmdline)
630 break;
631 coeff_flag = 0;
632 coeff = -1;
633 }
634 num = 0;
635 }
636done:
637 if (!num_nodes)
638 return -1;
639 /* Fill remainder of system RAM, if appropriate. */
640 if (addr < max_addr) {
641 if (coeff_flag && coeff < 0) {
642 /* Split remaining nodes into num-sized chunks */
643 num_nodes += split_nodes_by_size(&addr, max_addr,
644 num_nodes, num);
645 goto out;
646 }
647 switch (*(cmdline - 1)) {
648 case '*':
649 /* Split remaining nodes into coeff chunks */
650 if (coeff <= 0)
651 break;
652 num_nodes += split_nodes_equally(&addr, max_addr,
653 num_nodes, coeff);
654 break;
655 case ',':
656 /* Do not allocate remaining system RAM */
657 break;
658 default:
659 /* Give one final node */
660 setup_node_range(num_nodes, &addr, max_addr - addr,
661 max_addr);
662 num_nodes++;
663 }
664 } 629 }
665out: 630
631 if (num_nodes < 0)
632 return num_nodes;
666 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 633 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
667 if (memnode_shift < 0) { 634 if (memnode_shift < 0) {
668 memnode_shift = 0; 635 memnode_shift = 0;
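
With the parser rewritten, numa=fake= accepts exactly two forms: a bare node count, or a per-node size carrying an M/G suffix that memparse() understands. Hypothetical examples:

	/* numa=fake=8    -> eight interleaved nodes (split_nodes_interleave)   */
	/* numa=fake=512M -> 512 MB nodes (split_nodes_size_interleave)        */
	char *end;
	u64 size = memparse("512M", &end);	/* size == 512ULL << 20 */
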
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 66b55d6e69ed..ae9648eb1c7f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -704,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
704 if (!range_is_allowed(pfn, size)) 704 if (!range_is_allowed(pfn, size))
705 return 0; 705 return 0;
706 706
707 if (file->f_flags & O_SYNC) { 707 if (file->f_flags & O_DSYNC)
708 flags = _PAGE_CACHE_UC_MINUS; 708 flags = _PAGE_CACHE_UC_MINUS;
709 }
710 709
711#ifdef CONFIG_X86_32 710#ifdef CONFIG_X86_32
712 /* 711 /*
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e35999..c9ba9deafe83 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
6 6
7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO 7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8 8
9#ifdef CONFIG_HIGHPTE
10#define PGALLOC_USER_GFP __GFP_HIGHMEM
11#else
12#define PGALLOC_USER_GFP 0
13#endif
14
15gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
16
9pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 17pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
10{ 18{
11 return (pte_t *)__get_free_page(PGALLOC_GFP); 19 return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
15{ 23{
16 struct page *pte; 24 struct page *pte;
17 25
18#ifdef CONFIG_HIGHPTE 26 pte = alloc_pages(__userpte_alloc_gfp, 0);
19 pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
20#else
21 pte = alloc_pages(PGALLOC_GFP, 0);
22#endif
23 if (pte) 27 if (pte)
24 pgtable_page_ctor(pte); 28 pgtable_page_ctor(pte);
25 return pte; 29 return pte;
26} 30}
27 31
32static int __init setup_userpte(char *arg)
33{
34 if (!arg)
35 return -EINVAL;
36
37 /*
38 * "userpte=nohigh" disables allocation of user pagetables in
39 * high memory.
40 */
41 if (strcmp(arg, "nohigh") == 0)
42 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
43 else
44 return -EINVAL;
45 return 0;
46}
47early_param("userpte", setup_userpte);
48
28void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) 49void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
29{ 50{
30 pgtable_page_dtor(pte); 51 pgtable_page_dtor(pte);
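
The net effect: with CONFIG_HIGHPTE enabled, user page tables still default to highmem, but the new early_param() hook lets the boot command line force them into lowmem, e.g.

	userpte=nohigh

which clears __GFP_HIGHMEM from __userpte_alloc_gfp before pte_alloc_one() ever runs.
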
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6f8aa33031c7..9324f13492d5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void)
267 e820_register_active_regions(chunk->nid, chunk->start_pfn, 267 e820_register_active_regions(chunk->nid, chunk->start_pfn,
268 min(chunk->end_pfn, max_pfn)); 268 min(chunk->end_pfn, max_pfn));
269 } 269 }
270 /* for out of order entries in SRAT */
271 sort_node_map();
270 272
271 for_each_online_node(nid) { 273 for_each_online_node(nid) {
272 unsigned long start = node_start_pfn[nid]; 274 unsigned long start = node_start_pfn[nid];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index d89075489664..28c68762648f 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
229 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); 229 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
230 } 230 }
231 231
232 if (changed) 232 if (changed) {
233 node_set(node, cpu_nodes_parsed);
233 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", 234 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
234 nd->start, nd->end); 235 nd->start, nd->end);
236 }
235} 237}
236 238
237/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 239/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -317,7 +319,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
317 unsigned long s = nodes[i].start >> PAGE_SHIFT; 319 unsigned long s = nodes[i].start >> PAGE_SHIFT;
318 unsigned long e = nodes[i].end >> PAGE_SHIFT; 320 unsigned long e = nodes[i].end >> PAGE_SHIFT;
319 pxmram += e - s; 321 pxmram += e - s;
320 pxmram -= absent_pages_in_range(s, e); 322 pxmram -= __absent_pages_in_range(i, s, e);
321 if ((long)pxmram < 0) 323 if ((long)pxmram < 0)
322 pxmram = 0; 324 pxmram = 0;
323 } 325 }
@@ -373,6 +375,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
373 for_each_node_mask(i, nodes_parsed) 375 for_each_node_mask(i, nodes_parsed)
374 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 376 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
375 nodes[i].end >> PAGE_SHIFT); 377 nodes[i].end >> PAGE_SHIFT);
378 /* for out of order entries in SRAT */
379 sort_node_map();
376 if (!nodes_cover_memory(nodes)) { 380 if (!nodes_cover_memory(nodes)) {
377 bad_srat(); 381 bad_srat();
378 return -1; 382 return -1;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 65b58e4b0b8b..426f3a1a64d3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,7 @@ union smp_flush_state {
41 struct { 41 struct {
42 struct mm_struct *flush_mm; 42 struct mm_struct *flush_mm;
43 unsigned long flush_va; 43 unsigned long flush_va;
44 spinlock_t tlbstate_lock; 44 raw_spinlock_t tlbstate_lock;
45 DECLARE_BITMAP(flush_cpumask, NR_CPUS); 45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
46 }; 46 };
47 char pad[INTERNODE_CACHE_BYTES]; 47 char pad[INTERNODE_CACHE_BYTES];
@@ -181,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
181 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is 181 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
182 * probably not worth checking this for a cache-hot lock. 182 * probably not worth checking this for a cache-hot lock.
183 */ 183 */
184 spin_lock(&f->tlbstate_lock); 184 raw_spin_lock(&f->tlbstate_lock);
185 185
186 f->flush_mm = mm; 186 f->flush_mm = mm;
187 f->flush_va = va; 187 f->flush_va = va;
@@ -199,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
199 199
200 f->flush_mm = NULL; 200 f->flush_mm = NULL;
201 f->flush_va = 0; 201 f->flush_va = 0;
202 spin_unlock(&f->tlbstate_lock); 202 raw_spin_unlock(&f->tlbstate_lock);
203} 203}
204 204
205void native_flush_tlb_others(const struct cpumask *cpumask, 205void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -223,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
223 int i; 223 int i;
224 224
225 for (i = 0; i < ARRAY_SIZE(flush_state); i++) 225 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226 spin_lock_init(&flush_state[i].tlbstate_lock); 226 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 227
228 return 0; 228 return 0;
229} 229}
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 044897be021f..3855096c59b8 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -41,10 +41,11 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
41} 41}
42 42
43static struct stacktrace_ops backtrace_ops = { 43static struct stacktrace_ops backtrace_ops = {
44 .warning = backtrace_warning, 44 .warning = backtrace_warning,
45 .warning_symbol = backtrace_warning_symbol, 45 .warning_symbol = backtrace_warning_symbol,
46 .stack = backtrace_stack, 46 .stack = backtrace_stack,
47 .address = backtrace_address, 47 .address = backtrace_address,
48 .walk_stack = print_context_stack,
48}; 49};
49 50
50struct frame_head { 51struct frame_head {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index cb88b1a0bd5f..2c505ee71014 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void)
159 159
160 for_each_possible_cpu(i) { 160 for_each_possible_cpu(i) {
161 per_cpu(cpu_msrs, i).multiplex = 161 per_cpu(cpu_msrs, i).multiplex =
162 kmalloc(multiplex_size, GFP_KERNEL); 162 kzalloc(multiplex_size, GFP_KERNEL);
163 if (!per_cpu(cpu_msrs, i).multiplex) 163 if (!per_cpu(cpu_msrs, i).multiplex)
164 return 0; 164 return 0;
165 } 165 }
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
179 if (counter_config[i].enabled) { 179 if (counter_config[i].enabled) {
180 multiplex[i].saved = -(u64)counter_config[i].count; 180 multiplex[i].saved = -(u64)counter_config[i].count;
181 } else { 181 } else {
182 multiplex[i].addr = 0;
183 multiplex[i].saved = 0; 182 multiplex[i].saved = 0;
184 } 183 }
185 } 184 }
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
189 188
190static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) 189static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
191{ 190{
191 struct op_msr *counters = msrs->counters;
192 struct op_msr *multiplex = msrs->multiplex; 192 struct op_msr *multiplex = msrs->multiplex;
193 int i; 193 int i;
194 194
195 for (i = 0; i < model->num_counters; ++i) { 195 for (i = 0; i < model->num_counters; ++i) {
196 int virt = op_x86_phys_to_virt(i); 196 int virt = op_x86_phys_to_virt(i);
197 if (multiplex[virt].addr) 197 if (counters[i].addr)
198 rdmsrl(multiplex[virt].addr, multiplex[virt].saved); 198 rdmsrl(counters[i].addr, multiplex[virt].saved);
199 } 199 }
200} 200}
201 201
202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) 202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
203{ 203{
204 struct op_msr *counters = msrs->counters;
204 struct op_msr *multiplex = msrs->multiplex; 205 struct op_msr *multiplex = msrs->multiplex;
205 int i; 206 int i;
206 207
207 for (i = 0; i < model->num_counters; ++i) { 208 for (i = 0; i < model->num_counters; ++i) {
208 int virt = op_x86_phys_to_virt(i); 209 int virt = op_x86_phys_to_virt(i);
209 if (multiplex[virt].addr) 210 if (counters[i].addr)
210 wrmsrl(multiplex[virt].addr, multiplex[virt].saved); 211 wrmsrl(counters[i].addr, multiplex[virt].saved);
211 } 212 }
212} 213}
213 214
@@ -222,7 +223,7 @@ static void nmi_cpu_switch(void *dummy)
222 223
223 /* move to next set */ 224 /* move to next set */
224 si += model->num_counters; 225 si += model->num_counters;
225 if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) 226 if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
226 per_cpu(switch_index, cpu) = 0; 227 per_cpu(switch_index, cpu) = 0;
227 else 228 else
228 per_cpu(switch_index, cpu) = si; 229 per_cpu(switch_index, cpu) = si;
@@ -303,11 +304,11 @@ static int allocate_msrs(void)
303 304
304 int i; 305 int i;
305 for_each_possible_cpu(i) { 306 for_each_possible_cpu(i) {
306 per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, 307 per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
307 GFP_KERNEL); 308 GFP_KERNEL);
308 if (!per_cpu(cpu_msrs, i).counters) 309 if (!per_cpu(cpu_msrs, i).counters)
309 return 0; 310 return 0;
310 per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, 311 per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
311 GFP_KERNEL); 312 GFP_KERNEL);
312 if (!per_cpu(cpu_msrs, i).controls) 313 if (!per_cpu(cpu_msrs, i).controls)
313 return 0; 314 return 0;
@@ -598,6 +599,7 @@ static int __init ppro_init(char **cpu_type)
598 case 15: case 23: 599 case 15: case 23:
599 *cpu_type = "i386/core_2"; 600 *cpu_type = "i386/core_2";
600 break; 601 break;
602 case 0x2e:
601 case 26: 603 case 26:
602 spec = &op_arch_perfmon_spec; 604 spec = &op_arch_perfmon_spec;
603 *cpu_type = "i386/core_i7"; 605 *cpu_type = "i386/core_i7";
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c29f03a..6a58256dce9f 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,9 @@
22#include <asm/ptrace.h> 22#include <asm/ptrace.h>
23#include <asm/msr.h> 23#include <asm/msr.h>
24#include <asm/nmi.h> 24#include <asm/nmi.h>
25#include <asm/apic.h>
26#include <asm/processor.h>
27#include <asm/cpufeature.h>
25 28
26#include "op_x86_model.h" 29#include "op_x86_model.h"
27#include "op_counter.h" 30#include "op_counter.h"
@@ -43,15 +46,13 @@
43 46
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 47static unsigned long reset_value[NUM_VIRT_COUNTERS];
45 48
46#ifdef CONFIG_OPROFILE_IBS
47
48/* IbsFetchCtl bits/masks */ 49/* IbsFetchCtl bits/masks */
49#define IBS_FETCH_RAND_EN (1ULL<<57) 50#define IBS_FETCH_RAND_EN (1ULL<<57)
50#define IBS_FETCH_VAL (1ULL<<49) 51#define IBS_FETCH_VAL (1ULL<<49)
51#define IBS_FETCH_ENABLE (1ULL<<48) 52#define IBS_FETCH_ENABLE (1ULL<<48)
52#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL 53#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL
53 54
54/*IbsOpCtl bits */ 55/* IbsOpCtl bits */
55#define IBS_OP_CNT_CTL (1ULL<<19) 56#define IBS_OP_CNT_CTL (1ULL<<19)
56#define IBS_OP_VAL (1ULL<<18) 57#define IBS_OP_VAL (1ULL<<18)
57#define IBS_OP_ENABLE (1ULL<<17) 58#define IBS_OP_ENABLE (1ULL<<17)
@@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
59#define IBS_FETCH_SIZE 6 60#define IBS_FETCH_SIZE 6
60#define IBS_OP_SIZE 12 61#define IBS_OP_SIZE 12
61 62
62static int has_ibs; /* AMD Family10h and later */ 63static u32 ibs_caps;
63 64
64struct op_ibs_config { 65struct op_ibs_config {
65 unsigned long op_enabled; 66 unsigned long op_enabled;
@@ -71,24 +72,52 @@ struct op_ibs_config {
71}; 72};
72 73
73static struct op_ibs_config ibs_config; 74static struct op_ibs_config ibs_config;
75static u64 ibs_op_ctl;
74 76
75#endif 77/*
78 * IBS cpuid feature detection
79 */
76 80
77#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 81#define IBS_CPUID_FEATURES 0x8000001b
82
83/*
84 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
85 * bit 0 is used to indicate the existence of IBS.
86 */
87#define IBS_CAPS_AVAIL (1LL<<0)
88#define IBS_CAPS_RDWROPCNT (1LL<<3)
89#define IBS_CAPS_OPCNT (1LL<<4)
90
91/*
92 * IBS randomization macros
93 */
94#define IBS_RANDOM_BITS 12
95#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)
96#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))
78 97
79static void op_mux_fill_in_addresses(struct op_msrs * const msrs) 98static u32 get_ibs_caps(void)
80{ 99{
81 int i; 100 u32 ibs_caps;
101 unsigned int max_level;
82 102
83 for (i = 0; i < NUM_VIRT_COUNTERS; i++) { 103 if (!boot_cpu_has(X86_FEATURE_IBS))
84 int hw_counter = op_x86_virt_to_phys(i); 104 return 0;
85 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 105
86 msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; 106 /* check IBS cpuid feature flags */
87 else 107 max_level = cpuid_eax(0x80000000);
88 msrs->multiplex[i].addr = 0; 108 if (max_level < IBS_CPUID_FEATURES)
89 } 109 return IBS_CAPS_AVAIL;
110
111 ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
112 if (!(ibs_caps & IBS_CAPS_AVAIL))
113 /* cpuid flags not valid */
114 return IBS_CAPS_AVAIL;
115
116 return ibs_caps;
90} 117}
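
The probe above follows the IBS CPUID convention: extended leaf 0x8000001b returns the capability flags in EAX, with bit 0 set when those flags are valid. A minimal user-space sketch of the same detection, assuming GCC/Clang's <cpuid.h> (illustrative only, not part of this patch):

    #include <cpuid.h>
    #include <stdio.h>

    #define IBS_CPUID_FEATURES 0x8000001b
    #define IBS_CAPS_AVAIL     (1u << 0)

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Highest extended leaf this CPU implements */
        if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx))
            return 1;
        if (eax < IBS_CPUID_FEATURES) {
            puts("IBS capability leaf not implemented");
            return 0;
        }
        /* EAX of leaf 0x8000001b holds the IBS flags; bit 0 = valid */
        if (!__get_cpuid(IBS_CPUID_FEATURES, &eax, &ebx, &ecx, &edx) ||
            !(eax & IBS_CAPS_AVAIL)) {
            puts("IBS capability flags not valid");
            return 0;
        }
        printf("ibs_caps = 0x%08x\n", eax);
        return 0;
    }
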
91 118
119#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
120
92static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, 121static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
93 struct op_msrs const * const msrs) 122 struct op_msrs const * const msrs)
94{ 123{
@@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
98 /* enable active counters */ 127 /* enable active counters */
99 for (i = 0; i < NUM_COUNTERS; ++i) { 128 for (i = 0; i < NUM_COUNTERS; ++i) {
100 int virt = op_x86_phys_to_virt(i); 129 int virt = op_x86_phys_to_virt(i);
101 if (!counter_config[virt].enabled) 130 if (!reset_value[virt])
102 continue; 131 continue;
103 rdmsrl(msrs->controls[i].addr, val); 132 rdmsrl(msrs->controls[i].addr, val);
104 val &= model->reserved; 133 val &= model->reserved;
@@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
107 } 136 }
108} 137}
109 138
110#else
111
112static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
113
114#endif 139#endif
115 140
116/* functions for op_amd_spec */ 141/* functions for op_amd_spec */
@@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
122 for (i = 0; i < NUM_COUNTERS; i++) { 147 for (i = 0; i < NUM_COUNTERS; i++) {
123 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 148 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
124 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 149 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
125 else
126 msrs->counters[i].addr = 0;
127 } 150 }
128 151
129 for (i = 0; i < NUM_CONTROLS; i++) { 152 for (i = 0; i < NUM_CONTROLS; i++) {
130 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) 153 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
131 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 154 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
132 else
133 msrs->controls[i].addr = 0;
134 } 155 }
135
136 op_mux_fill_in_addresses(msrs);
137} 156}
138 157
139static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, 158static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
144 163
145 /* setup reset_value */ 164 /* setup reset_value */
146 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 165 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
147 if (counter_config[i].enabled) 166 if (counter_config[i].enabled
167 && msrs->counters[op_x86_virt_to_phys(i)].addr)
148 reset_value[i] = counter_config[i].count; 168 reset_value[i] = counter_config[i].count;
149 else 169 else
150 reset_value[i] = 0; 170 reset_value[i] = 0;
@@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
152 172
153 /* clear all counters */ 173 /* clear all counters */
154 for (i = 0; i < NUM_CONTROLS; ++i) { 174 for (i = 0; i < NUM_CONTROLS; ++i) {
155 if (unlikely(!msrs->controls[i].addr)) 175 if (unlikely(!msrs->controls[i].addr)) {
176 if (counter_config[i].enabled && !smp_processor_id())
177 /*
 178 * the counter is reserved; this runs on all
 179 * cpus, so report only for cpu #0
180 */
181 op_x86_warn_reserved(i);
156 continue; 182 continue;
183 }
157 rdmsrl(msrs->controls[i].addr, val); 184 rdmsrl(msrs->controls[i].addr, val);
185 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
186 op_x86_warn_in_use(i);
158 val &= model->reserved; 187 val &= model->reserved;
159 wrmsrl(msrs->controls[i].addr, val); 188 wrmsrl(msrs->controls[i].addr, val);
160 } 189 }
@@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
169 /* enable active counters */ 198 /* enable active counters */
170 for (i = 0; i < NUM_COUNTERS; ++i) { 199 for (i = 0; i < NUM_COUNTERS; ++i) {
171 int virt = op_x86_phys_to_virt(i); 200 int virt = op_x86_phys_to_virt(i);
172 if (!counter_config[virt].enabled) 201 if (!reset_value[virt])
173 continue;
174 if (!msrs->counters[i].addr)
175 continue; 202 continue;
176 203
177 /* setup counter registers */ 204 /* setup counter registers */
@@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
185 } 212 }
186} 213}
187 214
188#ifdef CONFIG_OPROFILE_IBS 215/*
216 * 16-bit Linear Feedback Shift Register (LFSR)
217 *
 218 *
 219 * Feedback polynomial = x^16 + x^14 + x^13 + x^11 + 1
220 */
221static unsigned int lfsr_random(void)
222{
223 static unsigned int lfsr_value = 0xF00D;
224 unsigned int bit;
225
226 /* Compute next bit to shift in */
227 bit = ((lfsr_value >> 0) ^
228 (lfsr_value >> 2) ^
229 (lfsr_value >> 3) ^
230 (lfsr_value >> 5)) & 0x0001;
231
232 /* Advance to next register value */
233 lfsr_value = (lfsr_value >> 1) | (bit << 15);
234
235 return lfsr_value;
236}
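
The taps at bits 16, 14, 13 and 11 form a maximal-length polynomial, so the register cycles through all 65535 non-zero states before repeating. A stand-alone sketch that verifies the period, assuming the same 0xF00D seed (illustrative, not part of the patch):

    #include <stdio.h>

    static unsigned int lfsr_value = 0xF00D;   /* same seed as the patch */

    static unsigned int lfsr_random(void)
    {
        /* taps 16,14,13,11 == XOR of bits 0,2,3,5 of the shifted state */
        unsigned int bit = ((lfsr_value >> 0) ^ (lfsr_value >> 2) ^
                            (lfsr_value >> 3) ^ (lfsr_value >> 5)) & 1;

        lfsr_value = (lfsr_value >> 1) | (bit << 15);
        return lfsr_value;
    }

    int main(void)
    {
        unsigned int seed = lfsr_value, period = 0;

        do {
            lfsr_random();
            period++;
        } while (lfsr_value != seed);

        printf("period = %u\n", period);   /* 65535 for any non-zero seed */
        return 0;
    }
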
237
238/*
239 * IBS software randomization
240 *
241 * The IBS periodic op counter is randomized in software. The lower 12
242 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
243 * initialized with a 12 bit random value.
244 */
245static inline u64 op_amd_randomize_ibs_op(u64 val)
246{
247 unsigned int random = lfsr_random();
248
249 if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
250 /*
 251 * Work around hardware that cannot write to IbsOpCurCnt
252 *
253 * Randomize the lower 8 bits of the 16 bit
254 * IbsOpMaxCnt [15:0] value in the range of -128 to
255 * +127 by adding/subtracting an offset to the
256 * maximum count (IbsOpMaxCnt).
257 *
258 * To avoid over or underflows and protect upper bits
259 * starting at bit 16, the initial value for
260 * IbsOpMaxCnt must fit in the range from 0x0081 to
261 * 0xff80.
262 */
263 val += (s8)(random >> 4);
264 else
265 val |= (u64)(random & IBS_RANDOM_MASK) << 32;
266
267 return val;
268}
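
To make the two paths concrete, the sketch below applies both transformations to a sample max-count value: ORing a 12-bit random value into the IbsOpCurCnt field at bit 32 when the hardware can write it, and otherwise adding a signed 8-bit offset to IbsOpMaxCnt. The 0x1A2B sample stands in for the LFSR output (assumptions: user-space build, fixed sample value):

    #include <stdint.h>
    #include <stdio.h>

    #define IBS_RANDOM_BITS 12
    #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)

    int main(void)
    {
        uint64_t ibs_op_ctl = 0x8000;      /* example IbsOpMaxCnt value */
        unsigned int random = 0x1A2B;      /* stand-in for lfsr_random() */

        /* hw can write IbsOpCurCnt: seed its low 12 bits (at bit 32) */
        uint64_t with_curcnt = ibs_op_ctl |
                ((uint64_t)(random & IBS_RANDOM_MASK) << 32);

        /* no IbsOpCurCnt: nudge MaxCnt by a signed 8-bit offset instead */
        uint64_t with_offset = ibs_op_ctl + (int8_t)(random >> 4);

        printf("IbsOpCurCnt path: 0x%011llx\n",
               (unsigned long long)with_curcnt);
        printf("IbsOpMaxCnt path: 0x%011llx\n",
               (unsigned long long)with_offset);
        return 0;
    }
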
189 269
190static inline void 270static inline void
191op_amd_handle_ibs(struct pt_regs * const regs, 271op_amd_handle_ibs(struct pt_regs * const regs,
@@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
194 u64 val, ctl; 274 u64 val, ctl;
195 struct op_entry entry; 275 struct op_entry entry;
196 276
197 if (!has_ibs) 277 if (!ibs_caps)
198 return; 278 return;
199 279
200 if (ibs_config.fetch_enabled) { 280 if (ibs_config.fetch_enabled) {
@@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
236 oprofile_write_commit(&entry); 316 oprofile_write_commit(&entry);
237 317
238 /* reenable the IRQ */ 318 /* reenable the IRQ */
239 ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; 319 ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
240 ctl |= IBS_OP_ENABLE;
241 wrmsrl(MSR_AMD64_IBSOPCTL, ctl); 320 wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
242 } 321 }
243 } 322 }
@@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs,
246static inline void op_amd_start_ibs(void) 325static inline void op_amd_start_ibs(void)
247{ 326{
248 u64 val; 327 u64 val;
249 if (has_ibs && ibs_config.fetch_enabled) { 328
329 if (!ibs_caps)
330 return;
331
332 if (ibs_config.fetch_enabled) {
250 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 333 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
251 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; 334 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
252 val |= IBS_FETCH_ENABLE; 335 val |= IBS_FETCH_ENABLE;
253 wrmsrl(MSR_AMD64_IBSFETCHCTL, val); 336 wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
254 } 337 }
255 338
256 if (has_ibs && ibs_config.op_enabled) { 339 if (ibs_config.op_enabled) {
257 val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; 340 ibs_op_ctl = ibs_config.max_cnt_op >> 4;
258 val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; 341 if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
259 val |= IBS_OP_ENABLE; 342 /*
343 * IbsOpCurCnt not supported. See
344 * op_amd_randomize_ibs_op() for details.
345 */
346 ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
347 } else {
348 /*
 349 * The start value is randomized with a
 350 * positive offset; we compensate for it
 351 * with half of the randomized range. Also
 352 * avoid underflows.
353 */
354 ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
355 0xFFFFULL);
356 }
357 if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
358 ibs_op_ctl |= IBS_OP_CNT_CTL;
359 ibs_op_ctl |= IBS_OP_ENABLE;
360 val = op_amd_randomize_ibs_op(ibs_op_ctl);
260 wrmsrl(MSR_AMD64_IBSOPCTL, val); 361 wrmsrl(MSR_AMD64_IBSOPCTL, val);
261 } 362 }
262} 363}
263 364
264static void op_amd_stop_ibs(void) 365static void op_amd_stop_ibs(void)
265{ 366{
266 if (has_ibs && ibs_config.fetch_enabled) 367 if (!ibs_caps)
368 return;
369
370 if (ibs_config.fetch_enabled)
267 /* clear max count and enable */ 371 /* clear max count and enable */
268 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); 372 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
269 373
270 if (has_ibs && ibs_config.op_enabled) 374 if (ibs_config.op_enabled)
271 /* clear max count and enable */ 375 /* clear max count and enable */
272 wrmsrl(MSR_AMD64_IBSOPCTL, 0); 376 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
273} 377}
274 378
275#else
276
277static inline void op_amd_handle_ibs(struct pt_regs * const regs,
278 struct op_msrs const * const msrs) { }
279static inline void op_amd_start_ibs(void) { }
280static inline void op_amd_stop_ibs(void) { }
281
282#endif
283
284static int op_amd_check_ctrs(struct pt_regs * const regs, 379static int op_amd_check_ctrs(struct pt_regs * const regs,
285 struct op_msrs const * const msrs) 380 struct op_msrs const * const msrs)
286{ 381{
@@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
355 } 450 }
356} 451}
357 452
358#ifdef CONFIG_OPROFILE_IBS
359
360static u8 ibs_eilvt_off; 453static u8 ibs_eilvt_off;
361 454
362static inline void apic_init_ibs_nmi_per_cpu(void *arg) 455static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -405,45 +498,36 @@ static int init_ibs_nmi(void)
405 return 1; 498 return 1;
406 } 499 }
407 500
408#ifdef CONFIG_NUMA
409 /* Sanity check */
410 /* Works only for 64bit with proper numa implementation. */
411 if (nodes != num_possible_nodes()) {
412 printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
413 "found: %d, expected %d",
414 nodes, num_possible_nodes());
415 return 1;
416 }
417#endif
418 return 0; 501 return 0;
419} 502}
420 503
421/* uninitialize the APIC for the IBS interrupts if needed */ 504/* uninitialize the APIC for the IBS interrupts if needed */
422static void clear_ibs_nmi(void) 505static void clear_ibs_nmi(void)
423{ 506{
424 if (has_ibs) 507 if (ibs_caps)
425 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 508 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
426} 509}
427 510
428/* initialize the APIC for the IBS interrupts if available */ 511/* initialize the APIC for the IBS interrupts if available */
429static void ibs_init(void) 512static void ibs_init(void)
430{ 513{
431 has_ibs = boot_cpu_has(X86_FEATURE_IBS); 514 ibs_caps = get_ibs_caps();
432 515
433 if (!has_ibs) 516 if (!ibs_caps)
434 return; 517 return;
435 518
436 if (init_ibs_nmi()) { 519 if (init_ibs_nmi()) {
437 has_ibs = 0; 520 ibs_caps = 0;
438 return; 521 return;
439 } 522 }
440 523
441 printk(KERN_INFO "oprofile: AMD IBS detected\n"); 524 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
525 (unsigned)ibs_caps);
442} 526}
443 527
444static void ibs_exit(void) 528static void ibs_exit(void)
445{ 529{
446 if (!has_ibs) 530 if (!ibs_caps)
447 return; 531 return;
448 532
449 clear_ibs_nmi(); 533 clear_ibs_nmi();
@@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
463 if (ret) 547 if (ret)
464 return ret; 548 return ret;
465 549
466 if (!has_ibs) 550 if (!ibs_caps)
467 return ret; 551 return ret;
468 552
469 /* model specific files */ 553 /* model specific files */
@@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
473 ibs_config.fetch_enabled = 0; 557 ibs_config.fetch_enabled = 0;
474 ibs_config.max_cnt_op = 250000; 558 ibs_config.max_cnt_op = 250000;
475 ibs_config.op_enabled = 0; 559 ibs_config.op_enabled = 0;
476 ibs_config.dispatched_ops = 1; 560 ibs_config.dispatched_ops = 0;
477 561
478 dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); 562 dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
479 oprofilefs_create_ulong(sb, dir, "enable", 563 oprofilefs_create_ulong(sb, dir, "enable",
@@ -488,8 +572,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
488 &ibs_config.op_enabled); 572 &ibs_config.op_enabled);
489 oprofilefs_create_ulong(sb, dir, "max_count", 573 oprofilefs_create_ulong(sb, dir, "max_count",
490 &ibs_config.max_cnt_op); 574 &ibs_config.max_cnt_op);
491 oprofilefs_create_ulong(sb, dir, "dispatched_ops", 575 if (ibs_caps & IBS_CAPS_OPCNT)
492 &ibs_config.dispatched_ops); 576 oprofilefs_create_ulong(sb, dir, "dispatched_ops",
577 &ibs_config.dispatched_ops);
493 578
494 return 0; 579 return 0;
495} 580}
@@ -507,19 +592,6 @@ static void op_amd_exit(void)
507 ibs_exit(); 592 ibs_exit();
508} 593}
509 594
510#else
511
512/* no IBS support */
513
514static int op_amd_init(struct oprofile_operations *ops)
515{
516 return 0;
517}
518
519static void op_amd_exit(void) {}
520
521#endif /* CONFIG_OPROFILE_IBS */
522
523struct op_x86_model_spec op_amd_spec = { 595struct op_x86_model_spec op_amd_spec = {
524 .num_counters = NUM_COUNTERS, 596 .num_counters = NUM_COUNTERS,
525 .num_controls = NUM_CONTROLS, 597 .num_controls = NUM_CONTROLS,
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac6b354becdf..e6a160a4684a 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
394 setup_num_counters(); 394 setup_num_counters();
395 stag = get_stagger(); 395 stag = get_stagger();
396 396
397 /* initialize some registers */
398 for (i = 0; i < num_counters; ++i)
399 msrs->counters[i].addr = 0;
400 for (i = 0; i < num_controls; ++i)
401 msrs->controls[i].addr = 0;
402
403 /* the counter & cccr registers we pay attention to */ 397 /* the counter & cccr registers we pay attention to */
404 for (i = 0; i < num_counters; ++i) { 398 for (i = 0; i < num_counters; ++i) {
405 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 399 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb05878554c..5d1727ba409e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
37 for (i = 0; i < num_counters; i++) { 37 for (i = 0; i < num_counters; i++) {
38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
40 else
41 msrs->counters[i].addr = 0;
42 } 40 }
43 41
44 for (i = 0; i < num_counters; i++) { 42 for (i = 0; i < num_counters; i++) {
45 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) 43 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
46 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 44 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
47 else
48 msrs->controls[i].addr = 0;
49 } 45 }
50} 46}
51 47
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
57 int i; 53 int i;
58 54
59 if (!reset_value) { 55 if (!reset_value) {
60 reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, 56 reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
61 GFP_ATOMIC); 57 GFP_ATOMIC);
62 if (!reset_value) 58 if (!reset_value)
63 return; 59 return;
@@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
82 78
83 /* clear all counters */ 79 /* clear all counters */
84 for (i = 0; i < num_counters; ++i) { 80 for (i = 0; i < num_counters; ++i) {
85 if (unlikely(!msrs->controls[i].addr)) 81 if (unlikely(!msrs->controls[i].addr)) {
82 if (counter_config[i].enabled && !smp_processor_id())
83 /*
 84 * the counter is reserved; this runs on all
 85 * cpus, so report only for cpu #0
86 */
87 op_x86_warn_reserved(i);
86 continue; 88 continue;
89 }
87 rdmsrl(msrs->controls[i].addr, val); 90 rdmsrl(msrs->controls[i].addr, val);
91 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
92 op_x86_warn_in_use(i);
88 val &= model->reserved; 93 val &= model->reserved;
89 wrmsrl(msrs->controls[i].addr, val); 94 wrmsrl(msrs->controls[i].addr, val);
90 } 95 }
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 7b8e75d16081..ff82a755edd4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -57,6 +57,26 @@ struct op_x86_model_spec {
57 57
58struct op_counter_config; 58struct op_counter_config;
59 59
60static inline void op_x86_warn_in_use(int counter)
61{
62 /*
63 * The warning indicates an already running counter. If
64 * oprofile doesn't collect data, then try using a different
65 * performance counter on your platform to monitor the desired
66 * event. Delete counter #%d from the desired event by editing
67 * the /usr/share/oprofile/%s/<cpu>/events file. If the event
68 * cannot be monitored by any other counter, contact your
69 * hardware or BIOS vendor.
70 */
71 pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
72 counter, smp_processor_id());
73}
74
75static inline void op_x86_warn_reserved(int counter)
76{
77 pr_warning("oprofile: counter #%d is already reserved\n", counter);
78}
79
60extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, 80extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
61 struct op_counter_config *counter_config); 81 struct op_counter_config *counter_config);
62extern int op_x86_phys_to_virt(int phys); 82extern int op_x86_phys_to_virt(int phys);
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index d49202e740ea..39fba37f702f 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -15,3 +15,8 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
15 15
16obj-y += common.o early.o 16obj-y += common.o early.o
17obj-y += amd_bus.o 17obj-y += amd_bus.o
18obj-$(CONFIG_X86_64) += bus_numa.o
19
20ifeq ($(CONFIG_PCI_DEBUG),y)
21EXTRA_CFLAGS += -DDEBUG
22endif
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1014eb4bfc37..5f11ff6f5389 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -7,6 +7,7 @@
7#include <asm/pci_x86.h> 7#include <asm/pci_x86.h>
8 8
9struct pci_root_info { 9struct pci_root_info {
10 struct acpi_device *bridge;
10 char *name; 11 char *name;
11 unsigned int res_num; 12 unsigned int res_num;
12 struct resource *res; 13 struct resource *res;
@@ -14,6 +15,51 @@ struct pci_root_info {
14 int busnum; 15 int busnum;
15}; 16};
16 17
18static bool pci_use_crs = true;
19
20static int __init set_use_crs(const struct dmi_system_id *id)
21{
22 pci_use_crs = true;
23 return 0;
24}
25
26static const struct dmi_system_id pci_use_crs_table[] __initconst = {
27 /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
28 {
29 .callback = set_use_crs,
30 .ident = "IBM System x3800",
31 .matches = {
32 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
33 DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
34 },
35 },
36 {}
37};
38
39void __init pci_acpi_crs_quirks(void)
40{
41 int year;
42
43 if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008)
44 pci_use_crs = false;
45
46 dmi_check_system(pci_use_crs_table);
47
48 /*
49 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that
50 * takes precedence over anything we figured out above.
51 */
52 if (pci_probe & PCI_ROOT_NO_CRS)
53 pci_use_crs = false;
54 else if (pci_probe & PCI_USE__CRS)
55 pci_use_crs = true;
56
57 printk(KERN_INFO "PCI: %s host bridge windows from ACPI; "
58 "if necessary, use \"pci=%s\" and report a bug\n",
59 pci_use_crs ? "Using" : "Ignoring",
60 pci_use_crs ? "nocrs" : "use_crs");
61}
62
17static acpi_status 63static acpi_status
18resource_to_addr(struct acpi_resource *resource, 64resource_to_addr(struct acpi_resource *resource,
19 struct acpi_resource_address64 *addr) 65 struct acpi_resource_address64 *addr)
@@ -44,18 +90,28 @@ count_resource(struct acpi_resource *acpi_res, void *data)
44 return AE_OK; 90 return AE_OK;
45} 91}
46 92
47static int 93static void
48bus_has_transparent_bridge(struct pci_bus *bus) 94align_resource(struct acpi_device *bridge, struct resource *res)
49{ 95{
50 struct pci_dev *dev; 96 int align = (res->flags & IORESOURCE_MEM) ? 16 : 4;
51 97
52 list_for_each_entry(dev, &bus->devices, bus_list) { 98 /*
53 u16 class = dev->class >> 8; 99 * Host bridge windows are not BARs, but the decoders on the PCI side
54 100 * that claim this address space have starting alignment and length
55 if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) 101 * constraints, so fix any obvious BIOS goofs.
56 return true; 102 */
103 if (!IS_ALIGNED(res->start, align)) {
104 dev_printk(KERN_DEBUG, &bridge->dev,
105 "host bridge window %pR invalid; "
106 "aligning start to %d-byte boundary\n", res, align);
107 res->start &= ~(align - 1);
108 }
109 if (!IS_ALIGNED(res->end + 1, align)) {
110 dev_printk(KERN_DEBUG, &bridge->dev,
111 "host bridge window %pR invalid; "
112 "aligning end to %d-byte boundary\n", res, align);
113 res->end = ALIGN(res->end, align) - 1;
57 } 114 }
58 return false;
59} 115}
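
The start is rounded down and the end rounded up to the decoder granularity (16 bytes for memory windows, 4 for I/O in this patch). A small sketch of the same power-of-two rounding, with IS_ALIGNED()/ALIGN() re-declared locally for a user-space build (the window values are made up):

    #include <stdio.h>

    /* Power-of-two rounding, as the kernel's IS_ALIGNED()/ALIGN() do */
    #define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)
    #define ALIGN(x, a)      (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

    int main(void)
    {
        unsigned long start = 0xfed00004, end = 0xfed0fffe; /* bogus window */
        int align = 16;                                     /* memory window */

        if (!IS_ALIGNED(start, align))
            start &= ~(unsigned long)(align - 1);  /* round start down */
        if (!IS_ALIGNED(end + 1, align))
            end = ALIGN(end, align) - 1;           /* round end up */

        printf("fixed window: [0x%lx-0x%lx]\n", start, end);
        return 0;
    }
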
60 116
61static acpi_status 117static acpi_status
@@ -67,12 +123,8 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
67 acpi_status status; 123 acpi_status status;
68 unsigned long flags; 124 unsigned long flags;
69 struct resource *root; 125 struct resource *root;
70 int max_root_bus_resources = PCI_BUS_NUM_RESOURCES;
71 u64 start, end; 126 u64 start, end;
72 127
73 if (bus_has_transparent_bridge(info->bus))
74 max_root_bus_resources -= 3;
75
76 status = resource_to_addr(acpi_res, &addr); 128 status = resource_to_addr(acpi_res, &addr);
77 if (!ACPI_SUCCESS(status)) 129 if (!ACPI_SUCCESS(status))
78 return AE_OK; 130 return AE_OK;
@@ -90,14 +142,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
90 142
91 start = addr.minimum + addr.translation_offset; 143 start = addr.minimum + addr.translation_offset;
92 end = start + addr.address_length - 1; 144 end = start + addr.address_length - 1;
93 if (info->res_num >= max_root_bus_resources) {
94 printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx "
95 "from %s for %s due to _CRS returning more than "
96 "%d resource descriptors\n", (unsigned long) start,
97 (unsigned long) end, root->name, info->name,
98 max_root_bus_resources);
99 return AE_OK;
100 }
101 145
102 res = &info->res[info->res_num]; 146 res = &info->res[info->res_num];
103 res->name = info->name; 147 res->name = info->name;
@@ -105,14 +149,28 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
105 res->start = start; 149 res->start = start;
106 res->end = end; 150 res->end = end;
107 res->child = NULL; 151 res->child = NULL;
152 align_resource(info->bridge, res);
153
154 if (!pci_use_crs) {
155 dev_printk(KERN_DEBUG, &info->bridge->dev,
156 "host bridge window %pR (ignored)\n", res);
157 return AE_OK;
158 }
108 159
109 if (insert_resource(root, res)) { 160 if (insert_resource(root, res)) {
110 printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " 161 dev_err(&info->bridge->dev,
111 "from %s for %s\n", (unsigned long) res->start, 162 "can't allocate host bridge window %pR\n", res);
112 (unsigned long) res->end, root->name, info->name);
113 } else { 163 } else {
114 info->bus->resource[info->res_num] = res; 164 pci_bus_add_resource(info->bus, res, 0);
115 info->res_num++; 165 info->res_num++;
166 if (addr.translation_offset)
167 dev_info(&info->bridge->dev, "host bridge window %pR "
168 "(PCI address [%#llx-%#llx])\n",
169 res, res->start - addr.translation_offset,
170 res->end - addr.translation_offset);
171 else
172 dev_info(&info->bridge->dev,
173 "host bridge window %pR\n", res);
116 } 174 }
117 return AE_OK; 175 return AE_OK;
118} 176}
@@ -124,6 +182,10 @@ get_current_resources(struct acpi_device *device, int busnum,
124 struct pci_root_info info; 182 struct pci_root_info info;
125 size_t size; 183 size_t size;
126 184
185 if (pci_use_crs)
186 pci_bus_remove_resources(bus);
187
188 info.bridge = device;
127 info.bus = bus; 189 info.bus = bus;
128 info.res_num = 0; 190 info.res_num = 0;
129 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, 191 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
@@ -163,8 +225,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
163#endif 225#endif
164 226
165 if (domain && !pci_domains_supported) { 227 if (domain && !pci_domains_supported) {
166 printk(KERN_WARNING "PCI: Multiple domains not supported " 228 printk(KERN_WARNING "pci_bus %04x:%02x: "
167 "(dom %d, bus %d)\n", domain, busnum); 229 "ignored (multiple domains not supported)\n",
230 domain, busnum);
168 return NULL; 231 return NULL;
169 } 232 }
170 233
@@ -188,7 +251,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
188 */ 251 */
189 sd = kzalloc(sizeof(*sd), GFP_KERNEL); 252 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
190 if (!sd) { 253 if (!sd) {
191 printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); 254 printk(KERN_WARNING "pci_bus %04x:%02x: "
255 "ignored (out of memory)\n", domain, busnum);
192 return NULL; 256 return NULL;
193 } 257 }
194 258
@@ -209,9 +273,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
209 } else { 273 } else {
210 bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); 274 bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd);
211 if (bus) { 275 if (bus) {
212 if (pci_probe & PCI_USE__CRS) 276 get_current_resources(device, busnum, domain, bus);
213 get_current_resources(device, busnum, domain,
214 bus);
215 bus->subordinate = pci_scan_child_bus(bus); 277 bus->subordinate = pci_scan_child_bus(bus);
216 } 278 }
217 } 279 }
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 572ee9782f2a..95ecbd495955 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -6,10 +6,10 @@
6 6
7#ifdef CONFIG_X86_64 7#ifdef CONFIG_X86_64
8#include <asm/pci-direct.h> 8#include <asm/pci-direct.h>
9#include <asm/mpspec.h>
10#include <linux/cpumask.h>
11#endif 9#endif
12 10
11#include "bus_numa.h"
12
13/* 13/*
14 * This discovers the pcibus <-> node mapping on AMD K8. 14 * This discovers the pcibus <-> node mapping on AMD K8.
 15 * It also gets the peer root bus resources for io/mmio. 15
@@ -17,67 +17,6 @@
17 17
18#ifdef CONFIG_X86_64 18#ifdef CONFIG_X86_64
19 19
20/*
21 * sub bus (transparent) will use entres from 3 to store extra from root,
22 * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES?
23 */
24#define RES_NUM 16
25struct pci_root_info {
26 char name[12];
27 unsigned int res_num;
28 struct resource res[RES_NUM];
29 int bus_min;
30 int bus_max;
31 int node;
32 int link;
33};
34
35/* 4 at this time, it may become to 32 */
36#define PCI_ROOT_NR 4
37static int pci_root_num;
38static struct pci_root_info pci_root_info[PCI_ROOT_NR];
39
40void x86_pci_root_bus_res_quirks(struct pci_bus *b)
41{
42 int i;
43 int j;
44 struct pci_root_info *info;
45
46 /* don't go for it if _CRS is used already */
47 if (b->resource[0] != &ioport_resource ||
48 b->resource[1] != &iomem_resource)
49 return;
50
51 /* if only one root bus, don't need to anything */
52 if (pci_root_num < 2)
53 return;
54
55 for (i = 0; i < pci_root_num; i++) {
56 if (pci_root_info[i].bus_min == b->number)
57 break;
58 }
59
60 if (i == pci_root_num)
61 return;
62
63 printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n",
64 b->number);
65
66 info = &pci_root_info[i];
67 for (j = 0; j < info->res_num; j++) {
68 struct resource *res;
69 struct resource *root;
70
71 res = &info->res[j];
72 b->resource[j] = res;
73 if (res->flags & IORESOURCE_IO)
74 root = &ioport_resource;
75 else
76 root = &iomem_resource;
77 insert_resource(root, res);
78 }
79}
80
81#define RANGE_NUM 16 20#define RANGE_NUM 16
82 21
83struct res_range { 22struct res_range {
@@ -130,52 +69,6 @@ static void __init update_range(struct res_range *range, size_t start,
130 } 69 }
131} 70}
132 71
133static void __init update_res(struct pci_root_info *info, size_t start,
134 size_t end, unsigned long flags, int merge)
135{
136 int i;
137 struct resource *res;
138
139 if (!merge)
140 goto addit;
141
142 /* try to merge it with old one */
143 for (i = 0; i < info->res_num; i++) {
144 size_t final_start, final_end;
145 size_t common_start, common_end;
146
147 res = &info->res[i];
148 if (res->flags != flags)
149 continue;
150
151 common_start = max((size_t)res->start, start);
152 common_end = min((size_t)res->end, end);
153 if (common_start > common_end + 1)
154 continue;
155
156 final_start = min((size_t)res->start, start);
157 final_end = max((size_t)res->end, end);
158
159 res->start = final_start;
160 res->end = final_end;
161 return;
162 }
163
164addit:
165
166 /* need to add that */
167 if (info->res_num >= RES_NUM)
168 return;
169
170 res = &info->res[info->res_num];
171 res->name = info->name;
172 res->flags = flags;
173 res->start = start;
174 res->end = end;
175 res->child = NULL;
176 info->res_num++;
177}
178
179struct pci_hostbridge_probe { 72struct pci_hostbridge_probe {
180 u32 bus; 73 u32 bus;
181 u32 slot; 74 u32 slot;
@@ -230,7 +123,6 @@ static int __init early_fill_mp_bus_info(void)
230 int j; 123 int j;
231 unsigned bus; 124 unsigned bus;
232 unsigned slot; 125 unsigned slot;
233 int found;
234 int node; 126 int node;
235 int link; 127 int link;
236 int def_node; 128 int def_node;
@@ -247,7 +139,7 @@ static int __init early_fill_mp_bus_info(void)
247 if (!early_pci_allowed()) 139 if (!early_pci_allowed())
248 return -1; 140 return -1;
249 141
250 found = 0; 142 found_all_numa_early = 0;
251 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { 143 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
252 u32 id; 144 u32 id;
253 u16 device; 145 u16 device;
@@ -261,12 +153,12 @@ static int __init early_fill_mp_bus_info(void)
261 device = (id>>16) & 0xffff; 153 device = (id>>16) & 0xffff;
262 if (pci_probes[i].vendor == vendor && 154 if (pci_probes[i].vendor == vendor &&
263 pci_probes[i].device == device) { 155 pci_probes[i].device == device) {
264 found = 1; 156 found_all_numa_early = 1;
265 break; 157 break;
266 } 158 }
267 } 159 }
268 160
269 if (!found) 161 if (!found_all_numa_early)
270 return 0; 162 return 0;
271 163
272 pci_root_num = 0; 164 pci_root_num = 0;
@@ -488,7 +380,7 @@ static int __init early_fill_mp_bus_info(void)
488 info = &pci_root_info[i]; 380 info = &pci_root_info[i];
489 res_num = info->res_num; 381 res_num = info->res_num;
490 busnum = info->bus_min; 382 busnum = info->bus_min;
491 printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", 383 printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n",
492 info->bus_min, info->bus_max, info->node, info->link); 384 info->bus_min, info->bus_max, info->node, info->link);
493 for (j = 0; j < res_num; j++) { 385 for (j = 0; j < res_num; j++) {
494 res = &info->res[j]; 386 res = &info->res[j];
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
new file mode 100644
index 000000000000..12d54ff3654d
--- /dev/null
+++ b/arch/x86/pci/bus_numa.c
@@ -0,0 +1,102 @@
1#include <linux/init.h>
2#include <linux/pci.h>
3
4#include "bus_numa.h"
5
6int pci_root_num;
7struct pci_root_info pci_root_info[PCI_ROOT_NR];
8int found_all_numa_early;
9
10void x86_pci_root_bus_res_quirks(struct pci_bus *b)
11{
12 int i;
13 int j;
14 struct pci_root_info *info;
15
16 /* don't go for it if _CRS is used already */
17 if (b->resource[0] != &ioport_resource ||
18 b->resource[1] != &iomem_resource)
19 return;
20
21 if (!pci_root_num)
22 return;
23
 24 /* for AMD, if there is only one root bus, nothing needs to be done */
25 if (pci_root_num < 2 && found_all_numa_early)
26 return;
27
28 for (i = 0; i < pci_root_num; i++) {
29 if (pci_root_info[i].bus_min == b->number)
30 break;
31 }
32
33 if (i == pci_root_num)
34 return;
35
36 printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n",
37 b->number);
38
39 pci_bus_remove_resources(b);
40 info = &pci_root_info[i];
41 for (j = 0; j < info->res_num; j++) {
42 struct resource *res;
43 struct resource *root;
44
45 res = &info->res[j];
46 pci_bus_add_resource(b, res, 0);
47 if (res->flags & IORESOURCE_IO)
48 root = &ioport_resource;
49 else
50 root = &iomem_resource;
51 insert_resource(root, res);
52 }
53}
54
55void __devinit update_res(struct pci_root_info *info, size_t start,
56 size_t end, unsigned long flags, int merge)
57{
58 int i;
59 struct resource *res;
60
61 if (start > end)
62 return;
63
64 if (!merge)
65 goto addit;
66
67 /* try to merge it with old one */
68 for (i = 0; i < info->res_num; i++) {
69 size_t final_start, final_end;
70 size_t common_start, common_end;
71
72 res = &info->res[i];
73 if (res->flags != flags)
74 continue;
75
76 common_start = max((size_t)res->start, start);
77 common_end = min((size_t)res->end, end);
78 if (common_start > common_end + 1)
79 continue;
80
81 final_start = min((size_t)res->start, start);
82 final_end = max((size_t)res->end, end);
83
84 res->start = final_start;
85 res->end = final_end;
86 return;
87 }
88
89addit:
90
91 /* need to add that */
92 if (info->res_num >= RES_NUM)
93 return;
94
95 res = &info->res[info->res_num];
96 res->name = info->name;
97 res->flags = flags;
98 res->start = start;
99 res->end = end;
100 res->child = NULL;
101 info->res_num++;
102}
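
update_res() folds a new range into an existing resource whenever the two overlap or are directly adjacent; the test 'common_start > common_end + 1' rejects only ranges separated by a real gap. A sketch of that interval logic on plain integers (illustrative, not kernel code):

    #include <stddef.h>
    #include <stdio.h>

    /* Merge [s2,e2] into [*s1,*e1] when they overlap or touch; a gap
     * (common_start > common_end + 1) keeps them separate. */
    static int try_merge(size_t *s1, size_t *e1, size_t s2, size_t e2)
    {
        size_t common_start = *s1 > s2 ? *s1 : s2;
        size_t common_end   = *e1 < e2 ? *e1 : e2;

        if (common_start > common_end + 1)
            return 0;

        *s1 = *s1 < s2 ? *s1 : s2;   /* grow to the union */
        *e1 = *e1 > e2 ? *e1 : e2;
        return 1;
    }

    int main(void)
    {
        size_t s = 0x1000, e = 0x1fff;

        /* adjacent: merged into [0x1000-0x2fff] */
        printf("adjacent: %d -> [0x%zx-0x%zx]\n",
               try_merge(&s, &e, 0x2000, 0x2fff), s, e);
        /* a real gap: left alone */
        printf("gap:      %d\n", try_merge(&s, &e, 0x4000, 0x4fff));
        return 0;
    }
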
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
new file mode 100644
index 000000000000..731b64ee8d84
--- /dev/null
+++ b/arch/x86/pci/bus_numa.h
@@ -0,0 +1,26 @@
1#ifdef CONFIG_X86_64
2
3/*
 4 * sub bus (transparent) will use entries from 3 to store extra from
 5 * root, so we need to make sure we have enough slots there.
6 */
7#define RES_NUM 16
8struct pci_root_info {
9 char name[12];
10 unsigned int res_num;
11 struct resource res[RES_NUM];
12 int bus_min;
13 int bus_max;
14 int node;
15 int link;
16};
17
18/* 4 at this time, it may grow to 32 */
19#define PCI_ROOT_NR 4
20extern int pci_root_num;
21extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
22extern int found_all_numa_early;
23
24extern void update_res(struct pci_root_info *info, size_t start,
25 size_t end, unsigned long flags, int merge);
26#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1331fcf26143..3736176acaab 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
410 return bus; 410 return bus;
411} 411}
412 412
413extern u8 pci_cache_line_size;
414
415int __init pcibios_init(void) 413int __init pcibios_init(void)
416{ 414{
417 struct cpuinfo_x86 *c = &boot_cpu_data; 415 struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -422,15 +420,19 @@ int __init pcibios_init(void)
422 } 420 }
423 421
424 /* 422 /*
425 * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 423 * Set PCI cacheline size to that of the CPU if the CPU has reported it.
 426 * and P4. It's also good for 386/486s (which actually have 16) 424 * (For older CPUs that don't support cpuid, we set it to 32 bytes.)
 425 * It's also good for 386/486s (which actually have 16)
427 * as quite a few PCI devices do not support smaller values. 426 * as quite a few PCI devices do not support smaller values.
428 */ 427 */
429 pci_cache_line_size = 32 >> 2; 428 if (c->x86_clflush_size > 0) {
430 if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) 429 pci_dfl_cache_line_size = c->x86_clflush_size >> 2;
431 pci_cache_line_size = 64 >> 2; /* K7 & K8 */ 430 printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n",
432 else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) 431 pci_dfl_cache_line_size << 2);
433 pci_cache_line_size = 128 >> 2; /* P4 */ 432 } else {
433 pci_dfl_cache_line_size = 32 >> 2;
434 printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");
435 }
434 436
435 pcibios_resource_survey(); 437 pcibios_resource_survey();
436 438
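
The shift by two reflects the units of the PCI Cache Line Size register, which counts 32-bit dwords rather than bytes. A trivial sketch of the conversion (the 64-byte figure is just an example):

    #include <stdio.h>

    int main(void)
    {
        /* PCI_CACHE_LINE_SIZE counts 32-bit dwords, so a 64-byte CPU
         * cacheline (e.g. K8) is programmed as 64 >> 2 == 16. */
        unsigned int clflush_bytes = 64;
        unsigned int dfl_cache_line_size = clflush_bytes >> 2;

        printf("register value %u == %u bytes\n",
               dfl_cache_line_size, dfl_cache_line_size << 2);
        return 0;
    }
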
@@ -518,6 +520,9 @@ char * __devinit pcibios_setup(char *str)
518 } else if (!strcmp(str, "use_crs")) { 520 } else if (!strcmp(str, "use_crs")) {
519 pci_probe |= PCI_USE__CRS; 521 pci_probe |= PCI_USE__CRS;
520 return NULL; 522 return NULL;
523 } else if (!strcmp(str, "nocrs")) {
524 pci_probe |= PCI_ROOT_NO_CRS;
525 return NULL;
521 } else if (!strcmp(str, "earlydump")) { 526 } else if (!strcmp(str, "earlydump")) {
522 pci_early_dump_regs = 1; 527 pci_early_dump_regs = 1;
523 return NULL; 528 return NULL;
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index aaf26ae58cd5..d1067d539bee 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
12 u32 v; 12 u32 v;
13 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 13 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
14 v = inl(0xcfc); 14 v = inl(0xcfc);
15 if (v != 0xffffffff)
16 pr_debug("%x reading 4 from %x: %x\n", slot, offset, v);
17 return v; 15 return v;
18} 16}
19 17
@@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
22 u8 v; 20 u8 v;
23 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 21 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
24 v = inb(0xcfc + (offset&3)); 22 v = inb(0xcfc + (offset&3));
25 pr_debug("%x reading 1 from %x: %x\n", slot, offset, v);
26 return v; 23 return v;
27} 24}
28 25
@@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
31 u16 v; 28 u16 v;
32 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 29 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
33 v = inw(0xcfc + (offset&2)); 30 v = inw(0xcfc + (offset&2));
34 pr_debug("%x reading 2 from %x: %x\n", slot, offset, v);
35 return v; 31 return v;
36} 32}
37 33
38void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, 34void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
39 u32 val) 35 u32 val)
40{ 36{
41 pr_debug("%x writing to %x: %x\n", slot, offset, val);
42 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 37 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
43 outl(val, 0xcfc); 38 outl(val, 0xcfc);
44} 39}
45 40
46void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) 41void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
47{ 42{
48 pr_debug("%x writing to %x: %x\n", slot, offset, val);
49 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 43 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
50 outb(val, 0xcfc + (offset&3)); 44 outb(val, 0xcfc + (offset&3));
51} 45}
52 46
53void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) 47void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
54{ 48{
55 pr_debug("%x writing to %x: %x\n", slot, offset, val);
56 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 49 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
57 outw(val, 0xcfc + (offset&2)); 50 outw(val, 0xcfc + (offset&2));
58} 51}
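
All of these helpers use configuration mechanism #1: the enable bit, bus, device, function and register offset are packed into one dword written to port 0xCF8, and the data is then transferred through port 0xCFC. A sketch that only builds the CONFIG_ADDRESS value (no port I/O, so it runs anywhere; 0:18.0 is the AMD K8 northbridge probed elsewhere in this series):

    #include <stdint.h>
    #include <stdio.h>

    /* Mechanism #1 CONFIG_ADDRESS: bit 31 enable, [23:16] bus,
     * [15:11] device, [10:8] function, [7:0] dword-aligned offset. */
    static uint32_t config_address(uint8_t bus, uint8_t slot,
                                   uint8_t func, uint8_t offset)
    {
        return 0x80000000u | ((uint32_t)bus << 16) |
               ((uint32_t)slot << 11) | ((uint32_t)func << 8) | offset;
    }

    int main(void)
    {
        /* vendor/device ID (offset 0) of 0:18.0, the K8 northbridge */
        printf("CONFIG_ADDRESS = 0x%08x\n", config_address(0, 0x18, 0, 0));
        return 0;
    }
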
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index b22d13b0c71d..5a8fbf8d4cac 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -60,22 +60,20 @@ skip_isa_ioresource_align(struct pci_dev *dev) {
60 * but we want to try to avoid allocating at 0x2900-0x2bff 60 * but we want to try to avoid allocating at 0x2900-0x2bff
 61 * which might have been mirrored at 0x0100-0x03ff. 61
62 */ 62 */
63void 63resource_size_t
64pcibios_align_resource(void *data, struct resource *res, 64pcibios_align_resource(void *data, const struct resource *res,
65 resource_size_t size, resource_size_t align) 65 resource_size_t size, resource_size_t align)
66{ 66{
67 struct pci_dev *dev = data; 67 struct pci_dev *dev = data;
68 resource_size_t start = res->start;
68 69
69 if (res->flags & IORESOURCE_IO) { 70 if (res->flags & IORESOURCE_IO) {
70 resource_size_t start = res->start;
71
72 if (skip_isa_ioresource_align(dev)) 71 if (skip_isa_ioresource_align(dev))
73 return; 72 return start;
74 if (start & 0x300) { 73 if (start & 0x300)
75 start = (start + 0x3ff) & ~0x3ff; 74 start = (start + 0x3ff) & ~0x3ff;
76 res->start = start;
77 }
78 } 75 }
76 return start;
79} 77}
80EXPORT_SYMBOL(pcibios_align_resource); 78EXPORT_SYMBOL(pcibios_align_resource);
81 79
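
The rounding expression "(start + 0x3ff) & ~0x3ff" bumps any candidate address whose bits 8-9 are set up to the next 1 KiB boundary, stepping over the 0x100-0x3ff window that legacy ISA devices may mirror. A sketch with a made-up starting address:

    #include <stdio.h>

    int main(void)
    {
        unsigned long start = 0x2930;   /* would alias 0x0130 on ISA */

        /* bits 8-9 set: round up to the next 1 KiB boundary so the
         * allocation skips the possibly mirrored 0x100-0x3ff range */
        if (start & 0x300)
            start = (start + 0x3ff) & ~0x3ffUL;

        printf("aligned start: 0x%lx\n", start);   /* 0x2c00 */
        return 0;
    }
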
@@ -129,7 +127,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
129 continue; 127 continue;
130 if (!r->start || 128 if (!r->start ||
131 pci_claim_resource(dev, idx) < 0) { 129 pci_claim_resource(dev, idx) < 0) {
132 dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); 130 dev_info(&dev->dev,
131 "can't reserve window %pR\n",
132 r);
133 /* 133 /*
134 * Something is wrong with the region. 134 * Something is wrong with the region.
135 * Invalidate the resource to prevent 135 * Invalidate the resource to prevent
@@ -144,16 +144,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
144 } 144 }
145} 145}
146 146
147struct pci_check_idx_range {
148 int start;
149 int end;
150};
151
147static void __init pcibios_allocate_resources(int pass) 152static void __init pcibios_allocate_resources(int pass)
148{ 153{
149 struct pci_dev *dev = NULL; 154 struct pci_dev *dev = NULL;
150 int idx, disabled; 155 int idx, disabled, i;
151 u16 command; 156 u16 command;
152 struct resource *r; 157 struct resource *r;
153 158
159 struct pci_check_idx_range idx_range[] = {
160 { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END },
161#ifdef CONFIG_PCI_IOV
162 { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END },
163#endif
164 };
165
154 for_each_pci_dev(dev) { 166 for_each_pci_dev(dev) {
155 pci_read_config_word(dev, PCI_COMMAND, &command); 167 pci_read_config_word(dev, PCI_COMMAND, &command);
156 for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { 168 for (i = 0; i < ARRAY_SIZE(idx_range); i++)
169 for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) {
157 r = &dev->resource[idx]; 170 r = &dev->resource[idx];
158 if (r->parent) /* Already allocated */ 171 if (r->parent) /* Already allocated */
159 continue; 172 continue;
@@ -164,12 +177,12 @@ static void __init pcibios_allocate_resources(int pass)
164 else 177 else
165 disabled = !(command & PCI_COMMAND_MEMORY); 178 disabled = !(command & PCI_COMMAND_MEMORY);
166 if (pass == disabled) { 179 if (pass == disabled) {
167 dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", 180 dev_dbg(&dev->dev,
168 (unsigned long long) r->start, 181 "BAR %d: reserving %pr (d=%d, p=%d)\n",
169 (unsigned long long) r->end, 182 idx, r, disabled, pass);
170 r->flags, disabled, pass);
171 if (pci_claim_resource(dev, idx) < 0) { 183 if (pci_claim_resource(dev, idx) < 0) {
172 dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); 184 dev_info(&dev->dev,
185 "can't reserve %pR\n", r);
173 /* We'll assign a new address later */ 186 /* We'll assign a new address later */
174 r->end -= r->start; 187 r->end -= r->start;
175 r->start = 0; 188 r->start = 0;
@@ -182,7 +195,7 @@ static void __init pcibios_allocate_resources(int pass)
182 /* Turn the ROM off, leave the resource region, 195 /* Turn the ROM off, leave the resource region,
183 * but keep it unregistered. */ 196 * but keep it unregistered. */
184 u32 reg; 197 u32 reg;
185 dev_dbg(&dev->dev, "disabling ROM\n"); 198 dev_dbg(&dev->dev, "disabling ROM %pR\n", r);
186 r->flags &= ~IORESOURCE_ROM_ENABLE; 199 r->flags &= ~IORESOURCE_ROM_ENABLE;
187 pci_read_config_dword(dev, 200 pci_read_config_dword(dev,
188 dev->rom_base_reg, &reg); 201 dev->rom_base_reg, &reg);
@@ -282,6 +295,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
282 return -EINVAL; 295 return -EINVAL;
283 296
284 prot = pgprot_val(vma->vm_page_prot); 297 prot = pgprot_val(vma->vm_page_prot);
298
299 /*
300 * Return error if pat is not enabled and write_combine is requested.
301 * Caller can followup with UC MINUS request and add a WC mtrr if there
302 * is a free mtrr slot.
303 */
304 if (!pat_enabled && write_combine)
305 return -EINVAL;
306
285 if (pat_enabled && write_combine) 307 if (pat_enabled && write_combine)
286 prot |= _PAGE_CACHE_WC; 308 prot |= _PAGE_CACHE_WC;
287 else if (pat_enabled || boot_cpu_data.x86 > 3) 309 else if (pat_enabled || boot_cpu_data.x86 > 3)
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 0696d506c4ad..b02f6d8ac922 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
590 case PCI_DEVICE_ID_INTEL_ICH10_1: 590 case PCI_DEVICE_ID_INTEL_ICH10_1:
591 case PCI_DEVICE_ID_INTEL_ICH10_2: 591 case PCI_DEVICE_ID_INTEL_ICH10_2:
592 case PCI_DEVICE_ID_INTEL_ICH10_3: 592 case PCI_DEVICE_ID_INTEL_ICH10_3:
593 case PCI_DEVICE_ID_INTEL_CPT_LPC1:
594 case PCI_DEVICE_ID_INTEL_CPT_LPC2:
593 r->name = "PIIX/ICH"; 595 r->name = "PIIX/ICH";
594 r->get = pirq_piix_get; 596 r->get = pirq_piix_get;
595 r->set = pirq_piix_set; 597 r->set = pirq_piix_set;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 602c172d3bd5..8f3f9a50b1e0 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,48 +15,98 @@
15#include <linux/acpi.h> 15#include <linux/acpi.h>
16#include <linux/sfi_acpi.h> 16#include <linux/sfi_acpi.h>
17#include <linux/bitmap.h> 17#include <linux/bitmap.h>
18#include <linux/sort.h> 18#include <linux/dmi.h>
19#include <asm/e820.h> 19#include <asm/e820.h>
20#include <asm/pci_x86.h> 20#include <asm/pci_x86.h>
21#include <asm/acpi.h> 21#include <asm/acpi.h>
22 22
23#define PREFIX "PCI: " 23#define PREFIX "PCI: "
24 24
25/* aperture is up to 256MB but BIOS may reserve less */
26#define MMCONFIG_APER_MIN (2 * 1024*1024)
27#define MMCONFIG_APER_MAX (256 * 1024*1024)
28
29/* Indicate if the mmcfg resources have been placed into the resource table. */ 25/* Indicate if the mmcfg resources have been placed into the resource table. */
30static int __initdata pci_mmcfg_resources_inserted; 26static int __initdata pci_mmcfg_resources_inserted;
31 27
32static __init int extend_mmcfg(int num) 28LIST_HEAD(pci_mmcfg_list);
29
30static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
33{ 31{
34 struct acpi_mcfg_allocation *new; 32 if (cfg->res.parent)
35 int new_num = pci_mmcfg_config_num + num; 33 release_resource(&cfg->res);
34 list_del(&cfg->list);
35 kfree(cfg);
36}
36 37
37 new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); 38static __init void free_all_mmcfg(void)
38 if (!new) 39{
39 return -1; 40 struct pci_mmcfg_region *cfg, *tmp;
41
42 pci_mmcfg_arch_free();
43 list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
44 pci_mmconfig_remove(cfg);
45}
40 46
41 if (pci_mmcfg_config) { 47static __init void list_add_sorted(struct pci_mmcfg_region *new)
42 memcpy(new, pci_mmcfg_config, 48{
43 sizeof(pci_mmcfg_config[0]) * new_num); 49 struct pci_mmcfg_region *cfg;
44 kfree(pci_mmcfg_config); 50
51 /* keep list sorted by segment and starting bus number */
52 list_for_each_entry(cfg, &pci_mmcfg_list, list) {
53 if (cfg->segment > new->segment ||
54 (cfg->segment == new->segment &&
55 cfg->start_bus >= new->start_bus)) {
56 list_add_tail(&new->list, &cfg->list);
57 return;
58 }
45 } 59 }
46 pci_mmcfg_config = new; 60 list_add_tail(&new->list, &pci_mmcfg_list);
61}
47 62
48 return 0; 63static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
64 int end, u64 addr)
65{
66 struct pci_mmcfg_region *new;
67 int num_buses;
68 struct resource *res;
69
70 if (addr == 0)
71 return NULL;
72
73 new = kzalloc(sizeof(*new), GFP_KERNEL);
74 if (!new)
75 return NULL;
76
77 new->address = addr;
78 new->segment = segment;
79 new->start_bus = start;
80 new->end_bus = end;
81
82 list_add_sorted(new);
83
84 num_buses = end - start + 1;
85 res = &new->res;
86 res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
87 res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1;
88 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
89 snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
90 "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
91 res->name = new->name;
92
93 printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at "
94 "%pR (base %#lx)\n", segment, start, end, &new->res,
95 (unsigned long) addr);
96
97 return new;
49} 98}
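
Each bus decodes a 1 MiB slice of the MMCONFIG aperture (32 devices x 8 functions x 4 KiB of extended config space). Assuming PCI_MMCFG_BUS_OFFSET(b) expands to (b) << 20, as defined alongside this patch, the resource bounds work out as in this sketch (the 0xE0000000 base is only an example):

    #include <stdint.h>
    #include <stdio.h>

    /* assumed definition: each bus is a 1 MiB slice of the aperture */
    #define PCI_MMCFG_BUS_OFFSET(bus) ((uint64_t)(bus) << 20)

    int main(void)
    {
        uint64_t addr = 0xe0000000;           /* example MCFG base */
        int start = 0, end = 255;
        int num_buses = end - start + 1;

        /* same bounds as pci_mmconfig_add() computes for its resource */
        printf("res: [0x%llx-0x%llx]\n",
               (unsigned long long)(addr + PCI_MMCFG_BUS_OFFSET(start)),
               (unsigned long long)(addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1));
        return 0;
    }
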
50 99
51static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) 100struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
52{ 101{
53 int i = pci_mmcfg_config_num; 102 struct pci_mmcfg_region *cfg;
54 103
55 pci_mmcfg_config_num++; 104 list_for_each_entry(cfg, &pci_mmcfg_list, list)
56 pci_mmcfg_config[i].address = addr; 105 if (cfg->segment == segment &&
57 pci_mmcfg_config[i].pci_segment = segment; 106 cfg->start_bus <= bus && bus <= cfg->end_bus)
58 pci_mmcfg_config[i].start_bus_number = start; 107 return cfg;
59 pci_mmcfg_config[i].end_bus_number = end; 108
109 return NULL;
60} 110}
61 111
62static const char __init *pci_mmcfg_e7520(void) 112static const char __init *pci_mmcfg_e7520(void)
@@ -68,11 +118,9 @@ static const char __init *pci_mmcfg_e7520(void)
68 if (win == 0x0000 || win == 0xf000) 118 if (win == 0x0000 || win == 0xf000)
69 return NULL; 119 return NULL;
70 120
71 if (extend_mmcfg(1) == -1) 121 if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL)
72 return NULL; 122 return NULL;
73 123
74 fill_one_mmcfg(win << 16, 0, 0, 255);
75
76 return "Intel Corporation E7520 Memory Controller Hub"; 124 return "Intel Corporation E7520 Memory Controller Hub";
77} 125}
78 126
@@ -114,11 +162,9 @@ static const char __init *pci_mmcfg_intel_945(void)
114 if ((pciexbar & mask) >= 0xf0000000U) 162 if ((pciexbar & mask) >= 0xf0000000U)
115 return NULL; 163 return NULL;
116 164
117 if (extend_mmcfg(1) == -1) 165 if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL)
118 return NULL; 166 return NULL;
119 167
120 fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1);
121
122 return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; 168 return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
123} 169}
124 170
@@ -127,7 +173,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
127 u32 low, high, address; 173 u32 low, high, address;
128 u64 base, msr; 174 u64 base, msr;
129 int i; 175 int i;
130 unsigned segnbits = 0, busnbits; 176 unsigned segnbits = 0, busnbits, end_bus;
131 177
132 if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) 178 if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF))
133 return NULL; 179 return NULL;
@@ -161,11 +207,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
161 busnbits = 8; 207 busnbits = 8;
162 } 208 }
163 209
164 if (extend_mmcfg(1 << segnbits) == -1) 210 end_bus = (1 << busnbits) - 1;
165 return NULL;
166
167 for (i = 0; i < (1 << segnbits); i++) 211 for (i = 0; i < (1 << segnbits); i++)
168 fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); 212 if (pci_mmconfig_add(i, 0, end_bus,
213 base + (1<<28) * i) == NULL) {
214 free_all_mmcfg();
215 return NULL;
216 }
169 217
170 return "AMD Family 10h NB"; 218 return "AMD Family 10h NB";
171} 219}
@@ -190,7 +238,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void)
190 /* 238 /*
191 * do check if amd fam10h already took over 239 * do check if amd fam10h already took over
192 */ 240 */
193 if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) 241 if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked)
194 return NULL; 242 return NULL;
195 243
196 mcp55_checked = true; 244 mcp55_checked = true;
@@ -213,16 +261,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void)
213 if (!(extcfg & extcfg_enable_mask)) 261 if (!(extcfg & extcfg_enable_mask))
214 continue; 262 continue;
215 263
216 if (extend_mmcfg(1) == -1)
217 continue;
218
219 size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; 264 size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift;
220 base = extcfg & extcfg_base_mask[size_index]; 265 base = extcfg & extcfg_base_mask[size_index];
 221 /* base could be > 4G */ 266
222 base <<= extcfg_base_lshift; 267 base <<= extcfg_base_lshift;
223 start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; 268 start = (extcfg & extcfg_start_mask) >> extcfg_start_shift;
224 end = start + extcfg_sizebus[size_index] - 1; 269 end = start + extcfg_sizebus[size_index] - 1;
225 fill_one_mmcfg(base, 0, start, end); 270 if (pci_mmconfig_add(0, start, end, base) == NULL)
271 continue;
226 mcp55_mmconf_found++; 272 mcp55_mmconf_found++;
227 } 273 }
228 274
@@ -253,45 +299,22 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
253 0x0369, pci_mmcfg_nvidia_mcp55 }, 299 0x0369, pci_mmcfg_nvidia_mcp55 },
254}; 300};
255 301
256static int __init cmp_mmcfg(const void *x1, const void *x2)
257{
258 const typeof(pci_mmcfg_config[0]) *m1 = x1;
259 const typeof(pci_mmcfg_config[0]) *m2 = x2;
260 int start1, start2;
261
262 start1 = m1->start_bus_number;
263 start2 = m2->start_bus_number;
264
265 return start1 - start2;
266}
267
268static void __init pci_mmcfg_check_end_bus_number(void) 302static void __init pci_mmcfg_check_end_bus_number(void)
269{ 303{
270 int i; 304 struct pci_mmcfg_region *cfg, *cfgx;
271 typeof(pci_mmcfg_config[0]) *cfg, *cfgx;
272
273 /* sort them at first */
274 sort(pci_mmcfg_config, pci_mmcfg_config_num,
275 sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL);
276
277 /* last one*/
278 if (pci_mmcfg_config_num > 0) {
279 i = pci_mmcfg_config_num - 1;
280 cfg = &pci_mmcfg_config[i];
281 if (cfg->end_bus_number < cfg->start_bus_number)
282 cfg->end_bus_number = 255;
283 }
284 305
285 /* don't overlap please */ 306 /* Fixup overlaps */
286 for (i = 0; i < pci_mmcfg_config_num - 1; i++) { 307 list_for_each_entry(cfg, &pci_mmcfg_list, list) {
287 cfg = &pci_mmcfg_config[i]; 308 if (cfg->end_bus < cfg->start_bus)
288 cfgx = &pci_mmcfg_config[i+1]; 309 cfg->end_bus = 255;
289 310
 290 if (cfg->end_bus_number < cfg->start_bus_number) 311 /* Don't access the list head! */
291 cfg->end_bus_number = 255; 312 if (cfg->list.next == &pci_mmcfg_list)
313 break;
292 314
293 if (cfg->end_bus_number >= cfgx->start_bus_number) 315 cfgx = list_entry(cfg->list.next, typeof(*cfg), list);
294 cfg->end_bus_number = cfgx->start_bus_number - 1; 316 if (cfg->end_bus >= cfgx->start_bus)
317 cfg->end_bus = cfgx->start_bus - 1;
295 } 318 }
296} 319}
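
Because the list is kept sorted, the fixup only has to look at each entry's successor: a wrapped end_bus is widened to 255 and then clipped below the next region's start_bus. The same pass on a plain array, with made-up regions (illustrative only):

    #include <stdio.h>

    struct region { int start_bus, end_bus; };

    int main(void)
    {
        /* sorted regions: one with a wrapped end, one overlapping it */
        struct region r[] = { { 0x00, -1 }, { 0x40, 0xff } };
        int i, n = 2;

        for (i = 0; i < n; i++) {
            if (r[i].end_bus < r[i].start_bus)
                r[i].end_bus = 255;                     /* widen wrap */
            if (i + 1 < n && r[i].end_bus >= r[i + 1].start_bus)
                r[i].end_bus = r[i + 1].start_bus - 1;  /* clip overlap */
        }

        for (i = 0; i < n; i++)
            printf("[bus %02x-%02x]\n", r[i].start_bus, r[i].end_bus);
        return 0;
    }
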
297 320
@@ -306,8 +329,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
306 if (!raw_pci_ops) 329 if (!raw_pci_ops)
307 return 0; 330 return 0;
308 331
309 pci_mmcfg_config_num = 0; 332 free_all_mmcfg();
310 pci_mmcfg_config = NULL;
311 333
312 for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { 334 for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
313 bus = pci_mmcfg_probes[i].bus; 335 bus = pci_mmcfg_probes[i].bus;
@@ -322,45 +344,22 @@ static int __init pci_mmcfg_check_hostbridge(void)
 		name = pci_mmcfg_probes[i].probe();
 
 		if (name)
-			printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n",
-			       name);
+			printk(KERN_INFO PREFIX "%s with MMCONFIG support\n",
+			       name);
 	}
 
 	/* some end_bus_number is crazy, fix it */
 	pci_mmcfg_check_end_bus_number();
 
-	return pci_mmcfg_config_num != 0;
+	return !list_empty(&pci_mmcfg_list);
 }
 
 static void __init pci_mmcfg_insert_resources(void)
 {
-#define PCI_MMCFG_RESOURCE_NAME_LEN 24
-	int i;
-	struct resource *res;
-	char *names;
-	unsigned num_buses;
-
-	res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
-		      pci_mmcfg_config_num, GFP_KERNEL);
-	if (!res) {
-		printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
-		return;
-	}
+	struct pci_mmcfg_region *cfg;
 
-	names = (void *)&res[pci_mmcfg_config_num];
-	for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
-		struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
-		num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
-		res->name = names;
-		snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN,
-			 "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment,
-			 cfg->start_bus_number, cfg->end_bus_number);
-		res->start = cfg->address + (cfg->start_bus_number << 20);
-		res->end = res->start + (num_buses << 20) - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		insert_resource(&iomem_resource, res);
-		names += PCI_MMCFG_RESOURCE_NAME_LEN;
-	}
+	list_for_each_entry(cfg, &pci_mmcfg_list, list)
+		insert_resource(&iomem_resource, &cfg->res);
 
 	/* Mark that the resources have been inserted. */
 	pci_mmcfg_resources_inserted = 1;
@@ -437,11 +436,12 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
 typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
 
 static int __init is_mmconf_reserved(check_reserved_t is_reserved,
-				     u64 addr, u64 size, int i,
-				     typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
+				     struct pci_mmcfg_region *cfg, int with_e820)
 {
+	u64 addr = cfg->res.start;
+	u64 size = resource_size(&cfg->res);
 	u64 old_size = size;
-	int valid = 0;
+	int valid = 0, num_buses;
 
 	while (!is_reserved(addr, addr + size, E820_RESERVED)) {
 		size >>= 1;
@@ -450,19 +450,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved,
 	}
 
 	if (size >= (16UL<<20) || size == old_size) {
-		printk(KERN_NOTICE
-		       "PCI: MCFG area at %Lx reserved in %s\n",
-		       addr, with_e820?"E820":"ACPI motherboard resources");
+		printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n",
+		       &cfg->res,
+		       with_e820 ? "E820" : "ACPI motherboard resources");
 		valid = 1;
 
 		if (old_size != size) {
-			/* update end_bus_number */
-			cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1);
-			printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx "
-			       "segment %hu buses %u - %u\n",
-			       i, (unsigned long)cfg->address, cfg->pci_segment,
-			       (unsigned int)cfg->start_bus_number,
-			       (unsigned int)cfg->end_bus_number);
+			/* update end_bus */
+			cfg->end_bus = cfg->start_bus + ((size>>20) - 1);
+			num_buses = cfg->end_bus - cfg->start_bus + 1;
+			cfg->res.end = cfg->res.start +
+				       PCI_MMCFG_BUS_OFFSET(num_buses) - 1;
+			snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN,
+				 "PCI MMCONFIG %04x [bus %02x-%02x]",
+				 cfg->segment, cfg->start_bus, cfg->end_bus);
+			printk(KERN_INFO PREFIX
+			       "MMCONFIG for %04x [bus%02x-%02x] "
+			       "at %pR (base %#lx) (size reduced!)\n",
+			       cfg->segment, cfg->start_bus, cfg->end_bus,
+			       &cfg->res, (unsigned long) cfg->address);
 		}
 	}
 
@@ -471,45 +477,26 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved,
 
 static void __init pci_mmcfg_reject_broken(int early)
 {
-	typeof(pci_mmcfg_config[0]) *cfg;
-	int i;
+	struct pci_mmcfg_region *cfg;
 
-	if ((pci_mmcfg_config_num == 0) ||
-	    (pci_mmcfg_config == NULL) ||
-	    (pci_mmcfg_config[0].address == 0))
-		return;
-
-	for (i = 0; i < pci_mmcfg_config_num; i++) {
+	list_for_each_entry(cfg, &pci_mmcfg_list, list) {
 		int valid = 0;
-		u64 addr, size;
-
-		cfg = &pci_mmcfg_config[i];
-		addr = cfg->start_bus_number;
-		addr <<= 20;
-		addr += cfg->address;
-		size = cfg->end_bus_number + 1 - cfg->start_bus_number;
-		size <<= 20;
-		printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
-		       "segment %hu buses %u - %u\n",
-		       i, (unsigned long)cfg->address, cfg->pci_segment,
-		       (unsigned int)cfg->start_bus_number,
-		       (unsigned int)cfg->end_bus_number);
 
 		if (!early && !acpi_disabled)
-			valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
+			valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0);
 
 		if (valid)
 			continue;
 
 		if (!early)
-			printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
-			       " reserved in ACPI motherboard resources\n",
-			       cfg->address);
+			printk(KERN_ERR FW_BUG PREFIX
+			       "MMCONFIG at %pR not reserved in "
+			       "ACPI motherboard resources\n", &cfg->res);
 
 		/* Don't try to do this check unless configuration
 		   type 1 is available. how about type 2 ?*/
 		if (raw_pci_ops)
-			valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1);
+			valid = is_mmconf_reserved(e820_all_mapped, cfg, 1);
 
 		if (!valid)
 			goto reject;
@@ -518,34 +505,41 @@ static void __init pci_mmcfg_reject_broken(int early)
 	return;
 
 reject:
-	printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
-	pci_mmcfg_arch_free();
-	kfree(pci_mmcfg_config);
-	pci_mmcfg_config = NULL;
-	pci_mmcfg_config_num = 0;
+	printk(KERN_INFO PREFIX "not using MMCONFIG\n");
+	free_all_mmcfg();
 }
 
 static int __initdata known_bridge;
 
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
+					struct acpi_mcfg_allocation *cfg)
+{
+	int year;
 
-/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
-struct acpi_mcfg_allocation *pci_mmcfg_config;
-int pci_mmcfg_config_num;
+	if (cfg->address < 0xFFFFFFFF)
+		return 0;
 
-static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
-{
 	if (!strcmp(mcfg->header.oem_id, "SGI"))
-		acpi_mcfg_64bit_base_addr = TRUE;
+		return 0;
 
-	return 0;
+	if (mcfg->header.revision >= 1) {
+		if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) &&
+		    year >= 2010)
+			return 0;
+	}
+
+	printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx "
+	       "is above 4GB, ignored\n", cfg->pci_segment,
+	       cfg->start_bus_number, cfg->end_bus_number, cfg->address);
+	return -EINVAL;
 }
 
 static int __init pci_parse_mcfg(struct acpi_table_header *header)
 {
 	struct acpi_table_mcfg *mcfg;
+	struct acpi_mcfg_allocation *cfg_table, *cfg;
 	unsigned long i;
-	int config_size;
+	int entries;
 
 	if (!header)
 		return -EINVAL;
@@ -553,38 +547,33 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
 	mcfg = (struct acpi_table_mcfg *)header;
 
 	/* how many config structures do we have */
-	pci_mmcfg_config_num = 0;
+	free_all_mmcfg();
+	entries = 0;
 	i = header->length - sizeof(struct acpi_table_mcfg);
 	while (i >= sizeof(struct acpi_mcfg_allocation)) {
-		++pci_mmcfg_config_num;
+		entries++;
 		i -= sizeof(struct acpi_mcfg_allocation);
 	};
-	if (pci_mmcfg_config_num == 0) {
+	if (entries == 0) {
 		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
 		return -ENODEV;
 	}
 
-	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
-	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
-	if (!pci_mmcfg_config) {
-		printk(KERN_WARNING PREFIX
-		       "No memory for MCFG config tables\n");
-		return -ENOMEM;
-	}
-
-	memcpy(pci_mmcfg_config, &mcfg[1], config_size);
-
-	acpi_mcfg_oem_check(mcfg);
-
-	for (i = 0; i < pci_mmcfg_config_num; ++i) {
-		if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
-		    !acpi_mcfg_64bit_base_addr) {
-			printk(KERN_ERR PREFIX
-			       "MMCONFIG not in low 4GB of memory\n");
-			kfree(pci_mmcfg_config);
-			pci_mmcfg_config_num = 0;
+	cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
+	for (i = 0; i < entries; i++) {
+		cfg = &cfg_table[i];
+		if (acpi_mcfg_check_entry(mcfg, cfg)) {
+			free_all_mmcfg();
 			return -ENODEV;
 		}
+
+		if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
+				   cfg->end_bus_number, cfg->address) == NULL) {
+			printk(KERN_WARNING PREFIX
+			       "no memory for MCFG entries\n");
+			free_all_mmcfg();
+			return -ENOMEM;
+		}
 	}
 
 	return 0;
@@ -614,9 +603,7 @@ static void __init __pci_mmcfg_init(int early)
 
 	pci_mmcfg_reject_broken(early);
 
-	if ((pci_mmcfg_config_num == 0) ||
-	    (pci_mmcfg_config == NULL) ||
-	    (pci_mmcfg_config[0].address == 0))
+	if (list_empty(&pci_mmcfg_list))
 		return;
 
 	if (pci_mmcfg_arch_init())
@@ -648,9 +635,7 @@ static int __init pci_mmcfg_late_insert_resources(void)
 	 */
 	if ((pci_mmcfg_resources_inserted == 1) ||
 	    (pci_probe & PCI_PROBE_MMCONF) == 0 ||
-	    (pci_mmcfg_config_num == 0) ||
-	    (pci_mmcfg_config == NULL) ||
-	    (pci_mmcfg_config[0].address == 0))
+	    list_empty(&pci_mmcfg_list))
 		return 1;
 
 	/*
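For reference while reading the hunks above: the rework replaces the pci_mmcfg_config[] array with a linked list of struct pci_mmcfg_region, and lookups go through pci_mmconfig_lookup(). Below is a minimal sketch of both, reconstructed only from the fields and calls visible in this patch; the authoritative definitions live in arch/x86/include/asm/pci_x86.h and mmconfig-shared.c, and the exact field order here is an assumption.

#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/types.h>

extern struct list_head pci_mmcfg_list;	/* defined in mmconfig-shared.c */

struct pci_mmcfg_region {
	struct list_head list;		/* linked into pci_mmcfg_list */
	struct resource res;		/* covers the whole bus range */
	u64 address;			/* MMCONFIG aperture base */
	char __iomem *virt;		/* 64-bit static mapping, else NULL */
	u16 segment;
	u8 start_bus;
	u8 end_bus;
	char name[24];			/* PCI_MMCFG_RESOURCE_NAME_LEN */
};

/* Find the region covering (segment, bus), as the new code paths do. */
struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
{
	struct pci_mmcfg_region *cfg;

	list_for_each_entry(cfg, &pci_mmcfg_list, list)
		if (cfg->segment == segment &&
		    cfg->start_bus <= bus && bus <= cfg->end_bus)
			return cfg;

	return NULL;
}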
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f10a7e94a84c..90d5fd476ed4 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -27,18 +27,10 @@ static int mmcfg_last_accessed_cpu;
  */
 static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
 {
-	struct acpi_mcfg_allocation *cfg;
-	int cfg_num;
-
-	for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
-		cfg = &pci_mmcfg_config[cfg_num];
-		if (cfg->pci_segment == seg &&
-		    (cfg->start_bus_number <= bus) &&
-		    (cfg->end_bus_number >= bus))
-			return cfg->address;
-	}
+	struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
 
-	/* Fall back to type 0 */
+	if (cfg)
+		return cfg->address;
 	return 0;
 }
 
@@ -47,7 +39,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
  */
 static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn)
 {
-	u32 dev_base = base | (bus << 20) | (devfn << 12);
+	u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12);
 	int cpu = smp_processor_id();
 	if (dev_base != mmcfg_last_accessed_device ||
 	    cpu != mmcfg_last_accessed_cpu) {
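The PCI_MMCFG_BUS_OFFSET() substitutions above all encode the same ECAM layout fact: each bus owns 1 MB of the aperture, split into 256 devfn slots of 4 KB. A short sketch of the full offset arithmetic, assuming the macro expands to (bus) << 20 as the replaced (bus << 20) expressions imply; mmcfg_offset() is a hypothetical helper, not part of the patch.

#define PCI_MMCFG_BUS_OFFSET(bus)	((bus) << 20)

/* Byte offset of a config register inside an MMCONFIG aperture. */
static inline u32 mmcfg_offset(unsigned int bus, unsigned int devfn,
			       unsigned int reg)
{
	/* bus: 1 MB each; devfn: 4 KB each; reg: byte within the 4 KB */
	return PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12) | (reg & 0xfff);
}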
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 94349f8b2f96..e783841bd1d7 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -12,38 +12,15 @@
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
 
-/* Static virtual mapping of the MMCONFIG aperture */
-struct mmcfg_virt {
-	struct acpi_mcfg_allocation *cfg;
-	char __iomem *virt;
-};
-static struct mmcfg_virt *pci_mmcfg_virt;
-
-static char __iomem *get_virt(unsigned int seg, unsigned bus)
-{
-	struct acpi_mcfg_allocation *cfg;
-	int cfg_num;
-
-	for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
-		cfg = pci_mmcfg_virt[cfg_num].cfg;
-		if (cfg->pci_segment == seg &&
-		    (cfg->start_bus_number <= bus) &&
-		    (cfg->end_bus_number >= bus))
-			return pci_mmcfg_virt[cfg_num].virt;
-	}
-
-	/* Fall back to type 0 */
-	return NULL;
-}
+#define PREFIX "PCI: "
 
 static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
 {
-	char __iomem *addr;
+	struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
 
-	addr = get_virt(seg, bus);
-	if (!addr)
-		return NULL;
-	return addr + ((bus << 20) | (devfn << 12));
+	if (cfg && cfg->virt)
+		return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+	return NULL;
 }
 
 static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
@@ -109,42 +86,30 @@ static struct pci_raw_ops pci_mmcfg = {
 	.write =	pci_mmcfg_write,
 };
 
-static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
+static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg)
 {
 	void __iomem *addr;
 	u64 start, size;
+	int num_buses;
 
-	start = cfg->start_bus_number;
-	start <<= 20;
-	start += cfg->address;
-	size = cfg->end_bus_number + 1 - cfg->start_bus_number;
-	size <<= 20;
+	start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
+	num_buses = cfg->end_bus - cfg->start_bus + 1;
+	size = PCI_MMCFG_BUS_OFFSET(num_buses);
 	addr = ioremap_nocache(start, size);
-	if (addr) {
-		printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
-		       start, start + size - 1);
-		addr -= cfg->start_bus_number << 20;
-	}
+	if (addr)
+		addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
 	return addr;
 }
 
 int __init pci_mmcfg_arch_init(void)
 {
-	int i;
-	pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) *
-				 pci_mmcfg_config_num, GFP_KERNEL);
-	if (pci_mmcfg_virt == NULL) {
-		printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n");
-		return 0;
-	}
+	struct pci_mmcfg_region *cfg;
 
-	for (i = 0; i < pci_mmcfg_config_num; ++i) {
-		pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
-		pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]);
-		if (!pci_mmcfg_virt[i].virt) {
-			printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
-					"segment %d\n",
-				pci_mmcfg_config[i].pci_segment);
+	list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+		cfg->virt = mcfg_ioremap(cfg);
+		if (!cfg->virt) {
+			printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n",
+			       &cfg->res);
 			pci_mmcfg_arch_free();
 			return 0;
 		}
@@ -155,19 +120,12 @@ int __init pci_mmcfg_arch_init(void)
 
 void __init pci_mmcfg_arch_free(void)
 {
-	int i;
-
-	if (pci_mmcfg_virt == NULL)
-		return;
+	struct pci_mmcfg_region *cfg;
 
-	for (i = 0; i < pci_mmcfg_config_num; ++i) {
-		if (pci_mmcfg_virt[i].virt) {
-			iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20));
-			pci_mmcfg_virt[i].virt = NULL;
-			pci_mmcfg_virt[i].cfg = NULL;
+	list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+		if (cfg->virt) {
+			iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus));
+			cfg->virt = NULL;
 		}
 	}
-
-	kfree(pci_mmcfg_virt);
-	pci_mmcfg_virt = NULL;
 }
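One detail worth keeping in mind about mcfg_ioremap() above: it maps only the [start_bus, end_bus] slice of the aperture, then rewinds the returned pointer by start_bus's offset so that pci_dev_base() can index with absolute bus numbers. A sketch of that arithmetic in isolation; map_mmcfg_window() is a hypothetical name:

static void __iomem *map_mmcfg_window(struct pci_mmcfg_region *cfg)
{
	u64 start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
	u64 size = PCI_MMCFG_BUS_OFFSET(cfg->end_bus - cfg->start_bus + 1);
	void __iomem *addr = ioremap_nocache(start, size);

	if (!addr)
		return NULL;
	/*
	 * Bias the pointer so that addr + PCI_MMCFG_BUS_OFFSET(bus) is
	 * valid for any bus in [start_bus, end_bus]; never dereference
	 * it below start_bus's offset.
	 */
	return addr - PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
}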
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 8eb295e116f6..8884a1c1ada6 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -8,9 +8,7 @@
 #include <asm/apic.h>
 #include <asm/mpspec.h>
 #include <asm/pci_x86.h>
-
-#define XQUAD_PORTIO_BASE 0xfe400000
-#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
+#include <asm/numaq.h>
 
 #define BUS2QUAD(global) (mp_bus_id_to_node[global])
 
@@ -18,8 +16,6 @@
 
 #define QUADLOCAL2BUS(quad,local)	(quad_local_to_mp_bus_id[quad][local])
 
-#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
-
 #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
 	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
 
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
index 0d13cd9fdcff..fd1ab80be0de 100644
--- a/arch/x86/tools/chkobjdump.awk
+++ b/arch/x86/tools/chkobjdump.awk
@@ -8,14 +8,24 @@ BEGIN {
 	od_sver = 19;
 }
 
-/^GNU/ {
-	split($4, ver, ".");
+/^GNU objdump/ {
+	verstr = ""
+	for (i = 3; i <= NF; i++)
+		if (match($(i), "^[0-9]")) {
+			verstr = $(i);
+			break;
+		}
+	if (verstr == "") {
+		printf("Warning: Failed to find objdump version number.\n");
+		exit 0;
+	}
+	split(verstr, ver, ".");
 	if (ver[1] > od_ver ||
 	    (ver[1] == od_ver && ver[2] >= od_sver)) {
 		exit 1;
 	} else {
 		printf("Warning: objdump version %s is older than %d.%d\n",
-			$4, od_ver, od_sver);
+			verstr, od_ver, od_sver);
 		print("Warning: Skipping posttest.");
 		# Logic is inverted, because we just skip test without error.
 		exit 0;
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index e34e92a28eb6..eaf11f52fc0b 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -6,8 +6,6 @@
 
 # Awk implementation sanity check
 function check_awk_implement() {
-	if (!match("abc", "[[:lower:]]+"))
-		return "Your awk doesn't support charactor-class."
 	if (sprintf("%x", 0) != "0")
 		return "Your awk has a printf-format problem."
 	return ""
@@ -44,12 +42,12 @@ BEGIN {
 	delete gtable
 	delete atable
 
-	opnd_expr = "^[[:alpha:]/]"
+	opnd_expr = "^[A-Za-z/]"
 	ext_expr = "^\\("
 	sep_expr = "^\\|$"
-	group_expr = "^Grp[[:alnum:]]+"
+	group_expr = "^Grp[0-9A-Za-z]+"
 
-	imm_expr = "^[IJAO][[:lower:]]"
+	imm_expr = "^[IJAO][a-z]"
 	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -62,7 +60,7 @@ BEGIN {
 	imm_flag["Ob"] = "INAT_MOFFSET"
 	imm_flag["Ov"] = "INAT_MOFFSET"
 
-	modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])"
+	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
 	force64_expr = "\\([df]64\\)"
 	rex_expr = "^REX(\\.[XRWB]+)*"
 	fpu_expr = "^ESC" # TODO
@@ -226,12 +224,12 @@ function add_flags(old,new) {
 }
 
 # convert operands to flags.
-function convert_operands(opnd,       i,imm,mod)
+function convert_operands(count,opnd,       i,j,imm,mod)
 {
 	imm = null
 	mod = null
-	for (i in opnd) {
-		i = opnd[i]
+	for (j = 1; j <= count; j++) {
+		i = opnd[j]
 		if (match(i, imm_expr) == 1) {
 			if (!imm_flag[i])
 				semantic_error("Unknown imm opnd: " i)
@@ -282,8 +280,8 @@ function convert_operands(opnd, i,imm,mod)
 	# parse one opcode
 	if (match($i, opnd_expr)) {
 		opnd = $i
-		split($(i++), opnds, ",")
-		flags = convert_operands(opnds)
+		count = split($(i++), opnds, ",")
+		flags = convert_operands(count, opnds)
 	}
 	if (match($i, ext_expr))
 		ext = $(i++)
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index d8214dc03fa7..13403fc95a96 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -43,7 +43,7 @@ static int x86_64;
 static void usage(void)
 {
 	fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
-		" %s [-y|-n] [-v] \n", prog);
+		" %s [-y|-n] [-v]\n", prog);
 	fprintf(stderr, "\t-y	64bit mode\n");
 	fprintf(stderr, "\t-n	32bit mode\n");
 	fprintf(stderr, "\t-v	verbose mode\n");
@@ -69,7 +69,7 @@ static void dump_field(FILE *fp, const char *name, const char *indent,
 
 static void dump_insn(FILE *fp, struct insn *insn)
 {
-	fprintf(fp, "Instruction = { \n");
+	fprintf(fp, "Instruction = {\n");
 	dump_field(fp, "prefixes", "\t",	&insn->prefixes);
 	dump_field(fp, "rex_prefix", "\t",	&insn->rex_prefix);
 	dump_field(fp, "vex_prefix", "\t",	&insn->vex_prefix);
@@ -113,7 +113,7 @@ int main(int argc, char **argv)
 	char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
 	unsigned char insn_buf[16];
 	struct insn insn;
-	int insns = 0, c;
+	int insns = 0;
 	int warnings = 0;
 
 	parse_args(argc, argv);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c462cea8ef09..36daccb68642 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -27,7 +27,9 @@
 #include <linux/page-flags.h>
 #include <linux/highmem.h>
 #include <linux/console.h>
+#include <linux/pci.h>
 
+#include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 #include <xen/interface/physdev.h>
@@ -138,24 +140,23 @@ static void xen_vcpu_setup(int cpu)
  */
 void xen_vcpu_restore(void)
 {
-	if (have_vcpu_info_placement) {
-		int cpu;
+	int cpu;
 
-		for_each_online_cpu(cpu) {
-			bool other_cpu = (cpu != smp_processor_id());
+	for_each_online_cpu(cpu) {
+		bool other_cpu = (cpu != smp_processor_id());
 
-			if (other_cpu &&
-			    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
-				BUG();
+		if (other_cpu &&
+		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+			BUG();
 
-			xen_vcpu_setup(cpu);
+		xen_setup_runstate_info(cpu);
 
-			if (other_cpu &&
-			    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
-				BUG();
-		}
+		if (have_vcpu_info_placement)
+			xen_vcpu_setup(cpu);
 
-		BUG_ON(!have_vcpu_info_placement);
+		if (other_cpu &&
+		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+			BUG();
 	}
 }
 
@@ -1150,9 +1151,13 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* keep using Xen gdt for now; no urgent need to change it */
 
+#ifdef CONFIG_X86_32
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
+#else
+	pv_info.kernel_rpl = 0;
+#endif
 
 	/* set the limit of our address space */
 	xen_reserve_top();
@@ -1176,10 +1181,16 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
+	} else {
+		/* Make sure ACS will be enabled */
+		pci_request_acs();
 	}
+
 
 	xen_raw_console_write("about to get started...\n");
 
+	xen_setup_runstate_info(0);
+
 	/* Start the world */
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3bf7b1d250ce..bf4cd6bfe959 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn)
 }
 
 /* Build the parallel p2m_top_mfn structures */
-static void __init xen_build_mfn_list_list(void)
+void xen_build_mfn_list_list(void)
 {
 	unsigned pfn, idx;
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 738da0cb0d8b..563d20504988 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,10 +35,10 @@
 
 cpumask_var_t xen_cpu_initialized_map;
 
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
-static DEFINE_PER_CPU(int, callfuncsingle_irq);
-static DEFINE_PER_CPU(int, debug_irq) = -1;
+static DEFINE_PER_CPU(int, xen_resched_irq);
+static DEFINE_PER_CPU(int, xen_callfunc_irq);
+static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
+static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -103,7 +103,7 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(resched_irq, cpu) = rc;
+	per_cpu(xen_resched_irq, cpu) = rc;
 
 	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
@@ -114,7 +114,7 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(callfunc_irq, cpu) = rc;
+	per_cpu(xen_callfunc_irq, cpu) = rc;
 
 	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
 	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
@@ -122,7 +122,7 @@ static int xen_smp_intr_init(unsigned int cpu)
 				     debug_name, NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(debug_irq, cpu) = rc;
+	per_cpu(xen_debug_irq, cpu) = rc;
 
 	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
@@ -133,19 +133,20 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(callfuncsingle_irq, cpu) = rc;
+	per_cpu(xen_callfuncsingle_irq, cpu) = rc;
 
 	return 0;
 
  fail:
-	if (per_cpu(resched_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
-	if (per_cpu(callfunc_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
-	if (per_cpu(debug_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
-	if (per_cpu(callfuncsingle_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+	if (per_cpu(xen_resched_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+	if (per_cpu(xen_callfunc_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+	if (per_cpu(xen_debug_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+	if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
+				       NULL);
 
 	return rc;
 }
@@ -295,6 +296,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 		     (unsigned long)task_stack_page(idle) -
 		     KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
+	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
 	xen_init_lock_cpu(cpu);
 
@@ -348,10 +350,10 @@ static void xen_cpu_die(unsigned int cpu)
 		current->state = TASK_UNINTERRUPTIBLE;
 		schedule_timeout(HZ/10);
 	}
-	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
-	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
-	unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
-	unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
 	xen_uninit_lock_cpu(cpu);
 	xen_teardown_timer(cpu);
 
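The pattern behind the renames in this file, in a minimal sketch: with the 2.6.33 percpu rework, variables declared via DEFINE_PER_CPU() share the kernel's global symbol namespace, so subsystem-generic names such as resched_irq needed a xen_ prefix to avoid clashes.

static DEFINE_PER_CPU(int, xen_resched_irq);	/* one slot per CPU */

static void record_resched_irq(unsigned int cpu, int irq)
{
	/* per_cpu() indexes another CPU's slot by number */
	per_cpu(xen_resched_irq, cpu) = irq;
}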
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 36a5141108df..24ded31b5aec 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -120,14 +120,14 @@ struct xen_spinlock {
 	unsigned short spinners;	/* count of waiting cpus */
 };
 
-static int xen_spin_is_locked(struct raw_spinlock *lock)
+static int xen_spin_is_locked(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
 	return xl->lock != 0;
 }
 
-static int xen_spin_is_contended(struct raw_spinlock *lock)
+static int xen_spin_is_contended(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
@@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct raw_spinlock *lock)
 	return xl->spinners != 0;
 }
 
-static int xen_spin_trylock(struct raw_spinlock *lock)
+static int xen_spin_trylock(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	u8 old = 1;
@@ -181,7 +181,7 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
 	__get_cpu_var(lock_spinners) = prev;
 }
 
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
+static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	struct xen_spinlock *prev;
@@ -254,7 +254,7 @@ out:
 	return ret;
 }
 
-static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
+static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	unsigned timeout;
@@ -291,12 +291,12 @@ static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
 	spin_time_accum_total(start_spin);
 }
 
-static void xen_spin_lock(struct raw_spinlock *lock)
+static void xen_spin_lock(struct arch_spinlock *lock)
 {
 	__xen_spin_lock(lock, false);
 }
 
-static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
 {
 	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
 }
@@ -317,7 +317,7 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 	}
 }
 
-static void xen_spin_unlock(struct raw_spinlock *lock)
+static void xen_spin_unlock(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
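All the hunks in this file are the raw_spinlock -> arch_spinlock parameter rename that tracks the core spinlock type split. For context, these functions are paravirt hooks; they are installed into pv_lock_ops elsewhere in the same file, roughly as sketched below (the assignment list is assumed from the surrounding code, not shown in this diff):

static void __init example_init_spinlocks(void)
{
	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
	pv_lock_ops.spin_lock = xen_spin_lock;
	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
	pv_lock_ops.spin_trylock = xen_spin_trylock;
	pv_lock_ops.spin_unlock = xen_spin_unlock;
}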
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 95be7b434724..987267f79bf5 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,4 +1,5 @@
 #include <linux/types.h>
+#include <linux/clockchips.h>
 
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -27,6 +28,8 @@ void xen_pre_suspend(void)
 
 void xen_post_suspend(int suspend_cancelled)
 {
+	xen_build_mfn_list_list();
+
 	xen_setup_shared_info();
 
 	if (suspend_cancelled) {
@@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled)
 
 }
 
+static void xen_vcpu_notify_restore(void *data)
+{
+	unsigned long reason = (unsigned long)data;
+
+	/* Boot processor notified via generic timekeeping_resume() */
+	if (smp_processor_id() == 0)
+		return;
+
+	clockevents_notify(reason, NULL);
+}
+
 void xen_arch_resume(void)
 {
-	/* nothing */
+	smp_call_function(xen_vcpu_notify_restore,
+			  (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
 }
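The resume flow this hunk builds: timekeeping_resume() already delivers CLOCK_EVT_NOTIFY_RESUME on the boot CPU, so xen_arch_resume() only fans the notification out to the secondary CPUs, each of which notifies its own clockevent device. A condensed sketch under that assumption; the helper names are hypothetical:

#include <linux/clockchips.h>
#include <linux/smp.h>

static void notify_resume_on_self(void *data)
{
	/* Boot CPU is already handled by generic timekeeping_resume() */
	if (smp_processor_id() == 0)
		return;

	clockevents_notify((unsigned long)data, NULL);
}

static void example_arch_resume(void)
{
	/* wait=1: returns after every other CPU ran the callback */
	smp_call_function(notify_resume_on_self,
			  (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
}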
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0a5aa44299a5..0d3f07cd1b5f 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -31,14 +31,14 @@
 #define NS_PER_TICK	(1000000000LL / HZ)
 
 /* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
 
 /* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
 
 /* unused ns of stolen and blocked time */
-static DEFINE_PER_CPU(u64, residual_stolen);
-static DEFINE_PER_CPU(u64, residual_blocked);
+static DEFINE_PER_CPU(u64, xen_residual_stolen);
+static DEFINE_PER_CPU(u64, xen_residual_blocked);
 
 /* return an consistent snapshot of 64-bit time/counter value */
 static u64 get64(const u64 *p)
@@ -79,7 +79,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
 
 	BUG_ON(preemptible());
 
-	state = &__get_cpu_var(runstate);
+	state = &__get_cpu_var(xen_runstate);
 
 	/*
 	 * The runstate info is always updated by the hypervisor on
@@ -97,14 +97,14 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
 /* return true when a vcpu could run but has no real cpu to run on */
 bool xen_vcpu_stolen(int vcpu)
 {
-	return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
+	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
 }
 
-static void setup_runstate_info(int cpu)
+void xen_setup_runstate_info(int cpu)
 {
 	struct vcpu_register_runstate_memory_area area;
 
-	area.addr.v = &per_cpu(runstate, cpu);
+	area.addr.v = &per_cpu(xen_runstate, cpu);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
 			       cpu, &area))
@@ -122,7 +122,7 @@ static void do_stolen_accounting(void)
 
 	WARN_ON(state.state != RUNSTATE_running);
 
-	snap = &__get_cpu_var(runstate_snapshot);
+	snap = &__get_cpu_var(xen_runstate_snapshot);
 
 	/* work out how much time the VCPU has not been runn*ing* */
 	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
@@ -133,24 +133,24 @@ static void do_stolen_accounting(void)
 
 	/* Add the appropriate number of ticks of stolen time,
 	   including any left-overs from last time. */
-	stolen = runnable + offline + __get_cpu_var(residual_stolen);
+	stolen = runnable + offline + __get_cpu_var(xen_residual_stolen);
 
 	if (stolen < 0)
 		stolen = 0;
 
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-	__get_cpu_var(residual_stolen) = stolen;
+	__get_cpu_var(xen_residual_stolen) = stolen;
 	account_steal_ticks(ticks);
 
 	/* Add the appropriate number of ticks of blocked time,
 	   including any left-overs from last time. */
-	blocked += __get_cpu_var(residual_blocked);
+	blocked += __get_cpu_var(xen_residual_blocked);
 
 	if (blocked < 0)
 		blocked = 0;
 
 	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
-	__get_cpu_var(residual_blocked) = blocked;
+	__get_cpu_var(xen_residual_blocked) = blocked;
 	account_idle_ticks(ticks);
 }
156 156
@@ -434,7 +434,7 @@ void xen_setup_timer(int cpu)
434 name = "<timer kasprintf failed>"; 434 name = "<timer kasprintf failed>";
435 435
436 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, 436 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
437 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, 437 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
438 name, NULL); 438 name, NULL);
439 439
440 evt = &per_cpu(xen_clock_events, cpu); 440 evt = &per_cpu(xen_clock_events, cpu);
@@ -442,8 +442,6 @@ void xen_setup_timer(int cpu)
 
 	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
-
-	setup_runstate_info(cpu);
 }
 
 void xen_teardown_timer(int cpu)
@@ -494,6 +492,7 @@ __init void xen_time_init(void)
 
 	setup_force_cpu_cap(X86_FEATURE_TSC);
 
+	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
 }
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 02f496a8dbaa..53adefda4275 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -96,7 +96,7 @@ ENTRY(xen_sysret32)
 	pushq $__USER32_CS
 	pushq %rcx
 
-	pushq $VGCF_in_syscall
+	pushq $0
 1:	jmp hypercall_iret
 ENDPATCH(xen_sysret32)
 RELOC(xen_sysret32, 1b+1)
@@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target)
 ENTRY(xen_sysenter_target)
 	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
 	mov $-ENOSYS, %rax
-	pushq $VGCF_in_syscall
+	pushq $0
 	jmp hypercall_iret
 ENDPROC(xen_syscall32_target)
 ENDPROC(xen_sysenter_target)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 355fa6b99c9c..f9153a300bce 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 
 void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
+void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
@@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
+void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);