aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig42
-rw-r--r--arch/x86/crypto/fpu.c1
-rw-r--r--arch/x86/crypto/twofish-i586-asm_32.S10
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64.S20
-rw-r--r--arch/x86/ia32/ia32_aout.c2
-rw-r--r--arch/x86/ia32/ia32entry.S8
-rw-r--r--arch/x86/ia32/sys_ia32.c77
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h3
-rw-r--r--arch/x86/include/asm/apb_timer.h70
-rw-r--r--arch/x86/include/asm/compat.h3
-rw-r--r--arch/x86/include/asm/e820.h5
-rw-r--r--arch/x86/include/asm/fixmap.h6
-rw-r--r--arch/x86/include/asm/highmem.h4
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h8
-rw-r--r--arch/x86/include/asm/i8259.h21
-rw-r--r--arch/x86/include/asm/io.h1
-rw-r--r--arch/x86/include/asm/io_apic.h8
-rw-r--r--arch/x86/include/asm/irq_vectors.h48
-rw-r--r--arch/x86/include/asm/kprobes.h31
-rw-r--r--arch/x86/include/asm/lguest_hcall.h29
-rw-r--r--arch/x86/include/asm/local.h37
-rw-r--r--arch/x86/include/asm/mrst.h19
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/numaq.h1
-rw-r--r--arch/x86/include/asm/olpc.h20
-rw-r--r--arch/x86/include/asm/paravirt.h9
-rw-r--r--arch/x86/include/asm/paravirt_types.h4
-rw-r--r--arch/x86/include/asm/pci.h39
-rw-r--r--arch/x86/include/asm/pci_64.h2
-rw-r--r--arch/x86/include/asm/pci_x86.h22
-rw-r--r--arch/x86/include/asm/percpu.h119
-rw-r--r--arch/x86/include/asm/perf_event.h16
-rw-r--r--arch/x86/include/asm/pgtable_32.h7
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/proto.h10
-rw-r--r--arch/x86/include/asm/ptrace.h7
-rw-r--r--arch/x86/include/asm/setup.h2
-rw-r--r--arch/x86/include/asm/sys_ia32.h11
-rw-r--r--arch/x86/include/asm/syscalls.h15
-rw-r--r--arch/x86/include/asm/system.h8
-rw-r--r--arch/x86/include/asm/unistd_32.h4
-rw-r--r--arch/x86/include/asm/unistd_64.h3
-rw-r--r--arch/x86/include/asm/visws/cobalt.h2
-rw-r--r--arch/x86/include/asm/x86_init.h15
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c114
-rw-r--r--arch/x86/kernel/alternative.c61
-rw-r--r--arch/x86/kernel/amd_iommu.c26
-rw-r--r--arch/x86/kernel/amd_iommu_init.c56
-rw-r--r--arch/x86/kernel/apb_timer.c785
-rw-r--r--arch/x86/kernel/aperture_64.c16
-rw-r--r--arch/x86/kernel/apic/apic.c10
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/es7000_32.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c352
-rw-r--r--arch/x86/kernel/apic/nmi.c15
-rw-r--r--arch/x86/kernel/apic/numaq_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c5
-rw-r--r--arch/x86/kernel/bootflag.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig14
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Makefile1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c621
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c24
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c17
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c5
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c208
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c269
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c142
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c97
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c18
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/crash_dump_32.c1
-rw-r--r--arch/x86/kernel/dumpstack.h24
-rw-r--r--arch/x86/kernel/dumpstack_64.c14
-rw-r--r--arch/x86/kernel/e820.c373
-rw-r--r--arch/x86/kernel/head32.c14
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/hpet.c12
-rw-r--r--arch/x86/kernel/hw_breakpoint.c12
-rw-r--r--arch/x86/kernel/i387.c1
-rw-r--r--arch/x86/kernel/i8259.c95
-rw-r--r--arch/x86/kernel/irqinit.c59
-rw-r--r--arch/x86/kernel/k8.c16
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/kprobes.c609
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/mca_32.c1
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c7
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/mpparse.c4
-rw-r--r--arch/x86/kernel/mrst.c216
-rw-r--r--arch/x86/kernel/msr.c1
-rw-r--r--arch/x86/kernel/olpc.c10
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c16
-rw-r--r--arch/x86/kernel/pci-gart_64.c6
-rw-r--r--arch/x86/kernel/pci-nommu.c1
-rw-r--r--arch/x86/kernel/process.c34
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c3
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c35
-rw-r--r--arch/x86/kernel/setup_percpu.c6
-rw-r--r--arch/x86/kernel/smp.c1
-rw-r--r--arch/x86/kernel/smpboot.c25
-rw-r--r--arch/x86/kernel/sys_i386_32.c185
-rw-r--r--arch/x86/kernel/sys_x86_64.c12
-rw-r--r--arch/x86/kernel/syscall_table_32.S4
-rw-r--r--arch/x86/kernel/time.c4
-rw-r--r--arch/x86/kernel/tlb_uv.c1
-rw-r--r--arch/x86/kernel/tsc.c4
-rw-r--r--arch/x86/kernel/uv_irq.c1
-rw-r--r--arch/x86/kernel/uv_time.c1
-rw-r--r--arch/x86/kernel/visws_quirks.c27
-rw-r--r--arch/x86/kernel/vmi_32.c36
-rw-r--r--arch/x86/kernel/vmiclock_32.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S6
-rw-r--r--arch/x86/kernel/x86_init.c8
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/i8254.c1
-rw-r--r--arch/x86/kvm/i8259.c1
-rw-r--r--arch/x86/kvm/lapic.c1
-rw-r--r--arch/x86/kvm/mmu.c12
-rw-r--r--arch/x86/kvm/svm.c26
-rw-r--r--arch/x86/kvm/vmx.c25
-rw-r--r--arch/x86/kvm/x86.c49
-rw-r--r--arch/x86/lguest/boot.c61
-rw-r--r--arch/x86/lguest/i386_head.S2
-rw-r--r--arch/x86/lib/rwsem_64.S2
-rw-r--r--arch/x86/mm/hugetlbpage.c1
-rw-r--r--arch/x86/mm/init.c33
-rw-r--r--arch/x86/mm/init_32.c9
-rw-r--r--arch/x86/mm/init_64.c10
-rw-r--r--arch/x86/mm/ioremap.c14
-rw-r--r--arch/x86/mm/kmmio.c1
-rw-r--r--arch/x86/mm/mmio-mod.c1
-rw-r--r--arch/x86/mm/numa_32.c3
-rw-r--r--arch/x86/mm/numa_64.c97
-rw-r--r--arch/x86/mm/pageattr.c27
-rw-r--r--arch/x86/mm/pat.c2
-rw-r--r--arch/x86/mm/pgtable.c1
-rw-r--r--arch/x86/mm/pgtable_32.c3
-rw-r--r--arch/x86/oprofile/op_model_amd.c23
-rw-r--r--arch/x86/oprofile/op_model_ppro.c6
-rw-r--r--arch/x86/pci/Makefile5
-rw-r--r--arch/x86/pci/acpi.c88
-rw-r--r--arch/x86/pci/amd_bus.c127
-rw-r--r--arch/x86/pci/bus_numa.c25
-rw-r--r--arch/x86/pci/bus_numa.h9
-rw-r--r--arch/x86/pci/common.c7
-rw-r--r--arch/x86/pci/i386.c12
-rw-r--r--arch/x86/pci/init.c8
-rw-r--r--arch/x86/pci/irq.c17
-rw-r--r--arch/x86/pci/legacy.c24
-rw-r--r--arch/x86/pci/mmconfig-shared.c1
-rw-r--r--arch/x86/pci/mrst.c262
-rw-r--r--arch/x86/pci/numaq_32.c6
-rw-r--r--arch/x86/pci/olpc.c3
-rw-r--r--arch/x86/pci/pcbios.c1
-rw-r--r--arch/x86/pci/visws.c6
-rw-r--r--arch/x86/power/hibernate_32.c1
-rw-r--r--arch/x86/power/hibernate_64.c1
-rw-r--r--arch/x86/power/hibernate_asm_32.S15
-rw-r--r--arch/x86/vdso/vma.c1
-rw-r--r--arch/x86/xen/debugfs.c1
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c22
-rw-r--r--arch/x86/xen/smp.c3
-rw-r--r--arch/x86/xen/spinlock.c1
-rw-r--r--arch/x86/xen/time.c1
-rw-r--r--arch/x86/xen/xen-asm_32.S4
198 files changed, 4397 insertions, 2281 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0896008f7509..9458685902bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
31 select ARCH_WANT_FRAME_POINTERS 31 select ARCH_WANT_FRAME_POINTERS
32 select HAVE_DMA_ATTRS 32 select HAVE_DMA_ATTRS
33 select HAVE_KRETPROBES 33 select HAVE_KRETPROBES
34 select HAVE_OPTPROBES
34 select HAVE_FTRACE_MCOUNT_RECORD 35 select HAVE_FTRACE_MCOUNT_RECORD
35 select HAVE_DYNAMIC_FTRACE 36 select HAVE_DYNAMIC_FTRACE
36 select HAVE_FUNCTION_TRACER 37 select HAVE_FUNCTION_TRACER
@@ -101,6 +102,9 @@ config ZONE_DMA
101config SBUS 102config SBUS
102 bool 103 bool
103 104
105config NEED_DMA_MAP_STATE
106 def_bool (X86_64 || DMAR || DMA_API_DEBUG)
107
104config GENERIC_ISA_DMA 108config GENERIC_ISA_DMA
105 def_bool y 109 def_bool y
106 110
@@ -184,6 +188,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
184config ARCH_SUPPORTS_DEBUG_PAGEALLOC 188config ARCH_SUPPORTS_DEBUG_PAGEALLOC
185 def_bool y 189 def_bool y
186 190
191config HAVE_EARLY_RES
192 def_bool y
193
187config HAVE_INTEL_TXT 194config HAVE_INTEL_TXT
188 def_bool y 195 def_bool y
189 depends on EXPERIMENTAL && DMAR && ACPI 196 depends on EXPERIMENTAL && DMAR && ACPI
@@ -389,8 +396,12 @@ config X86_ELAN
389 396
390config X86_MRST 397config X86_MRST
391 bool "Moorestown MID platform" 398 bool "Moorestown MID platform"
399 depends on PCI
400 depends on PCI_GOANY
392 depends on X86_32 401 depends on X86_32
393 depends on X86_EXTENDED_PLATFORM 402 depends on X86_EXTENDED_PLATFORM
403 depends on X86_IO_APIC
404 select APB_TIMER
394 ---help--- 405 ---help---
395 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin 406 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
396 Internet Device(MID) platform. Moorestown consists of two chips: 407 Internet Device(MID) platform. Moorestown consists of two chips:
@@ -425,6 +436,7 @@ config X86_32_NON_STANDARD
425config X86_NUMAQ 436config X86_NUMAQ
426 bool "NUMAQ (IBM/Sequent)" 437 bool "NUMAQ (IBM/Sequent)"
427 depends on X86_32_NON_STANDARD 438 depends on X86_32_NON_STANDARD
439 depends on PCI
428 select NUMA 440 select NUMA
429 select X86_MPPARSE 441 select X86_MPPARSE
430 ---help--- 442 ---help---
@@ -569,6 +581,18 @@ config PARAVIRT_DEBUG
569 Enable to debug paravirt_ops internals. Specifically, BUG if 581 Enable to debug paravirt_ops internals. Specifically, BUG if
570 a paravirt_op is missing when it is called. 582 a paravirt_op is missing when it is called.
571 583
584config NO_BOOTMEM
585 default y
586 bool "Disable Bootmem code"
587 ---help---
588 Use early_res directly instead of bootmem before slab is ready.
589 - allocator (buddy) [generic]
590 - early allocator (bootmem) [generic]
591 - very early allocator (reserve_early*()) [x86]
592 - very very early allocator (early brk model) [x86]
593 So reduce one layer between the early allocator and the final allocator
594
595
572config MEMTEST 596config MEMTEST
573 bool "Memtest" 597 bool "Memtest"
574 ---help--- 598 ---help---
@@ -613,6 +637,16 @@ config HPET_EMULATE_RTC
613 def_bool y 637 def_bool y
614 depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) 638 depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
615 639
640config APB_TIMER
641 def_bool y if MRST
642 prompt "Langwell APB Timer Support" if X86_MRST
643 help
644 APB timer is the replacement for 8254, HPET on X86 MID platforms.
645 The APBT provides a stable time base on SMP
646 systems, unlike the TSC, but it is more expensive to access,
647 as it is off-chip. APB timers are always running regardless of CPU
648 C states, they are used as per CPU clockevent device when possible.
649
616# Mark as embedded because too many people got it wrong. 650# Mark as embedded because too many people got it wrong.
617# The code disables itself when not needed. 651# The code disables itself when not needed.
618config DMI 652config DMI
@@ -628,7 +662,7 @@ config GART_IOMMU
628 bool "GART IOMMU support" if EMBEDDED 662 bool "GART IOMMU support" if EMBEDDED
629 default y 663 default y
630 select SWIOTLB 664 select SWIOTLB
631 depends on X86_64 && PCI 665 depends on X86_64 && PCI && K8_NB
632 ---help--- 666 ---help---
633 Support for full DMA access of devices with 32bit memory access only 667 Support for full DMA access of devices with 32bit memory access only
634 on systems with more than 3GB. This is usually needed for USB, 668 on systems with more than 3GB. This is usually needed for USB,
@@ -1182,8 +1216,8 @@ config NUMA_EMU
1182 1216
1183config NODES_SHIFT 1217config NODES_SHIFT
1184 int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP 1218 int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
1185 range 1 9 1219 range 1 10
1186 default "9" if MAXSMP 1220 default "10" if MAXSMP
1187 default "6" if X86_64 1221 default "6" if X86_64
1188 default "4" if X86_NUMAQ 1222 default "4" if X86_NUMAQ
1189 default "3" 1223 default "3"
@@ -2027,7 +2061,7 @@ endif # X86_32
2027 2061
2028config K8_NB 2062config K8_NB
2029 def_bool y 2063 def_bool y
2030 depends on AGP_AMD64 || (X86_64 && (GART_IOMMU || (PCI && NUMA))) 2064 depends on CPU_SUP_AMD && PCI
2031 2065
2032source "drivers/pcmcia/Kconfig" 2066source "drivers/pcmcia/Kconfig"
2033 2067
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index daef6cd2b45d..1a8f8649c035 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -16,6 +16,7 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/slab.h>
19#include <asm/i387.h> 20#include <asm/i387.h>
20 21
21struct crypto_fpu_ctx { 22struct crypto_fpu_ctx {
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 39b98ed2c1b9..575331cb2a8a 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -22,7 +22,7 @@
22 22
23#include <asm/asm-offsets.h> 23#include <asm/asm-offsets.h>
24 24
25/* return adress at 0 */ 25/* return address at 0 */
26 26
27#define in_blk 12 /* input byte array address parameter*/ 27#define in_blk 12 /* input byte array address parameter*/
28#define out_blk 8 /* output byte array address parameter*/ 28#define out_blk 8 /* output byte array address parameter*/
@@ -230,8 +230,8 @@ twofish_enc_blk:
230 push %edi 230 push %edi
231 231
232 mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ 232 mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */
233 add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ 233 add $crypto_tfm_ctx_offset, %ebp /* ctx address */
234 mov in_blk+16(%esp),%edi /* input adress in edi */ 234 mov in_blk+16(%esp),%edi /* input address in edi */
235 235
236 mov (%edi), %eax 236 mov (%edi), %eax
237 mov b_offset(%edi), %ebx 237 mov b_offset(%edi), %ebx
@@ -286,8 +286,8 @@ twofish_dec_blk:
286 286
287 287
288 mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ 288 mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */
289 add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ 289 add $crypto_tfm_ctx_offset, %ebp /* ctx address */
290 mov in_blk+16(%esp),%edi /* input adress in edi */ 290 mov in_blk+16(%esp),%edi /* input address in edi */
291 291
292 mov (%edi), %eax 292 mov (%edi), %eax
293 mov b_offset(%edi), %ebx 293 mov b_offset(%edi), %ebx
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 35974a586615..573aa102542e 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -221,11 +221,11 @@
221twofish_enc_blk: 221twofish_enc_blk:
222 pushq R1 222 pushq R1
223 223
224 /* %rdi contains the crypto tfm adress */ 224 /* %rdi contains the crypto tfm address */
225 /* %rsi contains the output adress */ 225 /* %rsi contains the output address */
226 /* %rdx contains the input adress */ 226 /* %rdx contains the input address */
227 add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */ 227 add $crypto_tfm_ctx_offset, %rdi /* set ctx address */
228 /* ctx adress is moved to free one non-rex register 228 /* ctx address is moved to free one non-rex register
229 as target for the 8bit high operations */ 229 as target for the 8bit high operations */
230 mov %rdi, %r11 230 mov %rdi, %r11
231 231
@@ -274,11 +274,11 @@ twofish_enc_blk:
274twofish_dec_blk: 274twofish_dec_blk:
275 pushq R1 275 pushq R1
276 276
277 /* %rdi contains the crypto tfm adress */ 277 /* %rdi contains the crypto tfm address */
278 /* %rsi contains the output adress */ 278 /* %rsi contains the output address */
279 /* %rdx contains the input adress */ 279 /* %rdx contains the input address */
280 add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */ 280 add $crypto_tfm_ctx_offset, %rdi /* set ctx address */
281 /* ctx adress is moved to free one non-rex register 281 /* ctx address is moved to free one non-rex register
282 as target for the 8bit high operations */ 282 as target for the 8bit high operations */
283 mov %rdi, %r11 283 mov %rdi, %r11
284 284
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 9046e4af66ce..0350311906ae 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -21,7 +21,6 @@
21#include <linux/fcntl.h> 21#include <linux/fcntl.h>
22#include <linux/ptrace.h> 22#include <linux/ptrace.h>
23#include <linux/user.h> 23#include <linux/user.h>
24#include <linux/slab.h>
25#include <linux/binfmts.h> 24#include <linux/binfmts.h>
26#include <linux/personality.h> 25#include <linux/personality.h>
27#include <linux/init.h> 26#include <linux/init.h>
@@ -327,7 +326,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
327 current->mm->free_area_cache = TASK_UNMAPPED_BASE; 326 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
328 current->mm->cached_hole_size = 0; 327 current->mm->cached_hole_size = 0;
329 328
330 current->mm->mmap = NULL;
331 install_exec_creds(bprm); 329 install_exec_creds(bprm);
332 current->flags &= ~PF_FORKNOEXEC; 330 current->flags &= ~PF_FORKNOEXEC;
333 331
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 53147ad85b96..e790bc1fbfa3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -563,7 +563,7 @@ ia32_sys_call_table:
563 .quad quiet_ni_syscall /* old mpx syscall holder */ 563 .quad quiet_ni_syscall /* old mpx syscall holder */
564 .quad sys_setpgid 564 .quad sys_setpgid
565 .quad quiet_ni_syscall /* old ulimit syscall holder */ 565 .quad quiet_ni_syscall /* old ulimit syscall holder */
566 .quad sys32_olduname 566 .quad sys_olduname
567 .quad sys_umask /* 60 */ 567 .quad sys_umask /* 60 */
568 .quad sys_chroot 568 .quad sys_chroot
569 .quad compat_sys_ustat 569 .quad compat_sys_ustat
@@ -586,7 +586,7 @@ ia32_sys_call_table:
586 .quad compat_sys_settimeofday 586 .quad compat_sys_settimeofday
587 .quad sys_getgroups16 /* 80 */ 587 .quad sys_getgroups16 /* 80 */
588 .quad sys_setgroups16 588 .quad sys_setgroups16
589 .quad sys32_old_select 589 .quad compat_sys_old_select
590 .quad sys_symlink 590 .quad sys_symlink
591 .quad sys_lstat 591 .quad sys_lstat
592 .quad sys_readlink /* 85 */ 592 .quad sys_readlink /* 85 */
@@ -613,7 +613,7 @@ ia32_sys_call_table:
613 .quad compat_sys_newstat 613 .quad compat_sys_newstat
614 .quad compat_sys_newlstat 614 .quad compat_sys_newlstat
615 .quad compat_sys_newfstat 615 .quad compat_sys_newfstat
616 .quad sys32_uname 616 .quad sys_uname
617 .quad stub32_iopl /* 110 */ 617 .quad stub32_iopl /* 110 */
618 .quad sys_vhangup 618 .quad sys_vhangup
619 .quad quiet_ni_syscall /* old "idle" system call */ 619 .quad quiet_ni_syscall /* old "idle" system call */
@@ -626,7 +626,7 @@ ia32_sys_call_table:
626 .quad stub32_sigreturn 626 .quad stub32_sigreturn
627 .quad stub32_clone /* 120 */ 627 .quad stub32_clone /* 120 */
628 .quad sys_setdomainname 628 .quad sys_setdomainname
629 .quad sys_uname 629 .quad sys_newuname
630 .quad sys_modify_ldt 630 .quad sys_modify_ldt
631 .quad compat_sys_adjtimex 631 .quad compat_sys_adjtimex
632 .quad sys32_mprotect /* 125 */ 632 .quad sys32_mprotect /* 125 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 422572c77923..626be156d88d 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -40,6 +40,7 @@
40#include <linux/ptrace.h> 40#include <linux/ptrace.h>
41#include <linux/highuid.h> 41#include <linux/highuid.h>
42#include <linux/sysctl.h> 42#include <linux/sysctl.h>
43#include <linux/slab.h>
43#include <asm/mman.h> 44#include <asm/mman.h>
44#include <asm/types.h> 45#include <asm/types.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
@@ -143,7 +144,7 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
143 * block for parameter passing.. 144 * block for parameter passing..
144 */ 145 */
145 146
146struct mmap_arg_struct { 147struct mmap_arg_struct32 {
147 unsigned int addr; 148 unsigned int addr;
148 unsigned int len; 149 unsigned int len;
149 unsigned int prot; 150 unsigned int prot;
@@ -152,9 +153,9 @@ struct mmap_arg_struct {
152 unsigned int offset; 153 unsigned int offset;
153}; 154};
154 155
155asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) 156asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
156{ 157{
157 struct mmap_arg_struct a; 158 struct mmap_arg_struct32 a;
158 159
159 if (copy_from_user(&a, arg, sizeof(a))) 160 if (copy_from_user(&a, arg, sizeof(a)))
160 return -EFAULT; 161 return -EFAULT;
@@ -332,24 +333,6 @@ asmlinkage long sys32_alarm(unsigned int seconds)
332 return alarm_setitimer(seconds); 333 return alarm_setitimer(seconds);
333} 334}
334 335
335struct sel_arg_struct {
336 unsigned int n;
337 unsigned int inp;
338 unsigned int outp;
339 unsigned int exp;
340 unsigned int tvp;
341};
342
343asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg)
344{
345 struct sel_arg_struct a;
346
347 if (copy_from_user(&a, arg, sizeof(a)))
348 return -EFAULT;
349 return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
350 compat_ptr(a.exp), compat_ptr(a.tvp));
351}
352
353asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, 336asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
354 int options) 337 int options)
355{ 338{
@@ -466,58 +449,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
466 return ret; 449 return ret;
467} 450}
468 451
469asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
470{
471 char *arch = "x86_64";
472 int err;
473
474 if (!name)
475 return -EFAULT;
476 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
477 return -EFAULT;
478
479 down_read(&uts_sem);
480
481 err = __copy_to_user(&name->sysname, &utsname()->sysname,
482 __OLD_UTS_LEN);
483 err |= __put_user(0, name->sysname+__OLD_UTS_LEN);
484 err |= __copy_to_user(&name->nodename, &utsname()->nodename,
485 __OLD_UTS_LEN);
486 err |= __put_user(0, name->nodename+__OLD_UTS_LEN);
487 err |= __copy_to_user(&name->release, &utsname()->release,
488 __OLD_UTS_LEN);
489 err |= __put_user(0, name->release+__OLD_UTS_LEN);
490 err |= __copy_to_user(&name->version, &utsname()->version,
491 __OLD_UTS_LEN);
492 err |= __put_user(0, name->version+__OLD_UTS_LEN);
493
494 if (personality(current->personality) == PER_LINUX32)
495 arch = "i686";
496
497 err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1);
498
499 up_read(&uts_sem);
500
501 err = err ? -EFAULT : 0;
502
503 return err;
504}
505
506long sys32_uname(struct old_utsname __user *name)
507{
508 int err;
509
510 if (!name)
511 return -EFAULT;
512 down_read(&uts_sem);
513 err = copy_to_user(name, utsname(), sizeof(*name));
514 up_read(&uts_sem);
515 if (personality(current->personality) == PER_LINUX32)
516 err |= copy_to_user(&name->machine, "i686", 5);
517
518 return err ? -EFAULT : 0;
519}
520
521asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, 452asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
522 compat_uptr_t __user *envp, struct pt_regs *regs) 453 compat_uptr_t __user *envp, struct pt_regs *regs)
523{ 454{
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f1e253ceba4b..b09ec55650b3 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -165,10 +165,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
165 * invalid instruction possible) or if the instructions are changed from a 165 * invalid instruction possible) or if the instructions are changed from a
166 * consistent state to another consistent state atomically. 166 * consistent state to another consistent state atomically.
167 * More care must be taken when modifying code in the SMP case because of 167 * More care must be taken when modifying code in the SMP case because of
168 * Intel's errata. 168 * Intel's errata. text_poke_smp() takes care of that errata, but still
169 * doesn't support modifying NMI/MCE handler code.
169 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 170 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
170 * inconsistent instruction while you patch. 171 * inconsistent instruction while you patch.
171 */ 172 */
172extern void *text_poke(void *addr, const void *opcode, size_t len); 173extern void *text_poke(void *addr, const void *opcode, size_t len);
174extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
173 175
174#endif /* _ASM_X86_ALTERNATIVE_H */ 176#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index b150c74e0d48..7014e88bc779 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -21,6 +21,7 @@
21#define _ASM_X86_AMD_IOMMU_TYPES_H 21#define _ASM_X86_AMD_IOMMU_TYPES_H
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/mutex.h>
24#include <linux/list.h> 25#include <linux/list.h>
25#include <linux/spinlock.h> 26#include <linux/spinlock.h>
26 27
@@ -140,6 +141,7 @@
140 141
141/* constants to configure the command buffer */ 142/* constants to configure the command buffer */
142#define CMD_BUFFER_SIZE 8192 143#define CMD_BUFFER_SIZE 8192
144#define CMD_BUFFER_UNINITIALIZED 1
143#define CMD_BUFFER_ENTRIES 512 145#define CMD_BUFFER_ENTRIES 512
144#define MMIO_CMD_SIZE_SHIFT 56 146#define MMIO_CMD_SIZE_SHIFT 56
145#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) 147#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
@@ -271,6 +273,7 @@ struct protection_domain {
271 struct list_head list; /* for list of all protection domains */ 273 struct list_head list; /* for list of all protection domains */
272 struct list_head dev_list; /* List of all devices in this domain */ 274 struct list_head dev_list; /* List of all devices in this domain */
273 spinlock_t lock; /* mostly used to lock the page table*/ 275 spinlock_t lock; /* mostly used to lock the page table*/
276 struct mutex api_lock; /* protect page tables in the iommu-api path */
274 u16 id; /* the domain id written to the device table */ 277 u16 id; /* the domain id written to the device table */
275 int mode; /* paging mode (0-6 levels) */ 278 int mode; /* paging mode (0-6 levels) */
276 u64 *pt_root; /* page table root pointer */ 279 u64 *pt_root; /* page table root pointer */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
new file mode 100644
index 000000000000..c74a2eebe570
--- /dev/null
+++ b/arch/x86/include/asm/apb_timer.h
@@ -0,0 +1,70 @@
1/*
2 * apb_timer.h: Driver for Langwell APB timer based on Synopsys DesignWare
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * Note:
13 */
14
15#ifndef ASM_X86_APBT_H
16#define ASM_X86_APBT_H
17#include <linux/sfi.h>
18
19#ifdef CONFIG_APB_TIMER
20
21/* Langwell DW APB timer registers */
22#define APBTMR_N_LOAD_COUNT 0x00
23#define APBTMR_N_CURRENT_VALUE 0x04
24#define APBTMR_N_CONTROL 0x08
25#define APBTMR_N_EOI 0x0c
26#define APBTMR_N_INT_STATUS 0x10
27
28#define APBTMRS_INT_STATUS 0xa0
29#define APBTMRS_EOI 0xa4
30#define APBTMRS_RAW_INT_STATUS 0xa8
31#define APBTMRS_COMP_VERSION 0xac
32#define APBTMRS_REG_SIZE 0x14
33
34/* register bits */
35#define APBTMR_CONTROL_ENABLE (1<<0)
36#define APBTMR_CONTROL_MODE_PERIODIC (1<<1) /*1: periodic 0:free running */
37#define APBTMR_CONTROL_INT (1<<2)
38
39/* default memory mapped register base */
40#define LNW_SCU_ADDR 0xFF100000
41#define LNW_EXT_TIMER_OFFSET 0x1B800
42#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET)
43#define LNW_EXT_TIMER_PGOFFSET 0x800
44
45/* APBT clock speed range from PCLK to fabric base, 25-100MHz */
46#define APBT_MAX_FREQ 50
47#define APBT_MIN_FREQ 1
48#define APBT_MMAP_SIZE 1024
49
50#define APBT_DEV_USED 1
51
52extern void apbt_time_init(void);
53extern struct clock_event_device *global_clock_event;
54extern unsigned long apbt_quick_calibrate(void);
55extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
56extern void apbt_setup_secondary_clock(void);
57extern unsigned int boot_cpu_id;
58extern int disable_apbt_percpu;
59
60extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
61extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
62extern int sfi_mtimer_num;
63
64#else /* CONFIG_APB_TIMER */
65
66static inline unsigned long apbt_quick_calibrate(void) {return 0; }
67static inline void apbt_time_init(void) {return 0; }
68
69#endif
70#endif /* ASM_X86_APBT_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 9a9c7bdc923d..306160e58b48 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -8,7 +8,8 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <asm/user32.h> 9#include <asm/user32.h>
10 10
11#define COMPAT_USER_HZ 100 11#define COMPAT_USER_HZ 100
12#define COMPAT_UTS_MACHINE "i686\0\0"
12 13
13typedef u32 compat_size_t; 14typedef u32 compat_size_t;
14typedef s32 compat_ssize_t; 15typedef s32 compat_ssize_t;
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..0e22296790d3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,11 +111,8 @@ extern unsigned long end_user_pfn;
111 111
112extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); 112extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
113extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); 113extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
114extern void reserve_early(u64 start, u64 end, char *name);
115extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
116extern void free_early(u64 start, u64 end);
117extern void early_res_to_bootmem(u64 start, u64 end);
118extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 114extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
115#include <linux/early_res.h>
119 116
120extern unsigned long e820_end_of_ram_pfn(void); 117extern unsigned long e820_end_of_ram_pfn(void);
121extern unsigned long e820_end_of_low_ram_pfn(void); 118extern unsigned long e820_end_of_low_ram_pfn(void);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 635f03bb4995..d07b44f7d1dc 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -82,6 +82,9 @@ enum fixed_addresses {
82#endif 82#endif
83 FIX_DBGP_BASE, 83 FIX_DBGP_BASE,
84 FIX_EARLYCON_MEM_BASE, 84 FIX_EARLYCON_MEM_BASE,
85#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
86 FIX_OHCI1394_BASE,
87#endif
85#ifdef CONFIG_X86_LOCAL_APIC 88#ifdef CONFIG_X86_LOCAL_APIC
86 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ 89 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
87#endif 90#endif
@@ -132,9 +135,6 @@ enum fixed_addresses {
132 (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1)) 135 (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
133 : __end_of_permanent_fixed_addresses, 136 : __end_of_permanent_fixed_addresses,
134 FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, 137 FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
135#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
136 FIX_OHCI1394_BASE,
137#endif
138#ifdef CONFIG_X86_32 138#ifdef CONFIG_X86_32
139 FIX_WP_TEST, 139 FIX_WP_TEST,
140#endif 140#endif
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 014c2b85ae45..a726650fc80f 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -66,10 +66,6 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
66void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); 66void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
67struct page *kmap_atomic_to_page(void *ptr); 67struct page *kmap_atomic_to_page(void *ptr);
68 68
69#ifndef CONFIG_PARAVIRT
70#define kmap_atomic_pte(page, type) kmap_atomic(page, type)
71#endif
72
73#define flush_cache_kmaps() do { } while (0) 69#define flush_cache_kmaps() do { } while (0)
74 70
75extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, 71extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 0675a7c4c20e..2a1bd8f4f23a 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
10 * (display/resolving) 10 * (display/resolving)
11 */ 11 */
12struct arch_hw_breakpoint { 12struct arch_hw_breakpoint {
13 char *name; /* Contains name of the symbol to set bkpt */
14 unsigned long address; 13 unsigned long address;
15 u8 len; 14 u8 len;
16 u8 type; 15 u8 type;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eeac829a0f44..46c0fe05f230 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -53,13 +53,6 @@ extern void threshold_interrupt(void);
53extern void call_function_interrupt(void); 53extern void call_function_interrupt(void);
54extern void call_function_single_interrupt(void); 54extern void call_function_single_interrupt(void);
55 55
56/* PIC specific functions */
57extern void disable_8259A_irq(unsigned int irq);
58extern void enable_8259A_irq(unsigned int irq);
59extern int i8259A_irq_pending(unsigned int irq);
60extern void make_8259A_irq(unsigned int irq);
61extern void init_8259A(int aeoi);
62
63/* IOAPIC */ 56/* IOAPIC */
64#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) 57#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
65extern unsigned long io_apic_irqs; 58extern unsigned long io_apic_irqs;
@@ -140,6 +133,7 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
140 133
141typedef int vector_irq_t[NR_VECTORS]; 134typedef int vector_irq_t[NR_VECTORS];
142DECLARE_PER_CPU(vector_irq_t, vector_irq); 135DECLARE_PER_CPU(vector_irq_t, vector_irq);
136extern void setup_vector_irq(int cpu);
143 137
144#ifdef CONFIG_X86_IO_APIC 138#ifdef CONFIG_X86_IO_APIC
145extern void lock_vector_lock(void); 139extern void lock_vector_lock(void);
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 58d7091eeb1f..1655147646aa 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -24,12 +24,7 @@ extern unsigned int cached_irq_mask;
24#define SLAVE_ICW4_DEFAULT 0x01 24#define SLAVE_ICW4_DEFAULT 0x01
25#define PIC_ICW4_AEOI 2 25#define PIC_ICW4_AEOI 2
26 26
27extern spinlock_t i8259A_lock; 27extern raw_spinlock_t i8259A_lock;
28
29extern void init_8259A(int auto_eoi);
30extern void enable_8259A_irq(unsigned int irq);
31extern void disable_8259A_irq(unsigned int irq);
32extern unsigned int startup_8259A_irq(unsigned int irq);
33 28
34/* the PIC may need a careful delay on some platforms, hence specific calls */ 29/* the PIC may need a careful delay on some platforms, hence specific calls */
35static inline unsigned char inb_pic(unsigned int port) 30static inline unsigned char inb_pic(unsigned int port)
@@ -57,7 +52,17 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 52
58extern struct irq_chip i8259A_chip; 53extern struct irq_chip i8259A_chip;
59 54
60extern void mask_8259A(void); 55struct legacy_pic {
61extern void unmask_8259A(void); 56 int nr_legacy_irqs;
57 struct irq_chip *chip;
58 void (*mask_all)(void);
59 void (*restore_mask)(void);
60 void (*init)(int auto_eoi);
61 int (*irq_pending)(unsigned int irq);
62 void (*make_irq)(unsigned int irq);
63};
64
65extern struct legacy_pic *legacy_pic;
66extern struct legacy_pic null_legacy_pic;
62 67
63#endif /* _ASM_X86_I8259_H */ 68#endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index a1dcfa3ab17d..30a3e9776123 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -347,6 +347,7 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr,
347extern void __iomem *early_memremap(resource_size_t phys_addr, 347extern void __iomem *early_memremap(resource_size_t phys_addr,
348 unsigned long size); 348 unsigned long size);
349extern void early_iounmap(void __iomem *addr, unsigned long size); 349extern void early_iounmap(void __iomem *addr, unsigned long size);
350extern void fixup_early_ioremap(void);
350 351
351#define IO_SPACE_LIMIT 0xffff 352#define IO_SPACE_LIMIT 0xffff
352 353
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7c7c16cde1f8..35832a03a515 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -143,8 +143,6 @@ extern int noioapicreroute;
143/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ 143/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
144extern int timer_through_8259; 144extern int timer_through_8259;
145 145
146extern void io_apic_disable_legacy(void);
147
148/* 146/*
149 * If we use the IO-APIC for IRQ routing, disable automatic 147 * If we use the IO-APIC for IRQ routing, disable automatic
150 * assignment of PCI IRQ's. 148 * assignment of PCI IRQ's.
@@ -160,6 +158,7 @@ extern int io_apic_get_redir_entries(int ioapic);
160struct io_apic_irq_attr; 158struct io_apic_irq_attr;
161extern int io_apic_set_pci_routing(struct device *dev, int irq, 159extern int io_apic_set_pci_routing(struct device *dev, int irq,
162 struct io_apic_irq_attr *irq_attr); 160 struct io_apic_irq_attr *irq_attr);
161void setup_IO_APIC_irq_extra(u32 gsi);
163extern int (*ioapic_renumber_irq)(int ioapic, int irq); 162extern int (*ioapic_renumber_irq)(int ioapic, int irq);
164extern void ioapic_init_mappings(void); 163extern void ioapic_init_mappings(void);
165extern void ioapic_insert_resources(void); 164extern void ioapic_insert_resources(void);
@@ -188,6 +187,7 @@ extern struct mp_ioapic_gsi mp_gsi_routing[];
188int mp_find_ioapic(int gsi); 187int mp_find_ioapic(int gsi);
189int mp_find_ioapic_pin(int ioapic, int gsi); 188int mp_find_ioapic_pin(int ioapic, int gsi);
190void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); 189void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
190extern void __init pre_init_apic_IRQ0(void);
191 191
192#else /* !CONFIG_X86_IO_APIC */ 192#else /* !CONFIG_X86_IO_APIC */
193 193
@@ -197,7 +197,11 @@ static const int timer_through_8259 = 0;
197static inline void ioapic_init_mappings(void) { } 197static inline void ioapic_init_mappings(void) { }
198static inline void ioapic_insert_resources(void) { } 198static inline void ioapic_insert_resources(void) { }
199static inline void probe_nr_irqs_gsi(void) { } 199static inline void probe_nr_irqs_gsi(void) { }
200static inline int mp_find_ioapic(int gsi) { return 0; }
200 201
202struct io_apic_irq_attr;
203static inline int io_apic_set_pci_routing(struct device *dev, int irq,
204 struct io_apic_irq_attr *irq_attr) { return 0; }
201#endif 205#endif
202 206
203#endif /* _ASM_X86_IO_APIC_H */ 207#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4611f085cd43..8767d99c4f64 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -28,28 +28,33 @@
28#define MCE_VECTOR 0x12 28#define MCE_VECTOR 0x12
29 29
30/* 30/*
31 * IDT vectors usable for external interrupt sources start 31 * IDT vectors usable for external interrupt sources start at 0x20.
32 * at 0x20: 32 * (0x80 is the syscall vector, 0x30-0x3f are for ISA)
33 */ 33 */
34#define FIRST_EXTERNAL_VECTOR 0x20 34#define FIRST_EXTERNAL_VECTOR 0x20
35 35/*
36#ifdef CONFIG_X86_32 36 * We start allocating at 0x21 to spread out vectors evenly between
37# define SYSCALL_VECTOR 0x80 37 * priority levels. (0x80 is the syscall vector)
38# define IA32_SYSCALL_VECTOR 0x80 38 */
39#else 39#define VECTOR_OFFSET_START 1
40# define IA32_SYSCALL_VECTOR 0x80
41#endif
42 40
43/* 41/*
44 * Reserve the lowest usable priority level 0x20 - 0x2f for triggering 42 * Reserve the lowest usable vector (and hence lowest priority) 0x20 for
45 * cleanup after irq migration. 43 * triggering cleanup after irq migration. 0x21-0x2f will still be used
44 * for device interrupts.
46 */ 45 */
47#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR 46#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
48 47
48#define IA32_SYSCALL_VECTOR 0x80
49#ifdef CONFIG_X86_32
50# define SYSCALL_VECTOR 0x80
51#endif
52
49/* 53/*
50 * Vectors 0x30-0x3f are used for ISA interrupts. 54 * Vectors 0x30-0x3f are used for ISA interrupts.
55 * round up to the next 16-vector boundary
51 */ 56 */
52#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) 57#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
53 58
54#define IRQ1_VECTOR (IRQ0_VECTOR + 1) 59#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
55#define IRQ2_VECTOR (IRQ0_VECTOR + 2) 60#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
@@ -120,13 +125,6 @@
120 */ 125 */
121#define MCE_SELF_VECTOR 0xeb 126#define MCE_SELF_VECTOR 0xeb
122 127
123/*
124 * First APIC vector available to drivers: (vectors 0x30-0xee) we
125 * start at 0x31(0x41) to spread out vectors evenly between priority
126 * levels. (0x80 is the syscall vector)
127 */
128#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
129
130#define NR_VECTORS 256 128#define NR_VECTORS 256
131 129
132#define FPU_IRQ 13 130#define FPU_IRQ 13
@@ -154,21 +152,21 @@ static inline int invalid_vm86_irq(int irq)
154 152
155#define NR_IRQS_LEGACY 16 153#define NR_IRQS_LEGACY 16
156 154
157#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
158#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) 155#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
159 156
160#ifdef CONFIG_X86_IO_APIC 157#ifdef CONFIG_X86_IO_APIC
161# ifdef CONFIG_SPARSE_IRQ 158# ifdef CONFIG_SPARSE_IRQ
159# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
162# define NR_IRQS \ 160# define NR_IRQS \
163 (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ 161 (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
164 (NR_VECTORS + CPU_VECTOR_LIMIT) : \ 162 (NR_VECTORS + CPU_VECTOR_LIMIT) : \
165 (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) 163 (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
166# else 164# else
167# if NR_CPUS < MAX_IO_APICS 165# define CPU_VECTOR_LIMIT (32 * NR_CPUS)
168# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) 166# define NR_IRQS \
169# else 167 (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \
170# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) 168 (NR_VECTORS + CPU_VECTOR_LIMIT) : \
171# endif 169 (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
172# endif 170# endif
173#else /* !CONFIG_X86_IO_APIC: */ 171#else /* !CONFIG_X86_IO_APIC: */
174# define NR_IRQS NR_IRQS_LEGACY 172# define NR_IRQS NR_IRQS_LEGACY
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4fe681de1e76..4ffa345a8ccb 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -32,7 +32,10 @@ struct kprobe;
32 32
33typedef u8 kprobe_opcode_t; 33typedef u8 kprobe_opcode_t;
34#define BREAKPOINT_INSTRUCTION 0xcc 34#define BREAKPOINT_INSTRUCTION 0xcc
35#define RELATIVEJUMP_INSTRUCTION 0xe9 35#define RELATIVEJUMP_OPCODE 0xe9
36#define RELATIVEJUMP_SIZE 5
37#define RELATIVECALL_OPCODE 0xe8
38#define RELATIVE_ADDR_SIZE 4
36#define MAX_INSN_SIZE 16 39#define MAX_INSN_SIZE 16
37#define MAX_STACK_SIZE 64 40#define MAX_STACK_SIZE 64
38#define MIN_STACK_SIZE(ADDR) \ 41#define MIN_STACK_SIZE(ADDR) \
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t;
44 47
45#define flush_insn_slot(p) do { } while (0) 48#define flush_insn_slot(p) do { } while (0)
46 49
50/* optinsn template addresses */
51extern kprobe_opcode_t optprobe_template_entry;
52extern kprobe_opcode_t optprobe_template_val;
53extern kprobe_opcode_t optprobe_template_call;
54extern kprobe_opcode_t optprobe_template_end;
55#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
56#define MAX_OPTINSN_SIZE \
57 (((unsigned long)&optprobe_template_end - \
58 (unsigned long)&optprobe_template_entry) + \
59 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
60
47extern const int kretprobe_blacklist_size; 61extern const int kretprobe_blacklist_size;
48 62
49void arch_remove_kprobe(struct kprobe *p); 63void arch_remove_kprobe(struct kprobe *p);
@@ -64,6 +78,21 @@ struct arch_specific_insn {
64 int boostable; 78 int boostable;
65}; 79};
66 80
81struct arch_optimized_insn {
82 /* copy of the original instructions */
83 kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
84 /* detour code buffer */
85 kprobe_opcode_t *insn;
86 /* the size of instructions copied to detour code buffer */
87 size_t size;
88};
89
90/* Return true (!0) if optinsn is prepared for optimization. */
91static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
92{
93 return optinsn->size;
94}
95
67struct prev_kprobe { 96struct prev_kprobe {
68 struct kprobe *kp; 97 struct kprobe *kp;
69 unsigned long status; 98 unsigned long status;
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ba0eed8aa1a6..b60f2924c413 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -28,22 +28,39 @@
28 28
29#ifndef __ASSEMBLY__ 29#ifndef __ASSEMBLY__
30#include <asm/hw_irq.h> 30#include <asm/hw_irq.h>
31#include <asm/kvm_para.h>
32 31
33/*G:030 32/*G:030
34 * But first, how does our Guest contact the Host to ask for privileged 33 * But first, how does our Guest contact the Host to ask for privileged
35 * operations? There are two ways: the direct way is to make a "hypercall", 34 * operations? There are two ways: the direct way is to make a "hypercall",
36 * to make requests of the Host Itself. 35 * to make requests of the Host Itself.
37 * 36 *
38 * We use the KVM hypercall mechanism, though completely different hypercall 37 * Our hypercall mechanism uses the highest unused trap code (traps 32 and
39 * numbers. Seventeen hypercalls are available: the hypercall number is put in 38 * above are used by real hardware interrupts). Seventeen hypercalls are
40 * the %eax register, and the arguments (when required) are placed in %ebx, 39 * available: the hypercall number is put in the %eax register, and the
41 * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. 40 * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
41 * If a return value makes sense, it's returned in %eax.
42 * 42 *
43 * Grossly invalid calls result in Sudden Death at the hands of the vengeful 43 * Grossly invalid calls result in Sudden Death at the hands of the vengeful
44 * Host, rather than returning failure. This reflects Winston Churchill's 44 * Host, rather than returning failure. This reflects Winston Churchill's
45 * definition of a gentleman: "someone who is only rude intentionally". 45 * definition of a gentleman: "someone who is only rude intentionally".
46:*/ 46 */
47static inline unsigned long
48hcall(unsigned long call,
49 unsigned long arg1, unsigned long arg2, unsigned long arg3,
50 unsigned long arg4)
51{
52 /* "int" is the Intel instruction to trigger a trap. */
53 asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
54 /* The call in %eax (aka "a") might be overwritten */
55 : "=a"(call)
56 /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
57 : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
58 /* "memory" means this might write somewhere in memory.
59 * This isn't true for all calls, but it's safe to tell
60 * gcc that it might happen so it doesn't get clever. */
61 : "memory");
62 return call;
63}
47 64
48/* Can't use our min() macro here: needs to be a constant */ 65/* Can't use our min() macro here: needs to be a constant */
49#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) 66#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 47b9b6f19057..2e9972468a5d 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l)
195#define __local_add(i, l) local_add((i), (l)) 195#define __local_add(i, l) local_add((i), (l))
196#define __local_sub(i, l) local_sub((i), (l)) 196#define __local_sub(i, l) local_sub((i), (l))
197 197
198/* Use these for per-cpu local_t variables: on some archs they are
199 * much more efficient than these naive implementations. Note they take
200 * a variable, not an address.
201 *
202 * X86_64: This could be done better if we moved the per cpu data directly
203 * after GS.
204 */
205
206/* Need to disable preemption for the cpu local counters otherwise we could
207 still access a variable of a previous CPU in a non atomic way. */
208#define cpu_local_wrap_v(l) \
209({ \
210 local_t res__; \
211 preempt_disable(); \
212 res__ = (l); \
213 preempt_enable(); \
214 res__; \
215})
216#define cpu_local_wrap(l) \
217({ \
218 preempt_disable(); \
219 (l); \
220 preempt_enable(); \
221}) \
222
223#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l))))
224#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i)))
225#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l))))
226#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l))))
227#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l))))
228#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l))))
229
230#define __cpu_local_inc(l) cpu_local_inc((l))
231#define __cpu_local_dec(l) cpu_local_dec((l))
232#define __cpu_local_add(i, l) cpu_local_add((i), (l))
233#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
234
235#endif /* _ASM_X86_LOCAL_H */ 198#endif /* _ASM_X86_LOCAL_H */
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
new file mode 100644
index 000000000000..451d30e7f62d
--- /dev/null
+++ b/arch/x86/include/asm/mrst.h
@@ -0,0 +1,19 @@
1/*
2 * mrst.h: Intel Moorestown platform specific setup code
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11#ifndef _ASM_X86_MRST_H
12#define _ASM_X86_MRST_H
13extern int pci_mrst_init(void);
14int __init sfi_parse_mrtc(struct sfi_table_header *table);
15
16#define SFI_MTMR_MAX_NUM 8
17#define SFI_MRTC_MAX 8
18
19#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1cd58cdbc03f..4604e6a54d36 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -105,6 +105,8 @@
105#define MSR_AMD64_PATCH_LEVEL 0x0000008b 105#define MSR_AMD64_PATCH_LEVEL 0x0000008b
106#define MSR_AMD64_NB_CFG 0xc001001f 106#define MSR_AMD64_NB_CFG 0xc001001f
107#define MSR_AMD64_PATCH_LOADER 0xc0010020 107#define MSR_AMD64_PATCH_LOADER 0xc0010020
108#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
109#define MSR_AMD64_OSVW_STATUS 0xc0010141
108#define MSR_AMD64_IBSFETCHCTL 0xc0011030 110#define MSR_AMD64_IBSFETCHCTL 0xc0011030
109#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 111#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
110#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 112#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 13370b95ea94..37c516545ec8 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -30,6 +30,7 @@
30 30
31extern int found_numaq; 31extern int found_numaq;
32extern int get_memcfg_numaq(void); 32extern int get_memcfg_numaq(void);
33extern int pci_numaq_init(void);
33 34
34extern void *xquad_portio; 35extern void *xquad_portio;
35 36
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 3a57385d9fa7..101229b0d8ed 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -13,7 +13,6 @@ struct olpc_platform_t {
13 13
14#define OLPC_F_PRESENT 0x01 14#define OLPC_F_PRESENT 0x01
15#define OLPC_F_DCON 0x02 15#define OLPC_F_DCON 0x02
16#define OLPC_F_VSA 0x04
17 16
18#ifdef CONFIG_OLPC 17#ifdef CONFIG_OLPC
19 18
@@ -51,18 +50,6 @@ static inline int olpc_has_dcon(void)
51} 50}
52 51
53/* 52/*
54 * The VSA is software from AMD that typical Geode bioses will include.
55 * It is used to emulate the PCI bus, VGA, etc. OLPC's Open Firmware does
56 * not include the VSA; instead, PCI is emulated by the kernel.
57 *
58 * The VSA is described further in arch/x86/pci/olpc.c.
59 */
60static inline int olpc_has_vsa(void)
61{
62 return (olpc_platform_info.flags & OLPC_F_VSA) ? 1 : 0;
63}
64
65/*
66 * The "Mass Production" version of OLPC's XO is identified as being model 53 * The "Mass Production" version of OLPC's XO is identified as being model
67 * C2. During the prototype phase, the following models (in chronological 54 * C2. During the prototype phase, the following models (in chronological
68 * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models 55 * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models
@@ -87,13 +74,10 @@ static inline int olpc_has_dcon(void)
87 return 0; 74 return 0;
88} 75}
89 76
90static inline int olpc_has_vsa(void)
91{
92 return 0;
93}
94
95#endif 77#endif
96 78
79extern int pci_olpc_init(void);
80
97/* EC related functions */ 81/* EC related functions */
98 82
99extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, 83extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dd59a85a918f..5653f43d90e5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -435,15 +435,6 @@ static inline void paravirt_release_pud(unsigned long pfn)
435 PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); 435 PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
436} 436}
437 437
438#ifdef CONFIG_HIGHPTE
439static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
440{
441 unsigned long ret;
442 ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
443 return (void *)ret;
444}
445#endif
446
447static inline void pte_update(struct mm_struct *mm, unsigned long addr, 438static inline void pte_update(struct mm_struct *mm, unsigned long addr,
448 pte_t *ptep) 439 pte_t *ptep)
449{ 440{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b1e70d51e40c..db9ef5532341 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -304,10 +304,6 @@ struct pv_mmu_ops {
304#endif /* PAGETABLE_LEVELS == 4 */ 304#endif /* PAGETABLE_LEVELS == 4 */
305#endif /* PAGETABLE_LEVELS >= 3 */ 305#endif /* PAGETABLE_LEVELS >= 3 */
306 306
307#ifdef CONFIG_HIGHPTE
308 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
309#endif
310
311 struct pv_lazy_ops lazy_mode; 307 struct pv_lazy_ops lazy_mode;
312 308
313 /* dom0 ops */ 309 /* dom0 ops */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ada8c201d513..404a880ea325 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -45,8 +45,15 @@ static inline int pci_proc_domain(struct pci_bus *bus)
45 45
46#ifdef CONFIG_PCI 46#ifdef CONFIG_PCI
47extern unsigned int pcibios_assign_all_busses(void); 47extern unsigned int pcibios_assign_all_busses(void);
48extern int pci_legacy_init(void);
49# ifdef CONFIG_ACPI
50# define x86_default_pci_init pci_acpi_init
51# else
52# define x86_default_pci_init pci_legacy_init
53# endif
48#else 54#else
49#define pcibios_assign_all_busses() 0 55# define pcibios_assign_all_busses() 0
56# define x86_default_pci_init NULL
50#endif 57#endif
51 58
52extern unsigned long pci_mem_start; 59extern unsigned long pci_mem_start;
@@ -90,40 +97,14 @@ extern void pci_iommu_alloc(void);
90 97
91#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) 98#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
92 99
93#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG)
94
95#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
96 dma_addr_t ADDR_NAME;
97#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
98 __u32 LEN_NAME;
99#define pci_unmap_addr(PTR, ADDR_NAME) \
100 ((PTR)->ADDR_NAME)
101#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
102 (((PTR)->ADDR_NAME) = (VAL))
103#define pci_unmap_len(PTR, LEN_NAME) \
104 ((PTR)->LEN_NAME)
105#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
106 (((PTR)->LEN_NAME) = (VAL))
107
108#else
109
110#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
111#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
112#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
113#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
114 do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
115#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
116#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
117 do { break; } while (pci_unmap_len(PTR, LEN_NAME))
118
119#endif
120
121#endif /* __KERNEL__ */ 100#endif /* __KERNEL__ */
122 101
123#ifdef CONFIG_X86_64 102#ifdef CONFIG_X86_64
124#include "pci_64.h" 103#include "pci_64.h"
125#endif 104#endif
126 105
106void dma32_reserve_bootmem(void);
107
127/* implement the pci_ DMA API in terms of the generic device dma_ one */ 108/* implement the pci_ DMA API in terms of the generic device dma_ one */
128#include <asm-generic/pci-dma-compat.h> 109#include <asm-generic/pci-dma-compat.h>
129 110
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index ae5e40f67daf..fe15cfb21b9b 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
22extern int (*pci_config_write)(int seg, int bus, int dev, int fn, 22extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
23 int reg, int len, u32 value); 23 int reg, int len, u32 value);
24 24
25extern void dma32_reserve_bootmem(void);
26
27#endif /* __KERNEL__ */ 25#endif /* __KERNEL__ */
28 26
29#endif /* _ASM_X86_PCI_64_H */ 27#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 05b58ccb2e82..1a0422348d6d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -83,7 +83,6 @@ struct irq_routing_table {
83 83
84extern unsigned int pcibios_irq_mask; 84extern unsigned int pcibios_irq_mask;
85 85
86extern int pcibios_scanned;
87extern spinlock_t pci_config_lock; 86extern spinlock_t pci_config_lock;
88 87
89extern int (*pcibios_enable_irq)(struct pci_dev *dev); 88extern int (*pcibios_enable_irq)(struct pci_dev *dev);
@@ -106,16 +105,15 @@ extern bool port_cf9_safe;
106extern int pci_direct_probe(void); 105extern int pci_direct_probe(void);
107extern void pci_direct_init(int type); 106extern void pci_direct_init(int type);
108extern void pci_pcbios_init(void); 107extern void pci_pcbios_init(void);
109extern int pci_olpc_init(void);
110extern void __init dmi_check_pciprobe(void); 108extern void __init dmi_check_pciprobe(void);
111extern void __init dmi_check_skip_isa_align(void); 109extern void __init dmi_check_skip_isa_align(void);
112 110
113/* some common used subsys_initcalls */ 111/* some common used subsys_initcalls */
114extern int __init pci_acpi_init(void); 112extern int __init pci_acpi_init(void);
115extern int __init pcibios_irq_init(void); 113extern void __init pcibios_irq_init(void);
116extern int __init pci_visws_init(void);
117extern int __init pci_numaq_init(void);
118extern int __init pcibios_init(void); 114extern int __init pcibios_init(void);
115extern int pci_legacy_init(void);
116extern void pcibios_fixup_irqs(void);
119 117
120/* pci-mmconfig.c */ 118/* pci-mmconfig.c */
121 119
@@ -183,3 +181,17 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
183{ 181{
184 asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); 182 asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
185} 183}
184
185#ifdef CONFIG_PCI
186# ifdef CONFIG_ACPI
187# define x86_default_pci_init pci_acpi_init
188# else
189# define x86_default_pci_init pci_legacy_init
190# endif
191# define x86_default_pci_init_irq pcibios_irq_init
192# define x86_default_pci_fixup_irqs pcibios_fixup_irqs
193#else
194# define x86_default_pci_init NULL
195# define x86_default_pci_init_irq NULL
196# define x86_default_pci_fixup_irqs NULL
197#endif
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0c44196b78ac..66a272dfd8b8 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -25,19 +25,18 @@
25 */ 25 */
26#ifdef CONFIG_SMP 26#ifdef CONFIG_SMP
27#define PER_CPU(var, reg) \ 27#define PER_CPU(var, reg) \
28 __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ 28 __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
29 lea per_cpu__##var(reg), reg 29 lea var(reg), reg
30#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var 30#define PER_CPU_VAR(var) %__percpu_seg:var
31#else /* ! SMP */ 31#else /* ! SMP */
32#define PER_CPU(var, reg) \ 32#define PER_CPU(var, reg) __percpu_mov_op $var, reg
33 __percpu_mov_op $per_cpu__##var, reg 33#define PER_CPU_VAR(var) var
34#define PER_CPU_VAR(var) per_cpu__##var
35#endif /* SMP */ 34#endif /* SMP */
36 35
37#ifdef CONFIG_X86_64_SMP 36#ifdef CONFIG_X86_64_SMP
38#define INIT_PER_CPU_VAR(var) init_per_cpu__##var 37#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
39#else 38#else
40#define INIT_PER_CPU_VAR(var) per_cpu__##var 39#define INIT_PER_CPU_VAR(var) var
41#endif 40#endif
42 41
43#else /* ...!ASSEMBLY */ 42#else /* ...!ASSEMBLY */
@@ -60,12 +59,12 @@
60 * There also must be an entry in vmlinux_64.lds.S 59 * There also must be an entry in vmlinux_64.lds.S
61 */ 60 */
62#define DECLARE_INIT_PER_CPU(var) \ 61#define DECLARE_INIT_PER_CPU(var) \
63 extern typeof(per_cpu_var(var)) init_per_cpu_var(var) 62 extern typeof(var) init_per_cpu_var(var)
64 63
65#ifdef CONFIG_X86_64_SMP 64#ifdef CONFIG_X86_64_SMP
66#define init_per_cpu_var(var) init_per_cpu__##var 65#define init_per_cpu_var(var) init_per_cpu__##var
67#else 66#else
68#define init_per_cpu_var(var) per_cpu_var(var) 67#define init_per_cpu_var(var) var
69#endif 68#endif
70 69
71/* For arch-specific code, we can use direct single-insn ops (they 70/* For arch-specific code, we can use direct single-insn ops (they
@@ -104,6 +103,64 @@ do { \
104 } \ 103 } \
105} while (0) 104} while (0)
106 105
106/*
107 * Generate a percpu add to memory instruction and optimize code
108 * if a one is added or subtracted.
109 */
110#define percpu_add_op(var, val) \
111do { \
112 typedef typeof(var) pao_T__; \
113 const int pao_ID__ = (__builtin_constant_p(val) && \
114 ((val) == 1 || (val) == -1)) ? (val) : 0; \
115 if (0) { \
116 pao_T__ pao_tmp__; \
117 pao_tmp__ = (val); \
118 } \
119 switch (sizeof(var)) { \
120 case 1: \
121 if (pao_ID__ == 1) \
122 asm("incb "__percpu_arg(0) : "+m" (var)); \
123 else if (pao_ID__ == -1) \
124 asm("decb "__percpu_arg(0) : "+m" (var)); \
125 else \
126 asm("addb %1, "__percpu_arg(0) \
127 : "+m" (var) \
128 : "qi" ((pao_T__)(val))); \
129 break; \
130 case 2: \
131 if (pao_ID__ == 1) \
132 asm("incw "__percpu_arg(0) : "+m" (var)); \
133 else if (pao_ID__ == -1) \
134 asm("decw "__percpu_arg(0) : "+m" (var)); \
135 else \
136 asm("addw %1, "__percpu_arg(0) \
137 : "+m" (var) \
138 : "ri" ((pao_T__)(val))); \
139 break; \
140 case 4: \
141 if (pao_ID__ == 1) \
142 asm("incl "__percpu_arg(0) : "+m" (var)); \
143 else if (pao_ID__ == -1) \
144 asm("decl "__percpu_arg(0) : "+m" (var)); \
145 else \
146 asm("addl %1, "__percpu_arg(0) \
147 : "+m" (var) \
148 : "ri" ((pao_T__)(val))); \
149 break; \
150 case 8: \
151 if (pao_ID__ == 1) \
152 asm("incq "__percpu_arg(0) : "+m" (var)); \
153 else if (pao_ID__ == -1) \
154 asm("decq "__percpu_arg(0) : "+m" (var)); \
155 else \
156 asm("addq %1, "__percpu_arg(0) \
157 : "+m" (var) \
158 : "re" ((pao_T__)(val))); \
159 break; \
160 default: __bad_percpu_size(); \
161 } \
162} while (0)
163
107#define percpu_from_op(op, var, constraint) \ 164#define percpu_from_op(op, var, constraint) \
108({ \ 165({ \
109 typeof(var) pfo_ret__; \ 166 typeof(var) pfo_ret__; \
@@ -142,16 +199,14 @@ do { \
142 * per-thread variables implemented as per-cpu variables and thus 199 * per-thread variables implemented as per-cpu variables and thus
143 * stable for the duration of the respective task. 200 * stable for the duration of the respective task.
144 */ 201 */
145#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ 202#define percpu_read(var) percpu_from_op("mov", var, "m" (var))
146 "m" (per_cpu__##var)) 203#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
147#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ 204#define percpu_write(var, val) percpu_to_op("mov", var, val)
148 "p" (&per_cpu__##var)) 205#define percpu_add(var, val) percpu_add_op(var, val)
149#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) 206#define percpu_sub(var, val) percpu_add_op(var, -(val))
150#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) 207#define percpu_and(var, val) percpu_to_op("and", var, val)
151#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) 208#define percpu_or(var, val) percpu_to_op("or", var, val)
152#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) 209#define percpu_xor(var, val) percpu_to_op("xor", var, val)
153#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
154#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
155 210
156#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 211#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
157#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 212#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -160,9 +215,9 @@ do { \
160#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 215#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
161#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 216#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
162#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 217#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
163#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) 218#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
164#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) 219#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
165#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) 220#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
166#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 221#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
167#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 222#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
168#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 223#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -179,9 +234,9 @@ do { \
179#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 234#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
180#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 235#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
181#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 236#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
182#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) 237#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
183#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) 238#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
184#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) 239#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
185#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 240#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
186#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 241#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
187#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 242#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -192,9 +247,9 @@ do { \
192#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 247#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
193#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 248#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
194 249
195#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) 250#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
196#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) 251#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
197#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) 252#define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
198#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 253#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
199#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 254#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
200#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 255#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -212,19 +267,19 @@ do { \
212#ifdef CONFIG_X86_64 267#ifdef CONFIG_X86_64
213#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 268#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
214#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 269#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
215#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) 270#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
216#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 271#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
217#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 272#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
218#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 273#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
219 274
220#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 275#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
221#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 276#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
222#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) 277#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
223#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 278#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
224#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 279#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
225#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 280#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
226 281
227#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) 282#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
228#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 283#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
229#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 284#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
230#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 285#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
@@ -236,7 +291,7 @@ do { \
236({ \ 291({ \
237 int old__; \ 292 int old__; \
238 asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ 293 asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
239 : "=r" (old__), "+m" (per_cpu__##var) \ 294 : "=r" (old__), "+m" (var) \
240 : "dIr" (bit)); \ 295 : "dIr" (bit)); \
241 old__; \ 296 old__; \
242}) 297})
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index befd172c82ad..db6109a885a7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,7 +18,7 @@
18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
20 20
21#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) 21#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22)
22#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) 22#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21)
23#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) 23#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
24#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) 24#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
@@ -50,7 +50,7 @@
50 INTEL_ARCH_INV_MASK| \ 50 INTEL_ARCH_INV_MASK| \
51 INTEL_ARCH_EDGE_MASK|\ 51 INTEL_ARCH_EDGE_MASK|\
52 INTEL_ARCH_UNIT_MASK|\ 52 INTEL_ARCH_UNIT_MASK|\
53 INTEL_ARCH_EVTSEL_MASK) 53 INTEL_ARCH_EVENT_MASK)
54 54
55#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 55#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
56#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 56#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
@@ -117,6 +117,18 @@ union cpuid10_edx {
117 */ 117 */
118#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) 118#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
119 119
120/* IbsFetchCtl bits/masks */
121#define IBS_FETCH_RAND_EN (1ULL<<57)
122#define IBS_FETCH_VAL (1ULL<<49)
123#define IBS_FETCH_ENABLE (1ULL<<48)
124#define IBS_FETCH_CNT 0xFFFF0000ULL
125#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
126
127/* IbsOpCtl bits */
128#define IBS_OP_CNT_CTL (1ULL<<19)
129#define IBS_OP_VAL (1ULL<<18)
130#define IBS_OP_ENABLE (1ULL<<17)
131#define IBS_OP_MAX_CNT 0x0000FFFFULL
120 132
121#ifdef CONFIG_PERF_EVENTS 133#ifdef CONFIG_PERF_EVENTS
122extern void init_hw_perf_events(void); 134extern void init_hw_perf_events(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 01fd9461d323..2984a25ff383 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -19,7 +19,6 @@
19#include <asm/paravirt.h> 19#include <asm/paravirt.h>
20 20
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <linux/slab.h>
23#include <linux/list.h> 22#include <linux/list.h>
24#include <linux/spinlock.h> 23#include <linux/spinlock.h>
25 24
@@ -54,10 +53,10 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
54 in_irq() ? KM_IRQ_PTE : \ 53 in_irq() ? KM_IRQ_PTE : \
55 KM_PTE0) 54 KM_PTE0)
56#define pte_offset_map(dir, address) \ 55#define pte_offset_map(dir, address) \
57 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ 56 ((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) + \
58 pte_index((address))) 57 pte_index((address)))
59#define pte_offset_map_nested(dir, address) \ 58#define pte_offset_map_nested(dir, address) \
60 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ 59 ((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) + \
61 pte_index((address))) 60 pte_index((address)))
62#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) 61#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
63#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) 62#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
@@ -80,7 +79,7 @@ do { \
80 * The i386 doesn't have any external MMU info: the kernel page 79 * The i386 doesn't have any external MMU info: the kernel page
81 * tables contain all the necessary information. 80 * tables contain all the necessary information.
82 */ 81 */
83#define update_mmu_cache(vma, address, pte) do { } while (0) 82#define update_mmu_cache(vma, address, ptep) do { } while (0)
84 83
85#endif /* !__ASSEMBLY__ */ 84#endif /* !__ASSEMBLY__ */
86 85
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index c57a30117149..181be528c612 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -129,7 +129,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
129#define pte_unmap(pte) /* NOP */ 129#define pte_unmap(pte) /* NOP */
130#define pte_unmap_nested(pte) /* NOP */ 130#define pte_unmap_nested(pte) /* NOP */
131 131
132#define update_mmu_cache(vma, address, pte) do { } while (0) 132#define update_mmu_cache(vma, address, ptep) do { } while (0)
133 133
134/* Encode and de-code a swap entry */ 134/* Encode and de-code a swap entry */
135#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 135#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4009f6534f52..6f414ed88620 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -23,14 +23,4 @@ extern int reboot_force;
23 23
24long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); 24long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
25 25
26/*
27 * This looks more complex than it should be. But we need to
28 * get the type for the ~ right in round_down (it needs to be
29 * as wide as the result!), and we want to evaluate the macro
30 * arguments just once each.
31 */
32#define __round_mask(x,y) ((__typeof__(x))((y)-1))
33#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
34#define round_down(x,y) ((x) & ~__round_mask(x,y))
35
36#endif /* _ASM_X86_PROTO_H */ 26#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 20102808b191..69a686a7dff0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,14 +274,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
274 return 0; 274 return 0;
275} 275}
276 276
277/*
278 * These are defined as per linux/ptrace.h, which see.
279 */
280#define arch_has_single_step() (1) 277#define arch_has_single_step() (1)
281extern void user_enable_single_step(struct task_struct *);
282extern void user_disable_single_step(struct task_struct *);
283
284extern void user_enable_block_step(struct task_struct *);
285#ifdef CONFIG_X86_DEBUGCTLMSR 278#ifdef CONFIG_X86_DEBUGCTLMSR
286#define arch_has_block_step() (1) 279#define arch_has_block_step() (1)
287#else 280#else
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 18e496c98ff0..86b1506f4179 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -37,10 +37,8 @@ void setup_bios_corruption_check(void);
37 37
38#ifdef CONFIG_X86_VISWS 38#ifdef CONFIG_X86_VISWS
39extern void visws_early_detect(void); 39extern void visws_early_detect(void);
40extern int is_visws_box(void);
41#else 40#else
42static inline void visws_early_detect(void) { } 41static inline void visws_early_detect(void) { }
43static inline int is_visws_box(void) { return 0; }
44#endif 42#endif
45 43
46extern unsigned long saved_video_mode; 44extern unsigned long saved_video_mode;
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index d5f69045c100..3ad421784ae7 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -26,8 +26,8 @@ asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
26asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); 26asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
27asmlinkage long sys32_fstatat(unsigned int, char __user *, 27asmlinkage long sys32_fstatat(unsigned int, char __user *,
28 struct stat64 __user *, int); 28 struct stat64 __user *, int);
29struct mmap_arg_struct; 29struct mmap_arg_struct32;
30asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); 30asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
31asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); 31asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
32 32
33struct sigaction32; 33struct sigaction32;
@@ -40,8 +40,6 @@ asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
40 compat_sigset_t __user *, unsigned int); 40 compat_sigset_t __user *, unsigned int);
41asmlinkage long sys32_alarm(unsigned int); 41asmlinkage long sys32_alarm(unsigned int);
42 42
43struct sel_arg_struct;
44asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
45asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); 43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
46asmlinkage long sys32_sysfs(int, u32, u32); 44asmlinkage long sys32_sysfs(int, u32, u32);
47 45
@@ -56,11 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
56asmlinkage long sys32_personality(unsigned long); 54asmlinkage long sys32_personality(unsigned long);
57asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); 55asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
58 56
59struct oldold_utsname;
60struct old_utsname;
61asmlinkage long sys32_olduname(struct oldold_utsname __user *);
62long sys32_uname(struct old_utsname __user *);
63
64asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, 57asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
65 compat_uptr_t __user *, struct pt_regs *); 58 compat_uptr_t __user *, struct pt_regs *);
66asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); 59asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 8868b9420b0e..5c044b43e9a7 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -50,18 +50,6 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
50 struct old_sigaction __user *); 50 struct old_sigaction __user *);
51unsigned long sys_sigreturn(struct pt_regs *); 51unsigned long sys_sigreturn(struct pt_regs *);
52 52
53/* kernel/sys_i386_32.c */
54struct mmap_arg_struct;
55struct sel_arg_struct;
56struct oldold_utsname;
57struct old_utsname;
58
59asmlinkage int old_mmap(struct mmap_arg_struct __user *);
60asmlinkage int old_select(struct sel_arg_struct __user *);
61asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
62asmlinkage int sys_uname(struct old_utsname __user *);
63asmlinkage int sys_olduname(struct oldold_utsname __user *);
64
65/* kernel/vm86_32.c */ 53/* kernel/vm86_32.c */
66int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); 54int sys_vm86old(struct vm86_struct __user *, struct pt_regs *);
67int sys_vm86(unsigned long, unsigned long, struct pt_regs *); 55int sys_vm86(unsigned long, unsigned long, struct pt_regs *);
@@ -73,11 +61,8 @@ int sys_vm86(unsigned long, unsigned long, struct pt_regs *);
73long sys_arch_prctl(int, unsigned long); 61long sys_arch_prctl(int, unsigned long);
74 62
75/* kernel/sys_x86_64.c */ 63/* kernel/sys_x86_64.c */
76struct new_utsname;
77
78asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, 64asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
79 unsigned long, unsigned long, unsigned long); 65 unsigned long, unsigned long, unsigned long);
80asmlinkage long sys_uname(struct new_utsname __user *);
81 66
82#endif /* CONFIG_X86_32 */ 67#endif /* CONFIG_X86_32 */
83#endif /* _ASM_X86_SYSCALLS_H */ 68#endif /* _ASM_X86_SYSCALLS_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index e04740f7a0bb..b8fe48ee2ed9 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -32,7 +32,7 @@ extern void show_regs_common(void);
32 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 32 "movl %P[task_canary](%[next]), %%ebx\n\t" \
33 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 33 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
34#define __switch_canary_oparam \ 34#define __switch_canary_oparam \
35 , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) 35 , [stack_canary] "=m" (stack_canary.canary)
36#define __switch_canary_iparam \ 36#define __switch_canary_iparam \
37 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 37 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
38#else /* CC_STACKPROTECTOR */ 38#else /* CC_STACKPROTECTOR */
@@ -114,7 +114,7 @@ do { \
114 "movq %P[task_canary](%%rsi),%%r8\n\t" \ 114 "movq %P[task_canary](%%rsi),%%r8\n\t" \
115 "movq %%r8,"__percpu_arg([gs_canary])"\n\t" 115 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
116#define __switch_canary_oparam \ 116#define __switch_canary_oparam \
117 , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) 117 , [gs_canary] "=m" (irq_stack_union.stack_canary)
118#define __switch_canary_iparam \ 118#define __switch_canary_iparam \
119 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 119 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
120#else /* CC_STACKPROTECTOR */ 120#else /* CC_STACKPROTECTOR */
@@ -133,7 +133,7 @@ do { \
133 __switch_canary \ 133 __switch_canary \
134 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 134 "movq %P[thread_info](%%rsi),%%r8\n\t" \
135 "movq %%rax,%%rdi\n\t" \ 135 "movq %%rax,%%rdi\n\t" \
136 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ 136 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
137 "jnz ret_from_fork\n\t" \ 137 "jnz ret_from_fork\n\t" \
138 RESTORE_CONTEXT \ 138 RESTORE_CONTEXT \
139 : "=a" (last) \ 139 : "=a" (last) \
@@ -143,7 +143,7 @@ do { \
143 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 143 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
144 [_tif_fork] "i" (_TIF_FORK), \ 144 [_tif_fork] "i" (_TIF_FORK), \
145 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 145 [thread_info] "i" (offsetof(struct task_struct, stack)), \
146 [current_task] "m" (per_cpu_var(current_task)) \ 146 [current_task] "m" (current_task) \
147 __switch_canary_iparam \ 147 __switch_canary_iparam \
148 : "memory", "cc" __EXTRA_CLOBBER) 148 : "memory", "cc" __EXTRA_CLOBBER)
149#endif 149#endif
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 3baf379fa840..beb9b5f8f8a4 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -354,6 +354,7 @@
354#define __ARCH_WANT_STAT64 354#define __ARCH_WANT_STAT64
355#define __ARCH_WANT_SYS_ALARM 355#define __ARCH_WANT_SYS_ALARM
356#define __ARCH_WANT_SYS_GETHOSTNAME 356#define __ARCH_WANT_SYS_GETHOSTNAME
357#define __ARCH_WANT_SYS_IPC
357#define __ARCH_WANT_SYS_PAUSE 358#define __ARCH_WANT_SYS_PAUSE
358#define __ARCH_WANT_SYS_SGETMASK 359#define __ARCH_WANT_SYS_SGETMASK
359#define __ARCH_WANT_SYS_SIGNAL 360#define __ARCH_WANT_SYS_SIGNAL
@@ -366,6 +367,9 @@
366#define __ARCH_WANT_SYS_LLSEEK 367#define __ARCH_WANT_SYS_LLSEEK
367#define __ARCH_WANT_SYS_NICE 368#define __ARCH_WANT_SYS_NICE
368#define __ARCH_WANT_SYS_OLD_GETRLIMIT 369#define __ARCH_WANT_SYS_OLD_GETRLIMIT
370#define __ARCH_WANT_SYS_OLD_UNAME
371#define __ARCH_WANT_SYS_OLD_MMAP
372#define __ARCH_WANT_SYS_OLD_SELECT
369#define __ARCH_WANT_SYS_OLDUMOUNT 373#define __ARCH_WANT_SYS_OLDUMOUNT
370#define __ARCH_WANT_SYS_SIGPENDING 374#define __ARCH_WANT_SYS_SIGPENDING
371#define __ARCH_WANT_SYS_SIGPROCMASK 375#define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 4843f7ba754a..ff4307b0e81e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -146,7 +146,7 @@ __SYSCALL(__NR_wait4, sys_wait4)
146#define __NR_kill 62 146#define __NR_kill 62
147__SYSCALL(__NR_kill, sys_kill) 147__SYSCALL(__NR_kill, sys_kill)
148#define __NR_uname 63 148#define __NR_uname 63
149__SYSCALL(__NR_uname, sys_uname) 149__SYSCALL(__NR_uname, sys_newuname)
150 150
151#define __NR_semget 64 151#define __NR_semget 64
152__SYSCALL(__NR_semget, sys_semget) 152__SYSCALL(__NR_semget, sys_semget)
@@ -680,6 +680,7 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
680#define __ARCH_WANT_SYS_LLSEEK 680#define __ARCH_WANT_SYS_LLSEEK
681#define __ARCH_WANT_SYS_NICE 681#define __ARCH_WANT_SYS_NICE
682#define __ARCH_WANT_SYS_OLD_GETRLIMIT 682#define __ARCH_WANT_SYS_OLD_GETRLIMIT
683#define __ARCH_WANT_SYS_OLD_UNAME
683#define __ARCH_WANT_SYS_OLDUMOUNT 684#define __ARCH_WANT_SYS_OLDUMOUNT
684#define __ARCH_WANT_SYS_SIGPENDING 685#define __ARCH_WANT_SYS_SIGPENDING
685#define __ARCH_WANT_SYS_SIGPROCMASK 686#define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h
index 166adf61e770..2edb37637ead 100644
--- a/arch/x86/include/asm/visws/cobalt.h
+++ b/arch/x86/include/asm/visws/cobalt.h
@@ -122,4 +122,6 @@ extern char visws_board_type;
122 122
123extern char visws_board_rev; 123extern char visws_board_rev;
124 124
125extern int pci_visws_init(void);
126
125#endif /* _ASM_X86_VISWS_COBALT_H */ 127#endif /* _ASM_X86_VISWS_COBALT_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 60cc35269083..519b54327d75 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -99,6 +99,20 @@ struct x86_init_iommu {
99}; 99};
100 100
101/** 101/**
102 * struct x86_init_pci - platform specific pci init functions
103 * @arch_init: platform specific pci arch init call
104 * @init: platform specific pci subsystem init
105 * @init_irq: platform specific pci irq init
106 * @fixup_irqs: platform specific pci irq fixup
107 */
108struct x86_init_pci {
109 int (*arch_init)(void);
110 int (*init)(void);
111 void (*init_irq)(void);
112 void (*fixup_irqs)(void);
113};
114
115/**
102 * struct x86_init_ops - functions for platform specific setup 116 * struct x86_init_ops - functions for platform specific setup
103 * 117 *
104 */ 118 */
@@ -110,6 +124,7 @@ struct x86_init_ops {
110 struct x86_init_paging paging; 124 struct x86_init_paging paging;
111 struct x86_init_timers timers; 125 struct x86_init_timers timers;
112 struct x86_init_iommu iommu; 126 struct x86_init_iommu iommu;
127 struct x86_init_pci pci;
113}; 128};
114 129
115/** 130/**
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d87f09bc5a52..4c58352209e0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
88 88
89obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o
90 91
91obj-$(CONFIG_K8_NB) += k8.o 92obj-$(CONFIG_K8_NB) += k8.o
92obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 93obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f95703098f8d..cd40aba6aa95 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,10 +31,12 @@
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/dmi.h> 32#include <linux/dmi.h>
33#include <linux/irq.h> 33#include <linux/irq.h>
34#include <linux/slab.h>
34#include <linux/bootmem.h> 35#include <linux/bootmem.h>
35#include <linux/ioport.h> 36#include <linux/ioport.h>
36#include <linux/pci.h> 37#include <linux/pci.h>
37 38
39#include <asm/pci_x86.h>
38#include <asm/pgtable.h> 40#include <asm/pgtable.h>
39#include <asm/io_apic.h> 41#include <asm/io_apic.h>
40#include <asm/apic.h> 42#include <asm/apic.h>
@@ -447,6 +449,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
447int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) 449int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
448{ 450{
449 *irq = gsi; 451 *irq = gsi;
452
453#ifdef CONFIG_X86_IO_APIC
454 if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
455 setup_IO_APIC_irq_extra(gsi);
456#endif
457
450 return 0; 458 return 0;
451} 459}
452 460
@@ -474,7 +482,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
474 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); 482 plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
475 } 483 }
476#endif 484#endif
477 acpi_gsi_to_irq(plat_gsi, &irq); 485 irq = plat_gsi;
486
478 return irq; 487 return irq;
479} 488}
480 489
@@ -482,6 +491,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
482 * ACPI based hotplug support for CPU 491 * ACPI based hotplug support for CPU
483 */ 492 */
484#ifdef CONFIG_ACPI_HOTPLUG_CPU 493#ifdef CONFIG_ACPI_HOTPLUG_CPU
494#include <acpi/processor.h>
485 495
486static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) 496static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
487{ 497{
@@ -559,6 +569,8 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
559 goto free_new_map; 569 goto free_new_map;
560 } 570 }
561 571
572 acpi_processor_set_pdc(handle);
573
562 cpu = cpumask_first(new_map); 574 cpu = cpumask_first(new_map);
563 acpi_map_cpu2node(handle, cpu, physid); 575 acpi_map_cpu2node(handle, cpu, physid);
564 576
@@ -1285,23 +1297,6 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
1285} 1297}
1286 1298
1287/* 1299/*
1288 * Limit ACPI to CPU enumeration for HT
1289 */
1290static int __init force_acpi_ht(const struct dmi_system_id *d)
1291{
1292 if (!acpi_force) {
1293 printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
1294 d->ident);
1295 disable_acpi();
1296 acpi_ht = 1;
1297 } else {
1298 printk(KERN_NOTICE
1299 "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
1300 }
1301 return 0;
1302}
1303
1304/*
1305 * Force ignoring BIOS IRQ0 pin2 override 1300 * Force ignoring BIOS IRQ0 pin2 override
1306 */ 1301 */
1307static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) 1302static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
@@ -1337,82 +1332,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1337 }, 1332 },
1338 1333
1339 /* 1334 /*
1340 * Boxes that need acpi=ht
1341 */
1342 {
1343 .callback = force_acpi_ht,
1344 .ident = "FSC Primergy T850",
1345 .matches = {
1346 DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
1347 DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
1348 },
1349 },
1350 {
1351 .callback = force_acpi_ht,
1352 .ident = "HP VISUALIZE NT Workstation",
1353 .matches = {
1354 DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
1355 DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
1356 },
1357 },
1358 {
1359 .callback = force_acpi_ht,
1360 .ident = "Compaq Workstation W8000",
1361 .matches = {
1362 DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
1363 DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
1364 },
1365 },
1366 {
1367 .callback = force_acpi_ht,
1368 .ident = "ASUS CUR-DLS",
1369 .matches = {
1370 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
1371 DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
1372 },
1373 },
1374 {
1375 .callback = force_acpi_ht,
1376 .ident = "ABIT i440BX-W83977",
1377 .matches = {
1378 DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
1379 DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
1380 },
1381 },
1382 {
1383 .callback = force_acpi_ht,
1384 .ident = "IBM Bladecenter",
1385 .matches = {
1386 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
1387 DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
1388 },
1389 },
1390 {
1391 .callback = force_acpi_ht,
1392 .ident = "IBM eServer xSeries 360",
1393 .matches = {
1394 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
1395 DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
1396 },
1397 },
1398 {
1399 .callback = force_acpi_ht,
1400 .ident = "IBM eserver xSeries 330",
1401 .matches = {
1402 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
1403 DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
1404 },
1405 },
1406 {
1407 .callback = force_acpi_ht,
1408 .ident = "IBM eserver xSeries 440",
1409 .matches = {
1410 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
1411 DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
1412 },
1413 },
1414
1415 /*
1416 * Boxes that need ACPI PCI IRQ routing disabled 1335 * Boxes that need ACPI PCI IRQ routing disabled
1417 */ 1336 */
1418 { 1337 {
@@ -1617,6 +1536,9 @@ int __init acpi_boot_init(void)
1617 1536
1618 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); 1537 acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
1619 1538
1539 if (!acpi_noirq)
1540 x86_init.pci.init = pci_acpi_init;
1541
1620 return 0; 1542 return 0;
1621} 1543}
1622 1544
@@ -1641,8 +1563,10 @@ static int __init parse_acpi(char *arg)
1641 } 1563 }
1642 /* Limit ACPI just to boot-time to enable HT */ 1564 /* Limit ACPI just to boot-time to enable HT */
1643 else if (strcmp(arg, "ht") == 0) { 1565 else if (strcmp(arg, "ht") == 0) {
1644 if (!acpi_force) 1566 if (!acpi_force) {
1567 printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n");
1645 disable_acpi(); 1568 disable_acpi();
1569 }
1646 acpi_ht = 1; 1570 acpi_ht = 1;
1647 } 1571 }
1648 /* acpi=rsdt use RSDT instead of XSDT */ 1572 /* acpi=rsdt use RSDT instead of XSDT */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e6ea0342c8f8..1a160d5d44d0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,8 @@
7#include <linux/mm.h> 7#include <linux/mm.h>
8#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
9#include <linux/memory.h> 9#include <linux/memory.h>
10#include <linux/stop_machine.h>
11#include <linux/slab.h>
10#include <asm/alternative.h> 12#include <asm/alternative.h>
11#include <asm/sections.h> 13#include <asm/sections.h>
12#include <asm/pgtable.h> 14#include <asm/pgtable.h>
@@ -572,3 +574,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
572 local_irq_restore(flags); 574 local_irq_restore(flags);
573 return addr; 575 return addr;
574} 576}
577
578/*
579 * Cross-modifying kernel text with stop_machine().
580 * This code originally comes from immediate value.
581 */
/*
 * Set to 1 by text_poke_smp() before stop_machine() runs; the CPU whose
 * atomic_dec_and_test() succeeds in stop_machine_text_poke() does the write.
 */
582static atomic_t stop_machine_first;
/*
 * Cleared by text_poke_smp() and set to 1 by the writing CPU once
 * text_poke() has finished; the other CPUs spin until it becomes non-zero.
 */
583static int wrote_text;
584
/* Arguments handed from text_poke_smp() to stop_machine_text_poke(). */
585struct text_poke_params {
/* destination address forwarded to text_poke() */
586 void *addr;
/* source of the copy forwarded to text_poke() */
587 const void *opcode;
/* length to copy */
588 size_t len;
589};
590
591static int __kprobes stop_machine_text_poke(void *data)
592{
593 struct text_poke_params *tpp = data;
594
595 if (atomic_dec_and_test(&stop_machine_first)) {
596 text_poke(tpp->addr, tpp->opcode, tpp->len);
597 smp_wmb(); /* Make sure other cpus see that this has run */
598 wrote_text = 1;
599 } else {
600 while (!wrote_text)
601 cpu_relax();
602 smp_mb(); /* Load wrote_text before following execution */
603 }
604
605 flush_icache_range((unsigned long)tpp->addr,
606 (unsigned long)tpp->addr + tpp->len);
607 return 0;
608}
609
610/**
611 * text_poke_smp - Update instructions on a live kernel on SMP
612 * @addr: address to modify
613 * @opcode: source of the copy
614 * @len: length to copy
615 *
616 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
617 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
618 * should be allowed, since stop_machine() does _not_ protect code against
619 * NMI and MCE.
620 *
621 * Note: Must be called under get_online_cpus() and text_mutex.
622 */
623void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
624{
625 struct text_poke_params tpp;
626
627 tpp.addr = addr;
628 tpp.opcode = opcode;
629 tpp.len = len;
630 atomic_set(&stop_machine_first, 1);
631 wrote_text = 0;
632 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
633 return addr;
634}
635
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d8da9988edd9..fa5a1474cd18 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,8 +18,8 @@
18 */ 18 */
19 19
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/gfp.h>
22#include <linux/bitmap.h> 21#include <linux/bitmap.h>
22#include <linux/slab.h>
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
25#include <linux/dma-mapping.h> 25#include <linux/dma-mapping.h>
@@ -118,7 +118,7 @@ static bool check_device(struct device *dev)
118 return false; 118 return false;
119 119
120 /* No device or no PCI device */ 120 /* No device or no PCI device */
121 if (!dev || dev->bus != &pci_bus_type) 121 if (dev->bus != &pci_bus_type)
122 return false; 122 return false;
123 123
124 devid = get_device_id(dev); 124 devid = get_device_id(dev);
@@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
392 u32 tail, head; 392 u32 tail, head;
393 u8 *target; 393 u8 *target;
394 394
395 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
395 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 396 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
396 target = iommu->cmd_buf + tail; 397 target = iommu->cmd_buf + tail;
397 memcpy_toio(target, cmd, sizeof(*cmd)); 398 memcpy_toio(target, cmd, sizeof(*cmd));
@@ -2253,7 +2254,7 @@ static void prealloc_protection_domains(void)
2253 struct dma_ops_domain *dma_dom; 2254 struct dma_ops_domain *dma_dom;
2254 u16 devid; 2255 u16 devid;
2255 2256
2256 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2257 for_each_pci_dev(dev) {
2257 2258
2258 /* Do we handle this device? */ 2259 /* Do we handle this device? */
2259 if (!check_device(&dev->dev)) 2260 if (!check_device(&dev->dev))
@@ -2365,7 +2366,7 @@ static void cleanup_domain(struct protection_domain *domain)
2365 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { 2366 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2366 struct device *dev = dev_data->dev; 2367 struct device *dev = dev_data->dev;
2367 2368
2368 do_detach(dev); 2369 __detach_device(dev);
2369 atomic_set(&dev_data->bind, 0); 2370 atomic_set(&dev_data->bind, 0);
2370 } 2371 }
2371 2372
@@ -2394,6 +2395,7 @@ static struct protection_domain *protection_domain_alloc(void)
2394 return NULL; 2395 return NULL;
2395 2396
2396 spin_lock_init(&domain->lock); 2397 spin_lock_init(&domain->lock);
2398 mutex_init(&domain->api_lock);
2397 domain->id = domain_id_alloc(); 2399 domain->id = domain_id_alloc();
2398 if (!domain->id) 2400 if (!domain->id)
2399 goto out_err; 2401 goto out_err;
@@ -2446,9 +2448,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2446 2448
2447 free_pagetable(domain); 2449 free_pagetable(domain);
2448 2450
2449 domain_id_free(domain->id); 2451 protection_domain_free(domain);
2450
2451 kfree(domain);
2452 2452
2453 dom->priv = NULL; 2453 dom->priv = NULL;
2454} 2454}
@@ -2512,13 +2512,18 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2512 unsigned long page_size = 0x1000UL << gfp_order; 2512 unsigned long page_size = 0x1000UL << gfp_order;
2513 struct protection_domain *domain = dom->priv; 2513 struct protection_domain *domain = dom->priv;
2514 int prot = 0; 2514 int prot = 0;
2515 int ret;
2515 2516
2516 if (iommu_prot & IOMMU_READ) 2517 if (iommu_prot & IOMMU_READ)
2517 prot |= IOMMU_PROT_IR; 2518 prot |= IOMMU_PROT_IR;
2518 if (iommu_prot & IOMMU_WRITE) 2519 if (iommu_prot & IOMMU_WRITE)
2519 prot |= IOMMU_PROT_IW; 2520 prot |= IOMMU_PROT_IW;
2520 2521
2521 return iommu_map_page(domain, iova, paddr, prot, page_size); 2522 mutex_lock(&domain->api_lock);
2523 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2524 mutex_unlock(&domain->api_lock);
2525
2526 return ret;
2522} 2527}
2523 2528
2524static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, 2529static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
@@ -2528,7 +2533,12 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2528 unsigned long page_size, unmap_size; 2533 unsigned long page_size, unmap_size;
2529 2534
2530 page_size = 0x1000UL << gfp_order; 2535 page_size = 0x1000UL << gfp_order;
2536
2537 mutex_lock(&domain->api_lock);
2531 unmap_size = iommu_unmap_page(domain, iova, page_size); 2538 unmap_size = iommu_unmap_page(domain, iova, page_size);
2539 mutex_unlock(&domain->api_lock);
2540
2541 iommu_flush_tlb_pde(domain);
2532 2542
2533 return get_order(unmap_size); 2543 return get_order(unmap_size);
2534} 2544}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 9dc91b431470..3bacb4d0844c 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/acpi.h> 21#include <linux/acpi.h>
22#include <linux/gfp.h>
23#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/slab.h>
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
@@ -120,6 +120,7 @@ struct ivmd_header {
120bool amd_iommu_dump; 120bool amd_iommu_dump;
121 121
122static int __initdata amd_iommu_detected; 122static int __initdata amd_iommu_detected;
123static bool __initdata amd_iommu_disabled;
123 124
124u16 amd_iommu_last_bdf; /* largest PCI device id we have 125u16 amd_iommu_last_bdf; /* largest PCI device id we have
125 to handle */ 126 to handle */
@@ -138,9 +139,9 @@ int amd_iommus_present;
138bool amd_iommu_np_cache __read_mostly; 139bool amd_iommu_np_cache __read_mostly;
139 140
140/* 141/*
141 * Set to true if ACPI table parsing and hardware intialization went properly 142 * The ACPI table parsing functions set this variable on an error
142 */ 143 */
143static bool amd_iommu_initialized; 144static int __initdata amd_iommu_init_err;
144 145
145/* 146/*
146 * List of protection domains - used during resume 147 * List of protection domains - used during resume
@@ -391,9 +392,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
391 */ 392 */
392 for (i = 0; i < table->length; ++i) 393 for (i = 0; i < table->length; ++i)
393 checksum += p[i]; 394 checksum += p[i];
394 if (checksum != 0) 395 if (checksum != 0) {
395 /* ACPI table corrupt */ 396 /* ACPI table corrupt */
396 return -ENODEV; 397 amd_iommu_init_err = -ENODEV;
398 return 0;
399 }
397 400
398 p += IVRS_HEADER_LENGTH; 401 p += IVRS_HEADER_LENGTH;
399 402
@@ -436,7 +439,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
436 if (cmd_buf == NULL) 439 if (cmd_buf == NULL)
437 return NULL; 440 return NULL;
438 441
439 iommu->cmd_buf_size = CMD_BUFFER_SIZE; 442 iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
440 443
441 return cmd_buf; 444 return cmd_buf;
442} 445}
@@ -472,12 +475,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
472 &entry, sizeof(entry)); 475 &entry, sizeof(entry));
473 476
474 amd_iommu_reset_cmd_buffer(iommu); 477 amd_iommu_reset_cmd_buffer(iommu);
478 iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
475} 479}
476 480
477static void __init free_command_buffer(struct amd_iommu *iommu) 481static void __init free_command_buffer(struct amd_iommu *iommu)
478{ 482{
479 free_pages((unsigned long)iommu->cmd_buf, 483 free_pages((unsigned long)iommu->cmd_buf,
480 get_order(iommu->cmd_buf_size)); 484 get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
481} 485}
482 486
483/* allocates the memory where the IOMMU will log its events to */ 487/* allocates the memory where the IOMMU will log its events to */
@@ -920,11 +924,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
920 h->mmio_phys); 924 h->mmio_phys);
921 925
922 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 926 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
923 if (iommu == NULL) 927 if (iommu == NULL) {
924 return -ENOMEM; 928 amd_iommu_init_err = -ENOMEM;
929 return 0;
930 }
931
925 ret = init_iommu_one(iommu, h); 932 ret = init_iommu_one(iommu, h);
926 if (ret) 933 if (ret) {
927 return ret; 934 amd_iommu_init_err = ret;
935 return 0;
936 }
928 break; 937 break;
929 default: 938 default:
930 break; 939 break;
@@ -934,8 +943,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
934 } 943 }
935 WARN_ON(p != end); 944 WARN_ON(p != end);
936 945
937 amd_iommu_initialized = true;
938
939 return 0; 946 return 0;
940} 947}
941 948
@@ -1211,6 +1218,10 @@ static int __init amd_iommu_init(void)
1211 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) 1218 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
1212 return -ENODEV; 1219 return -ENODEV;
1213 1220
1221 ret = amd_iommu_init_err;
1222 if (ret)
1223 goto out;
1224
1214 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); 1225 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
1215 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); 1226 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
1216 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); 1227 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
@@ -1270,12 +1281,19 @@ static int __init amd_iommu_init(void)
1270 if (acpi_table_parse("IVRS", init_iommu_all) != 0) 1281 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
1271 goto free; 1282 goto free;
1272 1283
1273 if (!amd_iommu_initialized) 1284 if (amd_iommu_init_err) {
1285 ret = amd_iommu_init_err;
1274 goto free; 1286 goto free;
1287 }
1275 1288
1276 if (acpi_table_parse("IVRS", init_memory_definitions) != 0) 1289 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
1277 goto free; 1290 goto free;
1278 1291
1292 if (amd_iommu_init_err) {
1293 ret = amd_iommu_init_err;
1294 goto free;
1295 }
1296
1279 ret = sysdev_class_register(&amd_iommu_sysdev_class); 1297 ret = sysdev_class_register(&amd_iommu_sysdev_class);
1280 if (ret) 1298 if (ret)
1281 goto free; 1299 goto free;
@@ -1288,6 +1306,8 @@ static int __init amd_iommu_init(void)
1288 if (ret) 1306 if (ret)
1289 goto free; 1307 goto free;
1290 1308
1309 enable_iommus();
1310
1291 if (iommu_pass_through) 1311 if (iommu_pass_through)
1292 ret = amd_iommu_init_passthrough(); 1312 ret = amd_iommu_init_passthrough();
1293 else 1313 else
@@ -1300,8 +1320,6 @@ static int __init amd_iommu_init(void)
1300 1320
1301 amd_iommu_init_notifier(); 1321 amd_iommu_init_notifier();
1302 1322
1303 enable_iommus();
1304
1305 if (iommu_pass_through) 1323 if (iommu_pass_through)
1306 goto out; 1324 goto out;
1307 1325
@@ -1315,6 +1333,7 @@ out:
1315 return ret; 1333 return ret;
1316 1334
1317free: 1335free:
1336 disable_iommus();
1318 1337
1319 amd_iommu_uninit_devices(); 1338 amd_iommu_uninit_devices();
1320 1339
@@ -1354,6 +1373,9 @@ void __init amd_iommu_detect(void)
1354 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 1373 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1355 return; 1374 return;
1356 1375
1376 if (amd_iommu_disabled)
1377 return;
1378
1357 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1379 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1358 iommu_detected = 1; 1380 iommu_detected = 1;
1359 amd_iommu_detected = 1; 1381 amd_iommu_detected = 1;
@@ -1383,6 +1405,8 @@ static int __init parse_amd_iommu_options(char *str)
1383 for (; *str; ++str) { 1405 for (; *str; ++str) {
1384 if (strncmp(str, "fullflush", 9) == 0) 1406 if (strncmp(str, "fullflush", 9) == 0)
1385 amd_iommu_unmap_flush = true; 1407 amd_iommu_unmap_flush = true;
1408 if (strncmp(str, "off", 3) == 0)
1409 amd_iommu_disabled = true;
1386 } 1410 }
1387 1411
1388 return 1; 1412 return 1;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
new file mode 100644
index 000000000000..a35347501d36
--- /dev/null
+++ b/arch/x86/kernel/apb_timer.c
@@ -0,0 +1,785 @@
1/*
2 * apb_timer.c: Driver for Langwell APB timers
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 * Note:
13 * Langwell is the south complex of Intel Moorestown MID platform. There are
14 * eight external timers in total that can be used by the operating system.
15 * The timer information, such as frequency and addresses, is provided to the
16 * OS via SFI tables.
17 * Timer interrupts are routed via FW/HW emulated IOAPIC independently via
18 * individual redirection table entries (RTE).
19 * Unlike HPET, there is no master counter, therefore one of the timers is
20 * used as clocksource. The overall allocation looks like:
21 * - timer 0 - NR_CPUs for per cpu timer
22 * - one timer for clocksource
23 * - one timer for watchdog driver.
24 * It is also worth noting that the APB timer does not support a true one-shot
25 * mode; free-running mode is used here to emulate one-shot mode.
26 * The APB timer can also be used as a broadcast timer along with the per cpu
27 * local APIC timer, but by default it has a higher rating than local APIC timers.
28 */
29
30#include <linux/clocksource.h>
31#include <linux/clockchips.h>
32#include <linux/delay.h>
33#include <linux/errno.h>
34#include <linux/init.h>
35#include <linux/sysdev.h>
36#include <linux/slab.h>
37#include <linux/pm.h>
38#include <linux/pci.h>
39#include <linux/sfi.h>
40#include <linux/interrupt.h>
41#include <linux/cpu.h>
42#include <linux/irq.h>
43
44#include <asm/fixmap.h>
45#include <asm/apb_timer.h>
46
/* Counter mask installed in clocksource_apbt.mask. */
47#define APBT_MASK CLOCKSOURCE_MASK(32)
/* Shift used by both clocksource_apbt and the apbt_clockevent template. */
48#define APBT_SHIFT 22
/* Default clock event rating; lowered by 100 when per-cpu APBT is disabled. */
49#define APBT_CLOCKEVENT_RATING 150
/* Rating installed in clocksource_apbt.rating. */
50#define APBT_CLOCKSOURCE_RATING 250
/* Multiplied by apbt_freq to derive the clock event min_delta_ns. */
51#define APBT_MIN_DELTA_USEC 200
52
/*
 * Recover the struct apbt_dev that embeds a clock_event_device.  The macro
 * argument shares its name with the struct member, so the expression passed
 * in must be spelled "evt".
 */
53#define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt)
/* Timer numbers passed as hints to sfi_get_mtmr(). */
54#define APBT_CLOCKEVENT0_NUM (0)
55#define APBT_CLOCKEVENT1_NUM (1)
56#define APBT_CLOCKSOURCE_NUM (2)
57
/* Physical base of the timer block: from SFI, or APBT_DEFAULT_BASE if SFI gives 0. */
58static unsigned long apbt_address;
/* Checked by apbt_late_init(); NOTE(review): it is set outside the code visible here. */
59static int apb_timer_block_enabled;
/* ioremap_nocache() mapping of apbt_address; NULL while unmapped. */
60static void __iomem *apbt_virt_address;
/* Timer index used for the clocksource, derived in apbt_set_mapping(). */
61static int phy_cs_timer_id;
62
63/*
64 * Common DW APB timer info
65 */
/* SFI freq_hz divided by USEC_PER_SEC, i.e. timer ticks per microsecond. */
66static uint64_t apbt_freq;
67
/* Forward declarations for the callbacks referenced by the static tables below. */
68static void apbt_set_mode(enum clock_event_mode mode,
69 struct clock_event_device *evt);
70static int apbt_next_event(unsigned long delta,
71 struct clock_event_device *evt);
72static cycle_t apbt_read_clocksource(struct clocksource *cs);
73static void apbt_restart_clocksource(struct clocksource *cs);
74
/*
 * Per-timer state.  One instance exists per CPU (cpu_apbt_dev); the embedded
 * clock_event_device is mapped back to this struct with EVT_TO_APBT_DEV().
 */
75struct apbt_dev {
/* clock event device registered with the clockevents core */
76 struct clock_event_device evt;
/* timer index used for register access in the mode/next-event callbacks */
77 unsigned int num;
/* CPU the timer irq is bound to in apbt_setup_irq() */
78 int cpu;
/* irq line of this timer; 0 means "already set up early" in apbt_setup_irq() */
79 unsigned int irq;
/* NOTE(review): tick, count and flags are not used in the code visible here */
80 unsigned int tick;
81 unsigned int count;
82 unsigned int flags;
/* used as the clock event name and irq name for secondary CPUs */
83 char name[10];
84};
85
/*
 * Non-zero when per-cpu APB timers are not wanted; set from the
 * "x86_mrst_timer=" boot option (lapic_and_apbt => 1, apbt_only => 0).
 */
86int disable_apbt_percpu __cpuinitdata;
87
/* One struct apbt_dev per CPU. */
88static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);
89
90#ifdef CONFIG_SMP
/* NOTE(review): neither variable is referenced in the code visible here. */
91static unsigned int apbt_num_timers_used;
92static struct apbt_dev *apbt_devs;
93#endif
94
95static inline unsigned long apbt_readl_reg(unsigned long a)
96{
97 return readl(apbt_virt_address + a);
98}
99
100static inline void apbt_writel_reg(unsigned long d, unsigned long a)
101{
102 writel(d, apbt_virt_address + a);
103}
104
105static inline unsigned long apbt_readl(int n, unsigned long a)
106{
107 return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE);
108}
109
110static inline void apbt_writel(int n, unsigned long d, unsigned long a)
111{
112 writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE);
113}
114
115static inline void apbt_set_mapping(void)
116{
117 struct sfi_timer_table_entry *mtmr;
118
119 if (apbt_virt_address) {
120 pr_debug("APBT base already mapped\n");
121 return;
122 }
123 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
124 if (mtmr == NULL) {
125 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
126 APBT_CLOCKEVENT0_NUM);
127 return;
128 }
129 apbt_address = (unsigned long)mtmr->phys_addr;
130 if (!apbt_address) {
131 printk(KERN_WARNING "No timer base from SFI, use default\n");
132 apbt_address = APBT_DEFAULT_BASE;
133 }
134 apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE);
135 if (apbt_virt_address) {
136 pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\
137 (void *)apbt_address, (void *)apbt_virt_address);
138 } else {
139 pr_debug("Failed mapping APBT phy address at %p\n",\
140 (void *)apbt_address);
141 goto panic_noapbt;
142 }
143 apbt_freq = mtmr->freq_hz / USEC_PER_SEC;
144 sfi_free_mtmr(mtmr);
145
146 /* Now figure out the physical timer id for clocksource device */
147 mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM);
148 if (mtmr == NULL)
149 goto panic_noapbt;
150
151 /* Now figure out the physical timer id */
152 phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff)
153 / APBTMRS_REG_SIZE;
154 pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id);
155 return;
156
157panic_noapbt:
158 panic("Failed to setup APB system timer\n");
159
160}
161
/*
 * Unmap the APBT register block and clear apbt_virt_address, so that
 * is_apbt_capable() reports 0 and apbt_set_mapping() will map it again.
 */
162static inline void apbt_clear_mapping(void)
163{
164 iounmap(apbt_virt_address);
165 apbt_virt_address = NULL;
166}
167
168/*
169 * APBT timer interrupt enable / disable
170 */
171static inline int is_apbt_capable(void)
172{
173 return apbt_virt_address ? 1 : 0;
174}
175
/*
 * Clocksource backed by an APB timer: read through apbt_read_clocksource()
 * and restarted on resume through apbt_restart_clocksource().
 */
176static struct clocksource clocksource_apbt = {
177 .name = "apbt",
178 .rating = APBT_CLOCKSOURCE_RATING,
179 .read = apbt_read_clocksource,
180 .mask = APBT_MASK,
181 .shift = APBT_SHIFT,
182 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
183 .resume = apbt_restart_clocksource,
184};
185
186/* boot APB clock event device */
/*
 * Also serves as the template that is memcpy()'d into each per-cpu
 * struct apbt_dev; .mult, the delta limits and .cpumask are filled in by
 * apbt_clockevent_register().
 */
187static struct clock_event_device apbt_clockevent = {
188 .name = "apbt0",
189 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
190 .set_mode = apbt_set_mode,
191 .set_next_event = apbt_next_event,
192 .shift = APBT_SHIFT,
193 .irq = 0,
194 .rating = APBT_CLOCKEVENT_RATING,
195};
196
197/*
198 * if user does not want to use per CPU apb timer, just give it a lower rating
199 * than local apic timer and skip the late per cpu timer init.
200 */
201static inline int __init setup_x86_mrst_timer(char *arg)
202{
203 if (!arg)
204 return -EINVAL;
205
206 if (strcmp("apbt_only", arg) == 0)
207 disable_apbt_percpu = 0;
208 else if (strcmp("lapic_and_apbt", arg) == 0)
209 disable_apbt_percpu = 1;
210 else {
211 pr_warning("X86 MRST timer option %s not recognised"
212 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
213 arg);
214 return -EINVAL;
215 }
216 return 0;
217}
218__setup("x86_mrst_timer=", setup_x86_mrst_timer);
219
220/*
221 * start count down from 0xffff_ffff. this is done by toggling the enable bit
222 * then load initial load count to ~0.
223 */
224static void apbt_start_counter(int n)
225{
226 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
227
228 ctrl &= ~APBTMR_CONTROL_ENABLE;
229 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
230 apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT);
231 /* enable, mask interrupt */
232 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
233 ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT);
234 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
235 /* read it once to get cached counter value initialized */
236 apbt_read_clocksource(&clocksource_apbt);
237}
238
239static irqreturn_t apbt_interrupt_handler(int irq, void *data)
240{
241 struct apbt_dev *dev = (struct apbt_dev *)data;
242 struct clock_event_device *aevt = &dev->evt;
243
244 if (!aevt->event_handler) {
245 printk(KERN_INFO "Spurious APBT timer interrupt on %d\n",
246 dev->num);
247 return IRQ_NONE;
248 }
249 aevt->event_handler(aevt);
250 return IRQ_HANDLED;
251}
252
/*
 * clocksource .resume callback: restart the timer chosen as clocksource
 * (phy_cs_timer_id).  @cs is unused.
 */
253static void apbt_restart_clocksource(struct clocksource *cs)
254{
255 apbt_start_counter(phy_cs_timer_id);
256}
257
/* Setup IRQ routing via IOAPIC */
#ifdef CONFIG_SMP
/*
 * Bind the irq of @adev to adev->cpu and install the edge handler for it.
 * An irq number of 0 is skipped because timer0's irq was set up early.
 * The interrupt handler itself is only requested while the system is
 * still booting.
 */
static void apbt_setup_irq(struct apbt_dev *adev)
{
	unsigned int irq = adev->irq;
	struct irq_desc *desc;
	struct irq_chip *chip;

	/* timer0 irq has been setup early */
	if (irq == 0)
		return;

	desc = irq_to_desc(irq);
	chip = get_irq_chip(irq);

	disable_irq(irq);
	desc->status |= IRQ_MOVE_PCNTXT;
	irq_set_affinity(irq, cpumask_of(adev->cpu));
	/* APB timer irqs are set up as mp_irqs, timer is edge triggered */
	set_irq_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
	enable_irq(irq);

	if (system_state != SYSTEM_BOOTING)
		return;

	if (request_irq(irq, apbt_interrupt_handler,
			IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
			adev->name, adev)) {
		printk(KERN_ERR "Failed request IRQ for APBT%d\n",
		       adev->num);
	}
}
#endif
285
286static void apbt_enable_int(int n)
287{
288 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
289 /* clear pending intr */
290 apbt_readl(n, APBTMR_N_EOI);
291 ctrl &= ~APBTMR_CONTROL_INT;
292 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
293}
294
295static void apbt_disable_int(int n)
296{
297 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
298
299 ctrl |= APBTMR_CONTROL_INT;
300 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
301}
302
303
304static int __init apbt_clockevent_register(void)
305{
306 struct sfi_timer_table_entry *mtmr;
307 struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev);
308
309 mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
310 if (mtmr == NULL) {
311 printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
312 APBT_CLOCKEVENT0_NUM);
313 return -ENODEV;
314 }
315
316 /*
317 * We need to calculate the scaled math multiplication factor for
318 * nanosecond to apbt tick conversion.
319 * mult = (nsec/cycle)*2^APBT_SHIFT
320 */
321 apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz
322 , NSEC_PER_SEC, APBT_SHIFT);
323
324 /* Calculate the min / max delta */
325 apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
326 &apbt_clockevent);
327 apbt_clockevent.min_delta_ns = clockevent_delta2ns(
328 APBT_MIN_DELTA_USEC*apbt_freq,
329 &apbt_clockevent);
330 /*
331 * Start apbt with the boot cpu mask and make it
332 * global if not used for per cpu timer.
333 */
334 apbt_clockevent.cpumask = cpumask_of(smp_processor_id());
335 adev->num = smp_processor_id();
336 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
337
338 if (disable_apbt_percpu) {
339 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
340 global_clock_event = &adev->evt;
341 printk(KERN_DEBUG "%s clockevent registered as global\n",
342 global_clock_event->name);
343 }
344
345 if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler,
346 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
347 apbt_clockevent.name, adev)) {
348 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
349 apbt_clockevent.irq);
350 }
351
352 clockevents_register_device(&adev->evt);
353 /* Start APBT 0 interrupts */
354 apbt_enable_int(APBT_CLOCKEVENT0_NUM);
355
356 sfi_free_mtmr(mtmr);
357 return 0;
358}
359
360#ifdef CONFIG_SMP
361/* Should be called with per cpu */
362void apbt_setup_secondary_clock(void)
363{
364 struct apbt_dev *adev;
365 struct clock_event_device *aevt;
366 int cpu;
367
368 /* Don't register boot CPU clockevent */
369 cpu = smp_processor_id();
370 if (cpu == boot_cpu_id)
371 return;
372 /*
373 * We need to calculate the scaled math multiplication factor for
374 * nanosecond to apbt tick conversion.
375 * mult = (nsec/cycle)*2^APBT_SHIFT
376 */
377 printk(KERN_INFO "Init per CPU clockevent %d\n", cpu);
378 adev = &per_cpu(cpu_apbt_dev, cpu);
379 aevt = &adev->evt;
380
381 memcpy(aevt, &apbt_clockevent, sizeof(*aevt));
382 aevt->cpumask = cpumask_of(cpu);
383 aevt->name = adev->name;
384 aevt->mode = CLOCK_EVT_MODE_UNUSED;
385
386 printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n",
387 cpu, aevt->name, *(u32 *)aevt->cpumask);
388
389 apbt_setup_irq(adev);
390
391 clockevents_register_device(aevt);
392
393 apbt_enable_int(cpu);
394
395 return;
396}
397
398/*
399 * this notify handler process CPU hotplug events. in case of S0i3, nonboot
400 * cpus are disabled/enabled frequently, for performance reasons, we keep the
401 * per cpu timer irq registered so that we do need to do free_irq/request_irq.
402 *
403 * TODO: it might be more reliable to directly disable percpu clockevent device
404 * without the notifier chain. currently, cpu 0 may get interrupts from other
405 * cpu timers during the offline process due to the ordering of notification.
406 * the extra interrupt is harmless.
407 */
408static int apbt_cpuhp_notify(struct notifier_block *n,
409 unsigned long action, void *hcpu)
410{
411 unsigned long cpu = (unsigned long)hcpu;
412 struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
413
414 switch (action & 0xf) {
415 case CPU_DEAD:
416 apbt_disable_int(cpu);
417 if (system_state == SYSTEM_RUNNING)
418 pr_debug("skipping APBT CPU %lu offline\n", cpu);
419 else if (adev) {
420 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
421 free_irq(adev->irq, adev);
422 }
423 break;
424 default:
425 pr_debug(KERN_INFO "APBT notified %lu, no action\n", action);
426 }
427 return NOTIFY_OK;
428}
429
430static __init int apbt_late_init(void)
431{
432 if (disable_apbt_percpu || !apb_timer_block_enabled)
433 return 0;
434 /* This notifier should be called after workqueue is ready */
435 hotcpu_notifier(apbt_cpuhp_notify, -20);
436 return 0;
437}
438fs_initcall(apbt_late_init);
439#else
440
/* !CONFIG_SMP build: there is no secondary CPU clock to set up, so this is a no-op. */
441void apbt_setup_secondary_clock(void) {}
442
443#endif /* CONFIG_SMP */
444
/*
 * clock_event_device .set_mode callback.
 *
 * Programs the timer whose index is stored in the struct apbt_dev that
 * embeds @evt:
 *   PERIODIC        - load one tick period (NSEC_PER_SEC/HZ converted to
 *                     timer ticks) and run in periodic mode
 *   ONESHOT         - run in free-running mode from an all-ones count with
 *                     the interrupt unmasked, emulating one-shot
 *   UNUSED/SHUTDOWN - mask the interrupt and stop the timer
 *   RESUME          - unmask the interrupt
 * The order of the register writes below is significant; do not reorder.
 */
445static void apbt_set_mode(enum clock_event_mode mode,
446 struct clock_event_device *evt)
447{
448 unsigned long ctrl;
449 uint64_t delta;
450 int timer_num;
451 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
452
453 timer_num = adev->num;
454 pr_debug("%s CPU %d timer %d mode=%d\n",
455 __func__, first_cpu(*evt->cpumask), timer_num, mode);
456
457 switch (mode) {
458 case CLOCK_EVT_MODE_PERIODIC:
/* nanoseconds per tick scaled to timer counts: (ns * mult) >> shift */
459 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult;
460 delta >>= apbt_clockevent.shift;
461 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
462 ctrl |= APBTMR_CONTROL_MODE_PERIODIC;
463 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
464 /*
465 * DW APB p. 46, have to disable timer before load counter,
466 * may cause sync problem.
467 */
468 ctrl &= ~APBTMR_CONTROL_ENABLE;
469 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
470 udelay(1);
471 pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ);
472 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
/* restart the timer with the new period loaded */
473 ctrl |= APBTMR_CONTROL_ENABLE;
474 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
475 break;
476 /* APB timer does not have one-shot mode, use free running mode */
477 case CLOCK_EVT_MODE_ONESHOT:
478 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
479 /*
480 * set free running mode, this mode will let timer reload max
481 * timeout which will give time (3min on 25MHz clock) to rearm
482 * the next event, therefore emulate the one-shot mode.
483 */
484 ctrl &= ~APBTMR_CONTROL_ENABLE;
485 ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC;
486
487 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
488 /* write again to set free running mode */
489 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
490
491 /*
492 * DW APB p. 46, load counter with all 1s before starting free
493 * running mode.
494 */
495 apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT);
/* unmask the interrupt and start counting */
496 ctrl &= ~APBTMR_CONTROL_INT;
497 ctrl |= APBTMR_CONTROL_ENABLE;
498 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
499 break;
500
501 case CLOCK_EVT_MODE_UNUSED:
502 case CLOCK_EVT_MODE_SHUTDOWN:
/* mask the interrupt first, then stop the timer */
503 apbt_disable_int(timer_num);
504 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
505 ctrl &= ~APBTMR_CONTROL_ENABLE;
506 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
507 break;
508
509 case CLOCK_EVT_MODE_RESUME:
510 apbt_enable_int(timer_num);
511 break;
512 }
513}
514
515static int apbt_next_event(unsigned long delta,
516 struct clock_event_device *evt)
517{
518 unsigned long ctrl;
519 int timer_num;
520
521 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
522
523 timer_num = adev->num;
524 /* Disable timer */
525 ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL);
526 ctrl &= ~APBTMR_CONTROL_ENABLE;
527 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
528 /* write new count */
529 apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT);
530 ctrl |= APBTMR_CONTROL_ENABLE;
531 apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL);
532 return 0;
533}
534
535/*
536 * APB timer clock is not in sync with pclk on Langwell, which translates to
537 * unreliable read value caused by sampling error. the error does not add up
538 * overtime and only happens when sampling a 0 as a 1 by mistake. so the time
539 * would go backwards. the following code is trying to prevent time traveling
540 * backwards. little bit paranoid.
541 */
542static cycle_t apbt_read_clocksource(struct clocksource *cs)
543{
544 unsigned long t0, t1, t2;
545 static unsigned long last_read;
546
547bad_count:
548 t1 = apbt_readl(phy_cs_timer_id,
549 APBTMR_N_CURRENT_VALUE);
550 t2 = apbt_readl(phy_cs_timer_id,
551 APBTMR_N_CURRENT_VALUE);
552 if (unlikely(t1 < t2)) {
553 pr_debug("APBT: read current count error %lx:%lx:%lx\n",
554 t1, t2, t2 - t1);
555 goto bad_count;
556 }
557 /*
558 * check against cached last read, makes sure time does not go back.
559 * it could be a normal rollover but we will do tripple check anyway
560 */
561 if (unlikely(t2 > last_read)) {
562 /* check if we have a normal rollover */
563 unsigned long raw_intr_status =
564 apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
565 /*
566 * cs timer interrupt is masked but raw intr bit is set if
567 * rollover occurs. then we read EOI reg to clear it.
568 */
569 if (raw_intr_status & (1 << phy_cs_timer_id)) {
570 apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
571 goto out;
572 }
573 pr_debug("APB CS going back %lx:%lx:%lx ",
574 t2, last_read, t2 - last_read);
575bad_count_x3:
576 pr_debug(KERN_INFO "tripple check enforced\n");
577 t0 = apbt_readl(phy_cs_timer_id,
578 APBTMR_N_CURRENT_VALUE);
579 udelay(1);
580 t1 = apbt_readl(phy_cs_timer_id,
581 APBTMR_N_CURRENT_VALUE);
582 udelay(1);
583 t2 = apbt_readl(phy_cs_timer_id,
584 APBTMR_N_CURRENT_VALUE);
585 if ((t2 > t1) || (t1 > t0)) {
586 printk(KERN_ERR "Error: APB CS tripple check failed\n");
587 goto bad_count_x3;
588 }
589 }
590out:
591 last_read = t2;
592 return (cycle_t)~t2;
593}
594
595static int apbt_clocksource_register(void)
596{
597 u64 start, now;
598 cycle_t t1;
599
600 /* Start the counter, use timer 2 as source, timer 0/1 for event */
601 apbt_start_counter(phy_cs_timer_id);
602
603 /* Verify whether apbt counter works */
604 t1 = apbt_read_clocksource(&clocksource_apbt);
605 rdtscll(start);
606
607 /*
608 * We don't know the TSC frequency yet, but waiting for
609 * 200000 TSC cycles is safe:
610 * 4 GHz == 50us
611 * 1 GHz == 200us
612 */
613 do {
614 rep_nop();
615 rdtscll(now);
616 } while ((now - start) < 200000UL);
617
618 /* APBT is the only always on clocksource, it has to work! */
619 if (t1 == apbt_read_clocksource(&clocksource_apbt))
620 panic("APBT counter not counting. APBT disabled\n");
621
622 /*
623 * initialize and register APBT clocksource
624 * convert that to ns/clock cycle
625 * mult = (ns/c) * 2^APBT_SHIFT
626 */
627 clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
628 (unsigned long) apbt_freq, APBT_SHIFT);
629 clocksource_register(&clocksource_apbt);
630
631 return 0;
632}
633
634/*
635 * Early setup the APBT timer, only use timer 0 for booting then switch to
636 * per CPU timer if possible.
637 * returns 1 if per cpu apbt is setup
638 * returns 0 if no per cpu apbt is chosen
639 * panic if set up failed, this is the only platform timer on Moorestown.
640 */
641void __init apbt_time_init(void)
642{
643#ifdef CONFIG_SMP
644 int i;
645 struct sfi_timer_table_entry *p_mtmr;
646 unsigned int percpu_timer;
647 struct apbt_dev *adev;
648#endif
649
650 if (apb_timer_block_enabled)
651 return;
652 apbt_set_mapping();
653 if (apbt_virt_address) {
654 pr_debug("Found APBT version 0x%lx\n",\
655 apbt_readl_reg(APBTMRS_COMP_VERSION));
656 } else
657 goto out_noapbt;
658 /*
659 * Read the frequency and check for a sane value, for ESL model
660 * we extend the possible clock range to allow time scaling.
661 */
662
663 if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) {
664 pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq);
665 goto out_noapbt;
666 }
667 if (apbt_clocksource_register()) {
668 pr_debug("APBT has failed to register clocksource\n");
669 goto out_noapbt;
670 }
671 if (!apbt_clockevent_register())
672 apb_timer_block_enabled = 1;
673 else {
674 pr_debug("APBT has failed to register clockevent\n");
675 goto out_noapbt;
676 }
677#ifdef CONFIG_SMP
678 /* kernel cmdline disable apb timer, so we will use lapic timers */
679 if (disable_apbt_percpu) {
680 printk(KERN_INFO "apbt: disabled per cpu timer\n");
681 return;
682 }
683 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
684 if (num_possible_cpus() <= sfi_mtimer_num) {
685 percpu_timer = 1;
686 apbt_num_timers_used = num_possible_cpus();
687 } else {
688 percpu_timer = 0;
689 apbt_num_timers_used = 1;
690 adev = &per_cpu(cpu_apbt_dev, 0);
691 adev->flags &= ~APBT_DEV_USED;
692 }
693 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
694
695 /* here we set up per CPU timer data structure */
696 apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used,
697 GFP_KERNEL);
698 if (!apbt_devs) {
699 printk(KERN_ERR "Failed to allocate APB timer devices\n");
700 return;
701 }
702 for (i = 0; i < apbt_num_timers_used; i++) {
703 adev = &per_cpu(cpu_apbt_dev, i);
704 adev->num = i;
705 adev->cpu = i;
706 p_mtmr = sfi_get_mtmr(i);
707 if (p_mtmr) {
708 adev->tick = p_mtmr->freq_hz;
709 adev->irq = p_mtmr->irq;
710 } else
711 printk(KERN_ERR "Failed to get timer for cpu %d\n", i);
712 adev->count = 0;
713 sprintf(adev->name, "apbt%d", i);
714 }
715#endif
716
717 return;
718
719out_noapbt:
720 apbt_clear_mapping();
721 apb_timer_block_enabled = 0;
722 panic("failed to enable APB timer\n");
723}
724
725static inline void apbt_disable(int n)
726{
727 if (is_apbt_capable()) {
728 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
729 ctrl &= ~APBTMR_CONTROL_ENABLE;
730 apbt_writel(n, ctrl, APBTMR_N_CONTROL);
731 }
732}
733
734/* called before apb_timer_enable, use early map */
735unsigned long apbt_quick_calibrate()
736{
737 int i, scale;
738 u64 old, new;
739 cycle_t t1, t2;
740 unsigned long khz = 0;
741 u32 loop, shift;
742
743 apbt_set_mapping();
744 apbt_start_counter(phy_cs_timer_id);
745
746 /* check if the timer can count down, otherwise return */
747 old = apbt_read_clocksource(&clocksource_apbt);
748 i = 10000;
749 while (--i) {
750 if (old != apbt_read_clocksource(&clocksource_apbt))
751 break;
752 }
753 if (!i)
754 goto failed;
755
756 /* count 16 ms */
757 loop = (apbt_freq * 1000) << 4;
758
759 /* restart the timer to ensure it won't get to 0 in the calibration */
760 apbt_start_counter(phy_cs_timer_id);
761
762 old = apbt_read_clocksource(&clocksource_apbt);
763 old += loop;
764
765 t1 = __native_read_tsc();
766
767 do {
768 new = apbt_read_clocksource(&clocksource_apbt);
769 } while (new < old);
770
771 t2 = __native_read_tsc();
772
773 shift = 5;
774 if (unlikely(loop >> shift == 0)) {
775 printk(KERN_INFO
776 "APBT TSC calibration failed, not enough resolution\n");
777 return 0;
778 }
779 scale = (int)div_u64((t2 - t1), loop >> shift);
780 khz = (scale * apbt_freq * 1000) >> shift;
781 printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
782 return khz;
783failed:
784 return 0;
785}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index f147a95fd84a..b5d8b0bcf235 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -31,7 +31,6 @@
31#include <asm/x86_init.h> 31#include <asm/x86_init.h>
32 32
33int gart_iommu_aperture; 33int gart_iommu_aperture;
34EXPORT_SYMBOL_GPL(gart_iommu_aperture);
35int gart_iommu_aperture_disabled __initdata; 34int gart_iommu_aperture_disabled __initdata;
36int gart_iommu_aperture_allowed __initdata; 35int gart_iommu_aperture_allowed __initdata;
37 36
@@ -394,6 +393,7 @@ void __init gart_iommu_hole_init(void)
394 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 393 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
395 int bus; 394 int bus;
396 int dev_base, dev_limit; 395 int dev_base, dev_limit;
396 u32 ctl;
397 397
398 bus = bus_dev_ranges[i].bus; 398 bus = bus_dev_ranges[i].bus;
399 dev_base = bus_dev_ranges[i].dev_base; 399 dev_base = bus_dev_ranges[i].dev_base;
@@ -407,7 +407,19 @@ void __init gart_iommu_hole_init(void)
407 gart_iommu_aperture = 1; 407 gart_iommu_aperture = 1;
408 x86_init.iommu.iommu_init = gart_iommu_init; 408 x86_init.iommu.iommu_init = gart_iommu_init;
409 409
410 aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; 410 ctl = read_pci_config(bus, slot, 3,
411 AMD64_GARTAPERTURECTL);
412
413 /*
414 * Before we do anything else disable the GART. It may
415 * still be enabled if we boot into a crash-kernel here.
416 * Reconfiguring the GART while it is enabled could have
417 * unknown side-effects.
418 */
419 ctl &= ~GARTEN;
420 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
421
422 aper_order = (ctl >> 1) & 7;
411 aper_size = (32 * 1024 * 1024) << aper_order; 423 aper_size = (32 * 1024 * 1024) << aper_order;
412 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; 424 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
413 aper_base <<= 25; 425 aper_base <<= 25;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e29b2a77aa8..e5a4a1e01618 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1390,7 +1390,7 @@ void __init enable_IR_x2apic(void)
1390 } 1390 }
1391 1391
1392 local_irq_save(flags); 1392 local_irq_save(flags);
1393 mask_8259A(); 1393 legacy_pic->mask_all();
1394 mask_IO_APIC_setup(ioapic_entries); 1394 mask_IO_APIC_setup(ioapic_entries);
1395 1395
1396 if (dmar_table_init_ret) 1396 if (dmar_table_init_ret)
@@ -1422,7 +1422,7 @@ void __init enable_IR_x2apic(void)
1422nox2apic: 1422nox2apic:
1423 if (!ret) /* IR enabling failed */ 1423 if (!ret) /* IR enabling failed */
1424 restore_IO_APIC_setup(ioapic_entries); 1424 restore_IO_APIC_setup(ioapic_entries);
1425 unmask_8259A(); 1425 legacy_pic->restore_mask();
1426 local_irq_restore(flags); 1426 local_irq_restore(flags);
1427 1427
1428out: 1428out:
@@ -1640,8 +1640,10 @@ int __init APIC_init_uniprocessor(void)
1640 } 1640 }
1641#endif 1641#endif
1642 1642
1643#ifndef CONFIG_SMP
1643 enable_IR_x2apic(); 1644 enable_IR_x2apic();
1644 default_setup_apic_routing(); 1645 default_setup_apic_routing();
1646#endif
1645 1647
1646 verify_local_APIC(); 1648 verify_local_APIC();
1647 connect_bsp_APIC(); 1649 connect_bsp_APIC();
@@ -2018,7 +2020,7 @@ static int lapic_resume(struct sys_device *dev)
2018 } 2020 }
2019 2021
2020 mask_IO_APIC_setup(ioapic_entries); 2022 mask_IO_APIC_setup(ioapic_entries);
2021 mask_8259A(); 2023 legacy_pic->mask_all();
2022 } 2024 }
2023 2025
2024 if (x2apic_mode) 2026 if (x2apic_mode)
@@ -2062,7 +2064,7 @@ static int lapic_resume(struct sys_device *dev)
2062 2064
2063 if (intr_remapping_enabled) { 2065 if (intr_remapping_enabled) {
2064 reenable_intr_remapping(x2apic_mode); 2066 reenable_intr_remapping(x2apic_mode);
2065 unmask_8259A(); 2067 legacy_pic->restore_mask();
2066 restore_IO_APIC_setup(ioapic_entries); 2068 restore_IO_APIC_setup(ioapic_entries);
2067 free_ioapic_entries(ioapic_entries); 2069 free_ioapic_entries(ioapic_entries);
2068 } 2070 }
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index e3c3d820c325..09d3b17ce0c2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -223,7 +223,7 @@ struct apic apic_flat = {
223}; 223};
224 224
225/* 225/*
226 * Physflat mode is used when there are more than 8 CPUs on a AMD system. 226 * Physflat mode is used when there are more than 8 CPUs on a system.
227 * We cannot use logical delivery in this case because the mask 227 * We cannot use logical delivery in this case because the mask
228 * overflows, so use physical mode. 228 * overflows, so use physical mode.
229 */ 229 */
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index dd2b5f264643..03ba1b895f5e 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -42,6 +42,7 @@
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/acpi.h> 43#include <linux/acpi.h>
44#include <linux/init.h> 44#include <linux/init.h>
45#include <linux/gfp.h>
45#include <linux/nmi.h> 46#include <linux/nmi.h>
46#include <linux/smp.h> 47#include <linux/smp.h>
47#include <linux/io.h> 48#include <linux/io.h>
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6bdd2c7ead75..eb2789c3f721 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -36,6 +36,7 @@
36#include <linux/freezer.h> 36#include <linux/freezer.h>
37#include <linux/kthread.h> 37#include <linux/kthread.h>
38#include <linux/jiffies.h> /* time_after() */ 38#include <linux/jiffies.h> /* time_after() */
39#include <linux/slab.h>
39#ifdef CONFIG_ACPI 40#ifdef CONFIG_ACPI
40#include <acpi/acpi_bus.h> 41#include <acpi/acpi_bus.h>
41#endif 42#endif
@@ -73,8 +74,8 @@
73 */ 74 */
74int sis_apic_bug = -1; 75int sis_apic_bug = -1;
75 76
76static DEFINE_SPINLOCK(ioapic_lock); 77static DEFINE_RAW_SPINLOCK(ioapic_lock);
77static DEFINE_SPINLOCK(vector_lock); 78static DEFINE_RAW_SPINLOCK(vector_lock);
78 79
79/* 80/*
80 * # of IRQ routing registers 81 * # of IRQ routing registers
@@ -94,8 +95,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
94/* # of MP IRQ source entries */ 95/* # of MP IRQ source entries */
95int mp_irq_entries; 96int mp_irq_entries;
96 97
97/* Number of legacy interrupts */
98static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
99/* GSI interrupts */ 98/* GSI interrupts */
100static int nr_irqs_gsi = NR_IRQS_LEGACY; 99static int nr_irqs_gsi = NR_IRQS_LEGACY;
101 100
@@ -140,33 +139,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
140 139
141/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 140/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
142#ifdef CONFIG_SPARSE_IRQ 141#ifdef CONFIG_SPARSE_IRQ
143static struct irq_cfg irq_cfgx[] = { 142static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
144#else 143#else
145static struct irq_cfg irq_cfgx[NR_IRQS] = { 144static struct irq_cfg irq_cfgx[NR_IRQS];
146#endif 145#endif
147 [0] = { .vector = IRQ0_VECTOR, },
148 [1] = { .vector = IRQ1_VECTOR, },
149 [2] = { .vector = IRQ2_VECTOR, },
150 [3] = { .vector = IRQ3_VECTOR, },
151 [4] = { .vector = IRQ4_VECTOR, },
152 [5] = { .vector = IRQ5_VECTOR, },
153 [6] = { .vector = IRQ6_VECTOR, },
154 [7] = { .vector = IRQ7_VECTOR, },
155 [8] = { .vector = IRQ8_VECTOR, },
156 [9] = { .vector = IRQ9_VECTOR, },
157 [10] = { .vector = IRQ10_VECTOR, },
158 [11] = { .vector = IRQ11_VECTOR, },
159 [12] = { .vector = IRQ12_VECTOR, },
160 [13] = { .vector = IRQ13_VECTOR, },
161 [14] = { .vector = IRQ14_VECTOR, },
162 [15] = { .vector = IRQ15_VECTOR, },
163};
164
165void __init io_apic_disable_legacy(void)
166{
167 nr_legacy_irqs = 0;
168 nr_irqs_gsi = 0;
169}
170 146
171int __init arch_early_irq_init(void) 147int __init arch_early_irq_init(void)
172{ 148{
@@ -176,6 +152,11 @@ int __init arch_early_irq_init(void)
176 int node; 152 int node;
177 int i; 153 int i;
178 154
155 if (!legacy_pic->nr_legacy_irqs) {
156 nr_irqs_gsi = 0;
157 io_apic_irqs = ~0UL;
158 }
159
179 cfg = irq_cfgx; 160 cfg = irq_cfgx;
180 count = ARRAY_SIZE(irq_cfgx); 161 count = ARRAY_SIZE(irq_cfgx);
181 node= cpu_to_node(boot_cpu_id); 162 node= cpu_to_node(boot_cpu_id);
@@ -185,8 +166,14 @@ int __init arch_early_irq_init(void)
185 desc->chip_data = &cfg[i]; 166 desc->chip_data = &cfg[i];
186 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); 167 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
187 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); 168 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
188 if (i < nr_legacy_irqs) 169 /*
189 cpumask_setall(cfg[i].domain); 170 * For legacy IRQ's, start with assigning irq0 to irq15 to
171 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
172 */
173 if (i < legacy_pic->nr_legacy_irqs) {
174 cfg[i].vector = IRQ0_VECTOR + i;
175 cpumask_set_cpu(0, cfg[i].domain);
176 }
190 } 177 }
191 178
192 return 0; 179 return 0;
@@ -406,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
406 struct irq_pin_list *entry; 393 struct irq_pin_list *entry;
407 unsigned long flags; 394 unsigned long flags;
408 395
409 spin_lock_irqsave(&ioapic_lock, flags); 396 raw_spin_lock_irqsave(&ioapic_lock, flags);
410 for_each_irq_pin(entry, cfg->irq_2_pin) { 397 for_each_irq_pin(entry, cfg->irq_2_pin) {
411 unsigned int reg; 398 unsigned int reg;
412 int pin; 399 int pin;
@@ -415,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
415 reg = io_apic_read(entry->apic, 0x10 + pin*2); 402 reg = io_apic_read(entry->apic, 0x10 + pin*2);
416 /* Is the remote IRR bit set? */ 403 /* Is the remote IRR bit set? */
417 if (reg & IO_APIC_REDIR_REMOTE_IRR) { 404 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
418 spin_unlock_irqrestore(&ioapic_lock, flags); 405 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
419 return true; 406 return true;
420 } 407 }
421 } 408 }
422 spin_unlock_irqrestore(&ioapic_lock, flags); 409 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
423 410
424 return false; 411 return false;
425} 412}
@@ -433,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
433{ 420{
434 union entry_union eu; 421 union entry_union eu;
435 unsigned long flags; 422 unsigned long flags;
436 spin_lock_irqsave(&ioapic_lock, flags); 423 raw_spin_lock_irqsave(&ioapic_lock, flags);
437 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); 424 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
438 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); 425 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
439 spin_unlock_irqrestore(&ioapic_lock, flags); 426 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
440 return eu.entry; 427 return eu.entry;
441} 428}
442 429
@@ -459,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
459void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 446void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
460{ 447{
461 unsigned long flags; 448 unsigned long flags;
462 spin_lock_irqsave(&ioapic_lock, flags); 449 raw_spin_lock_irqsave(&ioapic_lock, flags);
463 __ioapic_write_entry(apic, pin, e); 450 __ioapic_write_entry(apic, pin, e);
464 spin_unlock_irqrestore(&ioapic_lock, flags); 451 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
465} 452}
466 453
467/* 454/*
@@ -474,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin)
474 unsigned long flags; 461 unsigned long flags;
475 union entry_union eu = { .entry.mask = 1 }; 462 union entry_union eu = { .entry.mask = 1 };
476 463
477 spin_lock_irqsave(&ioapic_lock, flags); 464 raw_spin_lock_irqsave(&ioapic_lock, flags);
478 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 465 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
479 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 466 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
480 spin_unlock_irqrestore(&ioapic_lock, flags); 467 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
481} 468}
482 469
483/* 470/*
@@ -604,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
604 591
605 BUG_ON(!cfg); 592 BUG_ON(!cfg);
606 593
607 spin_lock_irqsave(&ioapic_lock, flags); 594 raw_spin_lock_irqsave(&ioapic_lock, flags);
608 __mask_IO_APIC_irq(cfg); 595 __mask_IO_APIC_irq(cfg);
609 spin_unlock_irqrestore(&ioapic_lock, flags); 596 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
610} 597}
611 598
612static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) 599static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -614,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
614 struct irq_cfg *cfg = desc->chip_data; 601 struct irq_cfg *cfg = desc->chip_data;
615 unsigned long flags; 602 unsigned long flags;
616 603
617 spin_lock_irqsave(&ioapic_lock, flags); 604 raw_spin_lock_irqsave(&ioapic_lock, flags);
618 __unmask_IO_APIC_irq(cfg); 605 __unmask_IO_APIC_irq(cfg);
619 spin_unlock_irqrestore(&ioapic_lock, flags); 606 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
620} 607}
621 608
622static void mask_IO_APIC_irq(unsigned int irq) 609static void mask_IO_APIC_irq(unsigned int irq)
@@ -865,7 +852,7 @@ static int __init find_isa_irq_apic(int irq, int type)
865 */ 852 */
866static int EISA_ELCR(unsigned int irq) 853static int EISA_ELCR(unsigned int irq)
867{ 854{
868 if (irq < nr_legacy_irqs) { 855 if (irq < legacy_pic->nr_legacy_irqs) {
869 unsigned int port = 0x4d0 + (irq >> 3); 856 unsigned int port = 0x4d0 + (irq >> 3);
870 return (inb(port) >> (irq & 7)) & 1; 857 return (inb(port) >> (irq & 7)) & 1;
871 } 858 }
@@ -1140,12 +1127,12 @@ void lock_vector_lock(void)
1140 /* Used to the online set of cpus does not change 1127 /* Used to the online set of cpus does not change
1141 * during assign_irq_vector. 1128 * during assign_irq_vector.
1142 */ 1129 */
1143 spin_lock(&vector_lock); 1130 raw_spin_lock(&vector_lock);
1144} 1131}
1145 1132
1146void unlock_vector_lock(void) 1133void unlock_vector_lock(void)
1147{ 1134{
1148 spin_unlock(&vector_lock); 1135 raw_spin_unlock(&vector_lock);
1149} 1136}
1150 1137
1151static int 1138static int
@@ -1162,7 +1149,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1162 * Also, we've got to be careful not to trash gate 1149 * Also, we've got to be careful not to trash gate
1163 * 0x80, because int 0x80 is hm, kind of importantish. ;) 1150 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1164 */ 1151 */
1165 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1152 static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
1153 static int current_offset = VECTOR_OFFSET_START % 8;
1166 unsigned int old_vector; 1154 unsigned int old_vector;
1167 int cpu, err; 1155 int cpu, err;
1168 cpumask_var_t tmp_mask; 1156 cpumask_var_t tmp_mask;
@@ -1198,7 +1186,7 @@ next:
1198 if (vector >= first_system_vector) { 1186 if (vector >= first_system_vector) {
1199 /* If out of vectors on large boxen, must share them. */ 1187 /* If out of vectors on large boxen, must share them. */
1200 offset = (offset + 1) % 8; 1188 offset = (offset + 1) % 8;
1201 vector = FIRST_DEVICE_VECTOR + offset; 1189 vector = FIRST_EXTERNAL_VECTOR + offset;
1202 } 1190 }
1203 if (unlikely(current_vector == vector)) 1191 if (unlikely(current_vector == vector))
1204 continue; 1192 continue;
@@ -1232,9 +1220,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1232 int err; 1220 int err;
1233 unsigned long flags; 1221 unsigned long flags;
1234 1222
1235 spin_lock_irqsave(&vector_lock, flags); 1223 raw_spin_lock_irqsave(&vector_lock, flags);
1236 err = __assign_irq_vector(irq, cfg, mask); 1224 err = __assign_irq_vector(irq, cfg, mask);
1237 spin_unlock_irqrestore(&vector_lock, flags); 1225 raw_spin_unlock_irqrestore(&vector_lock, flags);
1238 return err; 1226 return err;
1239} 1227}
1240 1228
@@ -1268,14 +1256,27 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1268void __setup_vector_irq(int cpu) 1256void __setup_vector_irq(int cpu)
1269{ 1257{
1270 /* Initialize vector_irq on a new cpu */ 1258 /* Initialize vector_irq on a new cpu */
1271 /* This function must be called with vector_lock held */
1272 int irq, vector; 1259 int irq, vector;
1273 struct irq_cfg *cfg; 1260 struct irq_cfg *cfg;
1274 struct irq_desc *desc; 1261 struct irq_desc *desc;
1275 1262
1263 /*
1264 * vector_lock will make sure that we don't run into irq vector
1265 * assignments that might be happening on another cpu in parallel,
1266 * while we setup our initial vector to irq mappings.
1267 */
1268 raw_spin_lock(&vector_lock);
1276 /* Mark the inuse vectors */ 1269 /* Mark the inuse vectors */
1277 for_each_irq_desc(irq, desc) { 1270 for_each_irq_desc(irq, desc) {
1278 cfg = desc->chip_data; 1271 cfg = desc->chip_data;
1272
1273 /*
1274 * If it is a legacy IRQ handled by the legacy PIC, this cpu
1275 * will be part of the irq_cfg's domain.
1276 */
1277 if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
1278 cpumask_set_cpu(cpu, cfg->domain);
1279
1279 if (!cpumask_test_cpu(cpu, cfg->domain)) 1280 if (!cpumask_test_cpu(cpu, cfg->domain))
1280 continue; 1281 continue;
1281 vector = cfg->vector; 1282 vector = cfg->vector;
@@ -1291,6 +1292,7 @@ void __setup_vector_irq(int cpu)
1291 if (!cpumask_test_cpu(cpu, cfg->domain)) 1292 if (!cpumask_test_cpu(cpu, cfg->domain))
1292 per_cpu(vector_irq, cpu)[vector] = -1; 1293 per_cpu(vector_irq, cpu)[vector] = -1;
1293 } 1294 }
1295 raw_spin_unlock(&vector_lock);
1294} 1296}
1295 1297
1296static struct irq_chip ioapic_chip; 1298static struct irq_chip ioapic_chip;
@@ -1440,6 +1442,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1440 1442
1441 cfg = desc->chip_data; 1443 cfg = desc->chip_data;
1442 1444
1445 /*
1446 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1447 * controllers like 8259. Now that IO-APIC can handle this irq, update
1448 * the cfg->domain.
1449 */
1450 if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
1451 apic->vector_allocation_domain(0, cfg->domain);
1452
1443 if (assign_irq_vector(irq, cfg, apic->target_cpus())) 1453 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1444 return; 1454 return;
1445 1455
@@ -1461,8 +1471,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1461 } 1471 }
1462 1472
1463 ioapic_register_intr(irq, desc, trigger); 1473 ioapic_register_intr(irq, desc, trigger);
1464 if (irq < nr_legacy_irqs) 1474 if (irq < legacy_pic->nr_legacy_irqs)
1465 disable_8259A_irq(irq); 1475 legacy_pic->chip->mask(irq);
1466 1476
1467 ioapic_write_entry(apic_id, pin, entry); 1477 ioapic_write_entry(apic_id, pin, entry);
1468} 1478}
@@ -1473,7 +1483,7 @@ static struct {
1473 1483
1474static void __init setup_IO_APIC_irqs(void) 1484static void __init setup_IO_APIC_irqs(void)
1475{ 1485{
1476 int apic_id = 0, pin, idx, irq; 1486 int apic_id, pin, idx, irq;
1477 int notcon = 0; 1487 int notcon = 0;
1478 struct irq_desc *desc; 1488 struct irq_desc *desc;
1479 struct irq_cfg *cfg; 1489 struct irq_cfg *cfg;
@@ -1481,14 +1491,7 @@ static void __init setup_IO_APIC_irqs(void)
1481 1491
1482 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1492 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1483 1493
1484#ifdef CONFIG_ACPI 1494 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1485 if (!acpi_disabled && acpi_ioapic) {
1486 apic_id = mp_find_ioapic(0);
1487 if (apic_id < 0)
1488 apic_id = 0;
1489 }
1490#endif
1491
1492 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1495 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1493 idx = find_irq_entry(apic_id, pin, mp_INT); 1496 idx = find_irq_entry(apic_id, pin, mp_INT);
1494 if (idx == -1) { 1497 if (idx == -1) {
@@ -1510,6 +1513,9 @@ static void __init setup_IO_APIC_irqs(void)
1510 1513
1511 irq = pin_2_irq(idx, apic_id, pin); 1514 irq = pin_2_irq(idx, apic_id, pin);
1512 1515
1516 if ((apic_id > 0) && (irq > 16))
1517 continue;
1518
1513 /* 1519 /*
1514 * Skip the timer IRQ if there's a quirk handler 1520 * Skip the timer IRQ if there's a quirk handler
1515 * installed and if it returns 1: 1521 * installed and if it returns 1:
@@ -1539,6 +1545,56 @@ static void __init setup_IO_APIC_irqs(void)
1539} 1545}
1540 1546
1541/* 1547/*
1548 * for the gsi that is not in first ioapic
1549 * but could not use acpi_register_gsi()
1550 * like some special sci in IBM x3330
1551 */
1552void setup_IO_APIC_irq_extra(u32 gsi)
1553{
1554 int apic_id = 0, pin, idx, irq;
1555 int node = cpu_to_node(boot_cpu_id);
1556 struct irq_desc *desc;
1557 struct irq_cfg *cfg;
1558
1559 /*
1560 * Convert 'gsi' to 'ioapic.pin'.
1561 */
1562 apic_id = mp_find_ioapic(gsi);
1563 if (apic_id < 0)
1564 return;
1565
1566 pin = mp_find_ioapic_pin(apic_id, gsi);
1567 idx = find_irq_entry(apic_id, pin, mp_INT);
1568 if (idx == -1)
1569 return;
1570
1571 irq = pin_2_irq(idx, apic_id, pin);
1572#ifdef CONFIG_SPARSE_IRQ
1573 desc = irq_to_desc(irq);
1574 if (desc)
1575 return;
1576#endif
1577 desc = irq_to_desc_alloc_node(irq, node);
1578 if (!desc) {
1579 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1580 return;
1581 }
1582
1583 cfg = desc->chip_data;
1584 add_pin_to_irq_node(cfg, node, apic_id, pin);
1585
1586 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
1587 pr_debug("Pin %d-%d already programmed\n",
1588 mp_ioapics[apic_id].apicid, pin);
1589 return;
1590 }
1591 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1592
1593 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1594 irq_trigger(idx), irq_polarity(idx));
1595}
1596
1597/*
1542 * Set up the timer pin, possibly with the 8259A-master behind. 1598 * Set up the timer pin, possibly with the 8259A-master behind.
1543 */ 1599 */
1544static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, 1600static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
@@ -1601,14 +1657,14 @@ __apicdebuginit(void) print_IO_APIC(void)
1601 1657
1602 for (apic = 0; apic < nr_ioapics; apic++) { 1658 for (apic = 0; apic < nr_ioapics; apic++) {
1603 1659
1604 spin_lock_irqsave(&ioapic_lock, flags); 1660 raw_spin_lock_irqsave(&ioapic_lock, flags);
1605 reg_00.raw = io_apic_read(apic, 0); 1661 reg_00.raw = io_apic_read(apic, 0);
1606 reg_01.raw = io_apic_read(apic, 1); 1662 reg_01.raw = io_apic_read(apic, 1);
1607 if (reg_01.bits.version >= 0x10) 1663 if (reg_01.bits.version >= 0x10)
1608 reg_02.raw = io_apic_read(apic, 2); 1664 reg_02.raw = io_apic_read(apic, 2);
1609 if (reg_01.bits.version >= 0x20) 1665 if (reg_01.bits.version >= 0x20)
1610 reg_03.raw = io_apic_read(apic, 3); 1666 reg_03.raw = io_apic_read(apic, 3);
1611 spin_unlock_irqrestore(&ioapic_lock, flags); 1667 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1612 1668
1613 printk("\n"); 1669 printk("\n");
1614 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); 1670 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1825,12 +1881,12 @@ __apicdebuginit(void) print_PIC(void)
1825 unsigned int v; 1881 unsigned int v;
1826 unsigned long flags; 1882 unsigned long flags;
1827 1883
1828 if (!nr_legacy_irqs) 1884 if (!legacy_pic->nr_legacy_irqs)
1829 return; 1885 return;
1830 1886
1831 printk(KERN_DEBUG "\nprinting PIC contents\n"); 1887 printk(KERN_DEBUG "\nprinting PIC contents\n");
1832 1888
1833 spin_lock_irqsave(&i8259A_lock, flags); 1889 raw_spin_lock_irqsave(&i8259A_lock, flags);
1834 1890
1835 v = inb(0xa1) << 8 | inb(0x21); 1891 v = inb(0xa1) << 8 | inb(0x21);
1836 printk(KERN_DEBUG "... PIC IMR: %04x\n", v); 1892 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
@@ -1844,7 +1900,7 @@ __apicdebuginit(void) print_PIC(void)
1844 outb(0x0a,0xa0); 1900 outb(0x0a,0xa0);
1845 outb(0x0a,0x20); 1901 outb(0x0a,0x20);
1846 1902
1847 spin_unlock_irqrestore(&i8259A_lock, flags); 1903 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
1848 1904
1849 printk(KERN_DEBUG "... PIC ISR: %04x\n", v); 1905 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1850 1906
@@ -1903,13 +1959,13 @@ void __init enable_IO_APIC(void)
1903 * The number of IO-APIC IRQ registers (== #pins): 1959 * The number of IO-APIC IRQ registers (== #pins):
1904 */ 1960 */
1905 for (apic = 0; apic < nr_ioapics; apic++) { 1961 for (apic = 0; apic < nr_ioapics; apic++) {
1906 spin_lock_irqsave(&ioapic_lock, flags); 1962 raw_spin_lock_irqsave(&ioapic_lock, flags);
1907 reg_01.raw = io_apic_read(apic, 1); 1963 reg_01.raw = io_apic_read(apic, 1);
1908 spin_unlock_irqrestore(&ioapic_lock, flags); 1964 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1909 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 1965 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1910 } 1966 }
1911 1967
1912 if (!nr_legacy_irqs) 1968 if (!legacy_pic->nr_legacy_irqs)
1913 return; 1969 return;
1914 1970
1915 for(apic = 0; apic < nr_ioapics; apic++) { 1971 for(apic = 0; apic < nr_ioapics; apic++) {
@@ -1966,7 +2022,7 @@ void disable_IO_APIC(void)
1966 */ 2022 */
1967 clear_IO_APIC(); 2023 clear_IO_APIC();
1968 2024
1969 if (!nr_legacy_irqs) 2025 if (!legacy_pic->nr_legacy_irqs)
1970 return; 2026 return;
1971 2027
1972 /* 2028 /*
@@ -2045,9 +2101,9 @@ void __init setup_ioapic_ids_from_mpc(void)
2045 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { 2101 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2046 2102
2047 /* Read the register 0 value */ 2103 /* Read the register 0 value */
2048 spin_lock_irqsave(&ioapic_lock, flags); 2104 raw_spin_lock_irqsave(&ioapic_lock, flags);
2049 reg_00.raw = io_apic_read(apic_id, 0); 2105 reg_00.raw = io_apic_read(apic_id, 0);
2050 spin_unlock_irqrestore(&ioapic_lock, flags); 2106 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2051 2107
2052 old_id = mp_ioapics[apic_id].apicid; 2108 old_id = mp_ioapics[apic_id].apicid;
2053 2109
@@ -2106,16 +2162,16 @@ void __init setup_ioapic_ids_from_mpc(void)
2106 mp_ioapics[apic_id].apicid); 2162 mp_ioapics[apic_id].apicid);
2107 2163
2108 reg_00.bits.ID = mp_ioapics[apic_id].apicid; 2164 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2109 spin_lock_irqsave(&ioapic_lock, flags); 2165 raw_spin_lock_irqsave(&ioapic_lock, flags);
2110 io_apic_write(apic_id, 0, reg_00.raw); 2166 io_apic_write(apic_id, 0, reg_00.raw);
2111 spin_unlock_irqrestore(&ioapic_lock, flags); 2167 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2112 2168
2113 /* 2169 /*
2114 * Sanity check 2170 * Sanity check
2115 */ 2171 */
2116 spin_lock_irqsave(&ioapic_lock, flags); 2172 raw_spin_lock_irqsave(&ioapic_lock, flags);
2117 reg_00.raw = io_apic_read(apic_id, 0); 2173 reg_00.raw = io_apic_read(apic_id, 0);
2118 spin_unlock_irqrestore(&ioapic_lock, flags); 2174 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2119 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) 2175 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2120 printk("could not set ID!\n"); 2176 printk("could not set ID!\n");
2121 else 2177 else
@@ -2198,15 +2254,15 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2198 unsigned long flags; 2254 unsigned long flags;
2199 struct irq_cfg *cfg; 2255 struct irq_cfg *cfg;
2200 2256
2201 spin_lock_irqsave(&ioapic_lock, flags); 2257 raw_spin_lock_irqsave(&ioapic_lock, flags);
2202 if (irq < nr_legacy_irqs) { 2258 if (irq < legacy_pic->nr_legacy_irqs) {
2203 disable_8259A_irq(irq); 2259 legacy_pic->chip->mask(irq);
2204 if (i8259A_irq_pending(irq)) 2260 if (legacy_pic->irq_pending(irq))
2205 was_pending = 1; 2261 was_pending = 1;
2206 } 2262 }
2207 cfg = irq_cfg(irq); 2263 cfg = irq_cfg(irq);
2208 __unmask_IO_APIC_irq(cfg); 2264 __unmask_IO_APIC_irq(cfg);
2209 spin_unlock_irqrestore(&ioapic_lock, flags); 2265 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2210 2266
2211 return was_pending; 2267 return was_pending;
2212} 2268}
@@ -2217,9 +2273,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
2217 struct irq_cfg *cfg = irq_cfg(irq); 2273 struct irq_cfg *cfg = irq_cfg(irq);
2218 unsigned long flags; 2274 unsigned long flags;
2219 2275
2220 spin_lock_irqsave(&vector_lock, flags); 2276 raw_spin_lock_irqsave(&vector_lock, flags);
2221 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2277 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2222 spin_unlock_irqrestore(&vector_lock, flags); 2278 raw_spin_unlock_irqrestore(&vector_lock, flags);
2223 2279
2224 return 1; 2280 return 1;
2225} 2281}
@@ -2312,14 +2368,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2312 irq = desc->irq; 2368 irq = desc->irq;
2313 cfg = desc->chip_data; 2369 cfg = desc->chip_data;
2314 2370
2315 spin_lock_irqsave(&ioapic_lock, flags); 2371 raw_spin_lock_irqsave(&ioapic_lock, flags);
2316 ret = set_desc_affinity(desc, mask, &dest); 2372 ret = set_desc_affinity(desc, mask, &dest);
2317 if (!ret) { 2373 if (!ret) {
2318 /* Only the high 8 bits are valid. */ 2374 /* Only the high 8 bits are valid. */
2319 dest = SET_APIC_LOGICAL_ID(dest); 2375 dest = SET_APIC_LOGICAL_ID(dest);
2320 __target_IO_APIC_irq(irq, dest, cfg); 2376 __target_IO_APIC_irq(irq, dest, cfg);
2321 } 2377 }
2322 spin_unlock_irqrestore(&ioapic_lock, flags); 2378 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2323 2379
2324 return ret; 2380 return ret;
2325} 2381}
@@ -2489,6 +2545,9 @@ void irq_force_complete_move(int irq)
2489 struct irq_desc *desc = irq_to_desc(irq); 2545 struct irq_desc *desc = irq_to_desc(irq);
2490 struct irq_cfg *cfg = desc->chip_data; 2546 struct irq_cfg *cfg = desc->chip_data;
2491 2547
2548 if (!cfg)
2549 return;
2550
2492 __irq_complete_move(&desc, cfg->vector); 2551 __irq_complete_move(&desc, cfg->vector);
2493} 2552}
2494#else 2553#else
@@ -2554,9 +2613,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
2554 irq = desc->irq; 2613 irq = desc->irq;
2555 cfg = desc->chip_data; 2614 cfg = desc->chip_data;
2556 2615
2557 spin_lock_irqsave(&ioapic_lock, flags); 2616 raw_spin_lock_irqsave(&ioapic_lock, flags);
2558 __eoi_ioapic_irq(irq, cfg); 2617 __eoi_ioapic_irq(irq, cfg);
2559 spin_unlock_irqrestore(&ioapic_lock, flags); 2618 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2560} 2619}
2561 2620
2562static void ack_apic_level(unsigned int irq) 2621static void ack_apic_level(unsigned int irq)
@@ -2734,8 +2793,8 @@ static inline void init_IO_APIC_traps(void)
2734 * so default to an old-fashioned 8259 2793 * so default to an old-fashioned 8259
2735 * interrupt if we can.. 2794 * interrupt if we can..
2736 */ 2795 */
2737 if (irq < nr_legacy_irqs) 2796 if (irq < legacy_pic->nr_legacy_irqs)
2738 make_8259A_irq(irq); 2797 legacy_pic->make_irq(irq);
2739 else 2798 else
2740 /* Strange. Oh, well.. */ 2799 /* Strange. Oh, well.. */
2741 desc->chip = &no_irq_chip; 2800 desc->chip = &no_irq_chip;
@@ -2892,7 +2951,7 @@ static inline void __init check_timer(void)
2892 /* 2951 /*
2893 * get/set the timer IRQ vector: 2952 * get/set the timer IRQ vector:
2894 */ 2953 */
2895 disable_8259A_irq(0); 2954 legacy_pic->chip->mask(0);
2896 assign_irq_vector(0, cfg, apic->target_cpus()); 2955 assign_irq_vector(0, cfg, apic->target_cpus());
2897 2956
2898 /* 2957 /*
@@ -2905,7 +2964,7 @@ static inline void __init check_timer(void)
2905 * automatically. 2964 * automatically.
2906 */ 2965 */
2907 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2966 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2908 init_8259A(1); 2967 legacy_pic->init(1);
2909#ifdef CONFIG_X86_32 2968#ifdef CONFIG_X86_32
2910 { 2969 {
2911 unsigned int ver; 2970 unsigned int ver;
@@ -2964,7 +3023,7 @@ static inline void __init check_timer(void)
2964 if (timer_irq_works()) { 3023 if (timer_irq_works()) {
2965 if (nmi_watchdog == NMI_IO_APIC) { 3024 if (nmi_watchdog == NMI_IO_APIC) {
2966 setup_nmi(); 3025 setup_nmi();
2967 enable_8259A_irq(0); 3026 legacy_pic->chip->unmask(0);
2968 } 3027 }
2969 if (disable_timer_pin_1 > 0) 3028 if (disable_timer_pin_1 > 0)
2970 clear_IO_APIC_pin(0, pin1); 3029 clear_IO_APIC_pin(0, pin1);
@@ -2987,14 +3046,14 @@ static inline void __init check_timer(void)
2987 */ 3046 */
2988 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 3047 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
2989 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 3048 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2990 enable_8259A_irq(0); 3049 legacy_pic->chip->unmask(0);
2991 if (timer_irq_works()) { 3050 if (timer_irq_works()) {
2992 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 3051 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2993 timer_through_8259 = 1; 3052 timer_through_8259 = 1;
2994 if (nmi_watchdog == NMI_IO_APIC) { 3053 if (nmi_watchdog == NMI_IO_APIC) {
2995 disable_8259A_irq(0); 3054 legacy_pic->chip->mask(0);
2996 setup_nmi(); 3055 setup_nmi();
2997 enable_8259A_irq(0); 3056 legacy_pic->chip->unmask(0);
2998 } 3057 }
2999 goto out; 3058 goto out;
3000 } 3059 }
@@ -3002,7 +3061,7 @@ static inline void __init check_timer(void)
3002 * Cleanup, just in case ... 3061 * Cleanup, just in case ...
3003 */ 3062 */
3004 local_irq_disable(); 3063 local_irq_disable();
3005 disable_8259A_irq(0); 3064 legacy_pic->chip->mask(0);
3006 clear_IO_APIC_pin(apic2, pin2); 3065 clear_IO_APIC_pin(apic2, pin2);
3007 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 3066 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3008 } 3067 }
@@ -3021,22 +3080,22 @@ static inline void __init check_timer(void)
3021 3080
3022 lapic_register_intr(0, desc); 3081 lapic_register_intr(0, desc);
3023 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 3082 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3024 enable_8259A_irq(0); 3083 legacy_pic->chip->unmask(0);
3025 3084
3026 if (timer_irq_works()) { 3085 if (timer_irq_works()) {
3027 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3086 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3028 goto out; 3087 goto out;
3029 } 3088 }
3030 local_irq_disable(); 3089 local_irq_disable();
3031 disable_8259A_irq(0); 3090 legacy_pic->chip->mask(0);
3032 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3091 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3033 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3092 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3034 3093
3035 apic_printk(APIC_QUIET, KERN_INFO 3094 apic_printk(APIC_QUIET, KERN_INFO
3036 "...trying to set up timer as ExtINT IRQ...\n"); 3095 "...trying to set up timer as ExtINT IRQ...\n");
3037 3096
3038 init_8259A(0); 3097 legacy_pic->init(0);
3039 make_8259A_irq(0); 3098 legacy_pic->make_irq(0);
3040 apic_write(APIC_LVT0, APIC_DM_EXTINT); 3099 apic_write(APIC_LVT0, APIC_DM_EXTINT);
3041 3100
3042 unlock_ExtINT_logic(); 3101 unlock_ExtINT_logic();
@@ -3078,7 +3137,7 @@ void __init setup_IO_APIC(void)
3078 /* 3137 /*
3079 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 3138 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
3080 */ 3139 */
3081 io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; 3140 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
3082 3141
3083 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 3142 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3084 /* 3143 /*
@@ -3089,7 +3148,7 @@ void __init setup_IO_APIC(void)
3089 sync_Arb_IDs(); 3148 sync_Arb_IDs();
3090 setup_IO_APIC_irqs(); 3149 setup_IO_APIC_irqs();
3091 init_IO_APIC_traps(); 3150 init_IO_APIC_traps();
3092 if (nr_legacy_irqs) 3151 if (legacy_pic->nr_legacy_irqs)
3093 check_timer(); 3152 check_timer();
3094} 3153}
3095 3154
@@ -3138,13 +3197,13 @@ static int ioapic_resume(struct sys_device *dev)
3138 data = container_of(dev, struct sysfs_ioapic_data, dev); 3197 data = container_of(dev, struct sysfs_ioapic_data, dev);
3139 entry = data->entry; 3198 entry = data->entry;
3140 3199
3141 spin_lock_irqsave(&ioapic_lock, flags); 3200 raw_spin_lock_irqsave(&ioapic_lock, flags);
3142 reg_00.raw = io_apic_read(dev->id, 0); 3201 reg_00.raw = io_apic_read(dev->id, 0);
3143 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { 3202 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3144 reg_00.bits.ID = mp_ioapics[dev->id].apicid; 3203 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3145 io_apic_write(dev->id, 0, reg_00.raw); 3204 io_apic_write(dev->id, 0, reg_00.raw);
3146 } 3205 }
3147 spin_unlock_irqrestore(&ioapic_lock, flags); 3206 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3148 for (i = 0; i < nr_ioapic_registers[dev->id]; i++) 3207 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
3149 ioapic_write_entry(dev->id, i, entry[i]); 3208 ioapic_write_entry(dev->id, i, entry[i]);
3150 3209
@@ -3207,7 +3266,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3207 if (irq_want < nr_irqs_gsi) 3266 if (irq_want < nr_irqs_gsi)
3208 irq_want = nr_irqs_gsi; 3267 irq_want = nr_irqs_gsi;
3209 3268
3210 spin_lock_irqsave(&vector_lock, flags); 3269 raw_spin_lock_irqsave(&vector_lock, flags);
3211 for (new = irq_want; new < nr_irqs; new++) { 3270 for (new = irq_want; new < nr_irqs; new++) {
3212 desc_new = irq_to_desc_alloc_node(new, node); 3271 desc_new = irq_to_desc_alloc_node(new, node);
3213 if (!desc_new) { 3272 if (!desc_new) {
@@ -3226,14 +3285,11 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3226 irq = new; 3285 irq = new;
3227 break; 3286 break;
3228 } 3287 }
3229 spin_unlock_irqrestore(&vector_lock, flags); 3288 raw_spin_unlock_irqrestore(&vector_lock, flags);
3289
3290 if (irq > 0)
3291 dynamic_irq_init_keep_chip_data(irq);
3230 3292
3231 if (irq > 0) {
3232 dynamic_irq_init(irq);
3233 /* restore it, in case dynamic_irq_init clear it */
3234 if (desc_new)
3235 desc_new->chip_data = cfg_new;
3236 }
3237 return irq; 3293 return irq;
3238} 3294}
3239 3295
@@ -3255,20 +3311,13 @@ int create_irq(void)
3255void destroy_irq(unsigned int irq) 3311void destroy_irq(unsigned int irq)
3256{ 3312{
3257 unsigned long flags; 3313 unsigned long flags;
3258 struct irq_cfg *cfg;
3259 struct irq_desc *desc;
3260 3314
3261 /* store it, in case dynamic_irq_cleanup clear it */ 3315 dynamic_irq_cleanup_keep_chip_data(irq);
3262 desc = irq_to_desc(irq);
3263 cfg = desc->chip_data;
3264 dynamic_irq_cleanup(irq);
3265 /* connect back irq_cfg */
3266 desc->chip_data = cfg;
3267 3316
3268 free_irte(irq); 3317 free_irte(irq);
3269 spin_lock_irqsave(&vector_lock, flags); 3318 raw_spin_lock_irqsave(&vector_lock, flags);
3270 __clear_irq_vector(irq, cfg); 3319 __clear_irq_vector(irq, get_irq_chip_data(irq));
3271 spin_unlock_irqrestore(&vector_lock, flags); 3320 raw_spin_unlock_irqrestore(&vector_lock, flags);
3272} 3321}
3273 3322
3274/* 3323/*
@@ -3805,9 +3854,9 @@ int __init io_apic_get_redir_entries (int ioapic)
3805 union IO_APIC_reg_01 reg_01; 3854 union IO_APIC_reg_01 reg_01;
3806 unsigned long flags; 3855 unsigned long flags;
3807 3856
3808 spin_lock_irqsave(&ioapic_lock, flags); 3857 raw_spin_lock_irqsave(&ioapic_lock, flags);
3809 reg_01.raw = io_apic_read(ioapic, 1); 3858 reg_01.raw = io_apic_read(ioapic, 1);
3810 spin_unlock_irqrestore(&ioapic_lock, flags); 3859 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3811 3860
3812 return reg_01.bits.entries; 3861 return reg_01.bits.entries;
3813} 3862}
@@ -3890,7 +3939,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3890 /* 3939 /*
3891 * IRQs < 16 are already in the irq_2_pin[] map 3940 * IRQs < 16 are already in the irq_2_pin[] map
3892 */ 3941 */
3893 if (irq >= nr_legacy_irqs) { 3942 if (irq >= legacy_pic->nr_legacy_irqs) {
3894 cfg = desc->chip_data; 3943 cfg = desc->chip_data;
3895 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { 3944 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3896 printk(KERN_INFO "can not add pin %d for irq %d\n", 3945 printk(KERN_INFO "can not add pin %d for irq %d\n",
@@ -3969,9 +4018,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3969 if (physids_empty(apic_id_map)) 4018 if (physids_empty(apic_id_map))
3970 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); 4019 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
3971 4020
3972 spin_lock_irqsave(&ioapic_lock, flags); 4021 raw_spin_lock_irqsave(&ioapic_lock, flags);
3973 reg_00.raw = io_apic_read(ioapic, 0); 4022 reg_00.raw = io_apic_read(ioapic, 0);
3974 spin_unlock_irqrestore(&ioapic_lock, flags); 4023 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3975 4024
3976 if (apic_id >= get_physical_broadcast()) { 4025 if (apic_id >= get_physical_broadcast()) {
3977 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " 4026 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -4005,10 +4054,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
4005 if (reg_00.bits.ID != apic_id) { 4054 if (reg_00.bits.ID != apic_id) {
4006 reg_00.bits.ID = apic_id; 4055 reg_00.bits.ID = apic_id;
4007 4056
4008 spin_lock_irqsave(&ioapic_lock, flags); 4057 raw_spin_lock_irqsave(&ioapic_lock, flags);
4009 io_apic_write(ioapic, 0, reg_00.raw); 4058 io_apic_write(ioapic, 0, reg_00.raw);
4010 reg_00.raw = io_apic_read(ioapic, 0); 4059 reg_00.raw = io_apic_read(ioapic, 0);
4011 spin_unlock_irqrestore(&ioapic_lock, flags); 4060 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4012 4061
4013 /* Sanity check */ 4062 /* Sanity check */
4014 if (reg_00.bits.ID != apic_id) { 4063 if (reg_00.bits.ID != apic_id) {
@@ -4029,9 +4078,9 @@ int __init io_apic_get_version(int ioapic)
4029 union IO_APIC_reg_01 reg_01; 4078 union IO_APIC_reg_01 reg_01;
4030 unsigned long flags; 4079 unsigned long flags;
4031 4080
4032 spin_lock_irqsave(&ioapic_lock, flags); 4081 raw_spin_lock_irqsave(&ioapic_lock, flags);
4033 reg_01.raw = io_apic_read(ioapic, 1); 4082 reg_01.raw = io_apic_read(ioapic, 1);
4034 spin_unlock_irqrestore(&ioapic_lock, flags); 4083 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4035 4084
4036 return reg_01.bits.version; 4085 return reg_01.bits.version;
4037} 4086}
@@ -4063,27 +4112,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4063#ifdef CONFIG_SMP 4112#ifdef CONFIG_SMP
4064void __init setup_ioapic_dest(void) 4113void __init setup_ioapic_dest(void)
4065{ 4114{
4066 int pin, ioapic = 0, irq, irq_entry; 4115 int pin, ioapic, irq, irq_entry;
4067 struct irq_desc *desc; 4116 struct irq_desc *desc;
4068 const struct cpumask *mask; 4117 const struct cpumask *mask;
4069 4118
4070 if (skip_ioapic_setup == 1) 4119 if (skip_ioapic_setup == 1)
4071 return; 4120 return;
4072 4121
4073#ifdef CONFIG_ACPI 4122 for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
4074 if (!acpi_disabled && acpi_ioapic) {
4075 ioapic = mp_find_ioapic(0);
4076 if (ioapic < 0)
4077 ioapic = 0;
4078 }
4079#endif
4080
4081 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 4123 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
4082 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 4124 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
4083 if (irq_entry == -1) 4125 if (irq_entry == -1)
4084 continue; 4126 continue;
4085 irq = pin_2_irq(irq_entry, ioapic, pin); 4127 irq = pin_2_irq(irq_entry, ioapic, pin);
4086 4128
4129 if ((ioapic > 0) && (irq > 16))
4130 continue;
4131
4087 desc = irq_to_desc(irq); 4132 desc = irq_to_desc(irq);
4088 4133
4089 /* 4134 /*
@@ -4268,3 +4313,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4268 4313
4269 nr_ioapics++; 4314 nr_ioapics++;
4270} 4315}
4316
4317/* Enable IOAPIC early just for system timer */
4318void __init pre_init_apic_IRQ0(void)
4319{
4320 struct irq_cfg *cfg;
4321 struct irq_desc *desc;
4322
4323 printk(KERN_INFO "Early APIC setup for system timer0\n");
4324#ifndef CONFIG_SMP
4325 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4326#endif
4327 desc = irq_to_desc_alloc_node(0, 0);
4328
4329 setup_local_APIC();
4330
4331 cfg = irq_cfg(0);
4332 add_pin_to_irq_node(cfg, 0, 0, 0);
4333 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4334
4335 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
4336}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 0159a69396cb..1edaf15c0b8e 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -18,6 +18,7 @@
18#include <linux/delay.h> 18#include <linux/delay.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/slab.h>
21#include <linux/sysdev.h> 22#include <linux/sysdev.h>
22#include <linux/sysctl.h> 23#include <linux/sysctl.h>
23#include <linux/percpu.h> 24#include <linux/percpu.h>
@@ -177,7 +178,7 @@ int __init check_nmi_watchdog(void)
177error: 178error:
178 if (nmi_watchdog == NMI_IO_APIC) { 179 if (nmi_watchdog == NMI_IO_APIC) {
179 if (!timer_through_8259) 180 if (!timer_through_8259)
180 disable_8259A_irq(0); 181 legacy_pic->chip->mask(0);
181 on_each_cpu(__acpi_nmi_disable, NULL, 1); 182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
182 } 183 }
183 184
@@ -416,13 +417,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
416 417
417 /* We can be called before check_nmi_watchdog, hence NULL check. */ 418 /* We can be called before check_nmi_watchdog, hence NULL check. */
418 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 419 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
419 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 420 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
420 421
421 spin_lock(&lock); 422 raw_spin_lock(&lock);
422 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 423 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
423 show_regs(regs); 424 show_regs(regs);
424 dump_stack(); 425 dump_stack();
425 spin_unlock(&lock); 426 raw_spin_unlock(&lock);
426 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 427 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
427 428
428 rc = 1; 429 rc = 1;
@@ -438,8 +439,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
438 * Ayiee, looks like this CPU is stuck ... 439 * Ayiee, looks like this CPU is stuck ...
439 * wait a few IRQs (5 seconds) before doing the oops ... 440 * wait a few IRQs (5 seconds) before doing the oops ...
440 */ 441 */
441 __this_cpu_inc(per_cpu_var(alert_counter)); 442 __this_cpu_inc(alert_counter);
442 if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) 443 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
443 /* 444 /*
444 * die_nmi will return ONLY if NOTIFY_STOP happens.. 445 * die_nmi will return ONLY if NOTIFY_STOP happens..
445 */ 446 */
@@ -447,7 +448,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
447 regs, panic_on_timeout); 448 regs, panic_on_timeout);
448 } else { 449 } else {
449 __get_cpu_var(last_irq_sum) = sum; 450 __get_cpu_var(last_irq_sum) = sum;
450 __this_cpu_write(per_cpu_var(alert_counter), 0); 451 __this_cpu_write(alert_counter, 0);
451 } 452 }
452 453
453 /* see if the nmi watchdog went off */ 454 /* see if the nmi watchdog went off */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 47dd856708e5..3e28401f161c 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -277,6 +277,7 @@ static __init void early_check_numaq(void)
277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; 277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; 278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
279 x86_init.timers.tsc_pre_init = numaq_tsc_init; 279 x86_init.timers.tsc_pre_init = numaq_tsc_init;
280 x86_init.pci.init = pci_numaq_init;
280 } 281 }
281} 282}
282 283
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 3740c8a4eae7..c085d52dbaf2 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -17,6 +17,7 @@
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/timer.h> 19#include <linux/timer.h>
20#include <linux/slab.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/init.h> 22#include <linux/init.h>
22#include <linux/io.h> 23#include <linux/io.h>
@@ -120,11 +121,9 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
120unsigned long sn_rtc_cycles_per_second; 121unsigned long sn_rtc_cycles_per_second;
121EXPORT_SYMBOL(sn_rtc_cycles_per_second); 122EXPORT_SYMBOL(sn_rtc_cycles_per_second);
122 123
123/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
124
125static const struct cpumask *uv_target_cpus(void) 124static const struct cpumask *uv_target_cpus(void)
126{ 125{
127 return cpumask_of(0); 126 return cpu_online_mask;
128} 127}
129 128
130static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) 129static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 30f25a75fe28..5de7f4c56971 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -5,7 +5,6 @@
5#include <linux/kernel.h> 5#include <linux/kernel.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h> 8#include <linux/spinlock.h>
10#include <linux/acpi.h> 9#include <linux/acpi.h>
11#include <asm/io.h> 10#include <asm/io.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b9..870e6cc6ad28 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ
10 10
11comment "CPUFreq processor drivers" 11comment "CPUFreq processor drivers"
12 12
13config X86_PCC_CPUFREQ
14 tristate "Processor Clocking Control interface driver"
15 depends on ACPI && ACPI_PROCESSOR
16 help
17 This driver adds support for the PCC interface.
18
19 For details, take a look at:
20 <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
21
22 To compile this driver as a module, choose M here: the
23 module will be called pcc-cpufreq.
24
25 If in doubt, say N.
26
13config X86_ACPI_CPUFREQ 27config X86_ACPI_CPUFREQ
14 tristate "ACPI Processor P-States driver" 28 tristate "ACPI Processor P-States driver"
15 select CPU_FREQ_TABLE 29 select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294d..1840c0a5170b 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -4,6 +4,7 @@
4 4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o 5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o 6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
7obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
7obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o 8obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
8obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 9obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
9obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 10obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1b1920fa7c80..459168083b77 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/slab.h>
36#include <trace/events/power.h> 37#include <trace/events/power.h>
37 38
38#include <linux/acpi.h> 39#include <linux/acpi.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 006b278b0d5d..c587db472a75 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -20,7 +20,6 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/init.h> 21#include <linux/init.h>
22 22
23#include <linux/slab.h>
24#include <linux/delay.h> 23#include <linux/delay.h>
25#include <linux/cpufreq.h> 24#include <linux/cpufreq.h>
26 25
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ac27ec2264d5..16e3483be9e3 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -80,6 +80,7 @@
80#include <linux/cpufreq.h> 80#include <linux/cpufreq.h>
81#include <linux/pci.h> 81#include <linux/pci.h>
82#include <linux/errno.h> 82#include <linux/errno.h>
83#include <linux/slab.h>
83 84
84#include <asm/processor-cyrix.h> 85#include <asm/processor-cyrix.h>
85 86
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index da5f70fcb766..e7b559d74c52 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -9,7 +9,6 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/slab.h>
13#include <linux/cpufreq.h> 12#include <linux/cpufreq.h>
14#include <linux/timex.h> 13#include <linux/timex.h>
15 14
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 869615193720..7b8a8ba67b07 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -25,7 +25,6 @@
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/smp.h> 26#include <linux/smp.h>
27#include <linux/cpufreq.h> 27#include <linux/cpufreq.h>
28#include <linux/slab.h>
29#include <linux/cpumask.h> 28#include <linux/cpumask.h>
30#include <linux/timex.h> 29#include <linux/timex.h>
31 30
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 000000000000..ce7cde713e71
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,621 @@
1/*
2 * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
3 *
4 * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
5 * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
6 * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
17 * INFRINGEMENT. See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/sched.h>
31#include <linux/cpufreq.h>
32#include <linux/compiler.h>
33#include <linux/slab.h>
34
35#include <linux/acpi.h>
36#include <linux/io.h>
37#include <linux/spinlock.h>
38#include <linux/uaccess.h>
39
40#include <acpi/processor.h>
41
/* Version string reported via MODULE_VERSION() and the load message. */
#define PCC_VERSION	"1.00.00"
/* Upper bound on status reads performed by pcc_cmd() after a doorbell ring. */
#define POLL_LOOPS	300

/* Bit tested in pcc_header.status to detect a finished command. */
#define CMD_COMPLETE	0x1
/* Values written to pcc_header.command. */
#define CMD_GET_FREQ	0x0
#define CMD_SET_FREQ	0x1

/* Size in bytes of each per-CPU input/output buffer cleared with memset_io(). */
#define BUF_SZ		4

#define dprintk(msg...)	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,	\
					     "pcc-cpufreq", msg)
53
54struct pcc_register_resource {
55 u8 descriptor;
56 u16 length;
57 u8 space_id;
58 u8 bit_width;
59 u8 bit_offset;
60 u8 access_size;
61 u64 address;
62} __attribute__ ((packed));
63
64struct pcc_memory_resource {
65 u8 descriptor;
66 u16 length;
67 u8 space_id;
68 u8 resource_usage;
69 u8 type_specific;
70 u64 granularity;
71 u64 minimum;
72 u64 maximum;
73 u64 translation_offset;
74 u64 address_length;
75} __attribute__ ((packed));
76
77static struct cpufreq_driver pcc_cpufreq_driver;
78
79struct pcc_header {
80 u32 signature;
81 u16 length;
82 u8 major;
83 u8 minor;
84 u32 features;
85 u16 command;
86 u16 status;
87 u32 latency;
88 u32 minimum_time;
89 u32 maximum_time;
90 u32 nominal;
91 u32 throttled_frequency;
92 u32 minimum_frequency;
93};
94
95static void __iomem *pcch_virt_addr;
96static struct pcc_header __iomem *pcch_hdr;
97
98static DEFINE_SPINLOCK(pcc_lock);
99
100static struct acpi_generic_address doorbell;
101
102static u64 doorbell_preserve;
103static u64 doorbell_write;
104
105static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
106 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
107
108struct pcc_cpu {
109 u32 input_offset;
110 u32 output_offset;
111};
112
113static struct pcc_cpu *pcc_cpu_info;
114
115static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
116{
117 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
118 policy->cpuinfo.max_freq);
119 return 0;
120}
121
122static inline void pcc_cmd(void)
123{
124 u64 doorbell_value;
125 int i;
126
127 acpi_read(&doorbell_value, &doorbell);
128 acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
129 &doorbell);
130
131 for (i = 0; i < POLL_LOOPS; i++) {
132 if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
133 break;
134 }
135}
136
137static inline void pcc_clear_mapping(void)
138{
139 if (pcch_virt_addr)
140 iounmap(pcch_virt_addr);
141 pcch_virt_addr = NULL;
142}
143
144static unsigned int pcc_get_freq(unsigned int cpu)
145{
146 struct pcc_cpu *pcc_cpu_data;
147 unsigned int curr_freq;
148 unsigned int freq_limit;
149 u16 status;
150 u32 input_buffer;
151 u32 output_buffer;
152
153 spin_lock(&pcc_lock);
154
155 dprintk("get: get_freq for CPU %d\n", cpu);
156 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
157
158 input_buffer = 0x1;
159 iowrite32(input_buffer,
160 (pcch_virt_addr + pcc_cpu_data->input_offset));
161 iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
162
163 pcc_cmd();
164
165 output_buffer =
166 ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
167
168 /* Clear the input buffer - we are done with the current command */
169 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
170
171 status = ioread16(&pcch_hdr->status);
172 if (status != CMD_COMPLETE) {
173 dprintk("get: FAILED: for CPU %d, status is %d\n",
174 cpu, status);
175 goto cmd_incomplete;
176 }
177 iowrite16(0, &pcch_hdr->status);
178 curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
179 / 100) * 1000);
180
181 dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
182 "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
183 cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
184 output_buffer, curr_freq);
185
186 freq_limit = (output_buffer >> 8) & 0xff;
187 if (freq_limit != 0xff) {
188 dprintk("get: frequency for cpu %d is being temporarily"
189 " capped at %d\n", cpu, curr_freq);
190 }
191
192 spin_unlock(&pcc_lock);
193 return curr_freq;
194
195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock);
198 return -EINVAL;
199}
200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
202 unsigned int target_freq,
203 unsigned int relation)
204{
205 struct pcc_cpu *pcc_cpu_data;
206 struct cpufreq_freqs freqs;
207 u16 status;
208 u32 input_buffer;
209 int cpu;
210
211 spin_lock(&pcc_lock);
212 cpu = policy->cpu;
213 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
214
215 dprintk("target: CPU %d should go to target freq: %d "
216 "(virtual) input_offset is 0x%x\n",
217 cpu, target_freq,
218 (pcch_virt_addr + pcc_cpu_data->input_offset));
219
220 freqs.new = target_freq;
221 freqs.cpu = cpu;
222 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
223
224 input_buffer = 0x1 | (((target_freq * 100)
225 / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
226 iowrite32(input_buffer,
227 (pcch_virt_addr + pcc_cpu_data->input_offset));
228 iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
229
230 pcc_cmd();
231
232 /* Clear the input buffer - we are done with the current command */
233 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
234
235 status = ioread16(&pcch_hdr->status);
236 if (status != CMD_COMPLETE) {
237 dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
238 cpu, status);
239 goto cmd_incomplete;
240 }
241 iowrite16(0, &pcch_hdr->status);
242
243 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
244 dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
245 spin_unlock(&pcc_lock);
246
247 return 0;
248
249cmd_incomplete:
250 iowrite16(0, &pcch_hdr->status);
251 spin_unlock(&pcc_lock);
252 return -EINVAL;
253}
254
255static int pcc_get_offset(int cpu)
256{
257 acpi_status status;
258 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
259 union acpi_object *pccp, *offset;
260 struct pcc_cpu *pcc_cpu_data;
261 struct acpi_processor *pr;
262 int ret = 0;
263
264 pr = per_cpu(processors, cpu);
265 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
266
267 status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
268 if (ACPI_FAILURE(status))
269 return -ENODEV;
270
271 pccp = buffer.pointer;
272 if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
273 ret = -ENODEV;
274 goto out_free;
275 };
276
277 offset = &(pccp->package.elements[0]);
278 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
279 ret = -ENODEV;
280 goto out_free;
281 }
282
283 pcc_cpu_data->input_offset = offset->integer.value;
284
285 offset = &(pccp->package.elements[1]);
286 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
287 ret = -ENODEV;
288 goto out_free;
289 }
290
291 pcc_cpu_data->output_offset = offset->integer.value;
292
293 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
294 memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
295
296 dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
297 "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
298 cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
299out_free:
300 kfree(buffer.pointer);
301 return ret;
302}
303
304static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
305{
306 acpi_status status;
307 struct acpi_object_list input;
308 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
309 union acpi_object in_params[4];
310 union acpi_object *out_obj;
311 u32 capabilities[2];
312 u32 errors;
313 u32 supported;
314 int ret = 0;
315
316 input.count = 4;
317 input.pointer = in_params;
318 input.count = 4;
319 input.pointer = in_params;
320 in_params[0].type = ACPI_TYPE_BUFFER;
321 in_params[0].buffer.length = 16;
322 in_params[0].buffer.pointer = OSC_UUID;
323 in_params[1].type = ACPI_TYPE_INTEGER;
324 in_params[1].integer.value = 1;
325 in_params[2].type = ACPI_TYPE_INTEGER;
326 in_params[2].integer.value = 2;
327 in_params[3].type = ACPI_TYPE_BUFFER;
328 in_params[3].buffer.length = 8;
329 in_params[3].buffer.pointer = (u8 *)&capabilities;
330
331 capabilities[0] = OSC_QUERY_ENABLE;
332 capabilities[1] = 0x1;
333
334 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
335 if (ACPI_FAILURE(status))
336 return -ENODEV;
337
338 if (!output.length)
339 return -ENODEV;
340
341 out_obj = output.pointer;
342 if (out_obj->type != ACPI_TYPE_BUFFER) {
343 ret = -ENODEV;
344 goto out_free;
345 }
346
347 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
348 if (errors) {
349 ret = -ENODEV;
350 goto out_free;
351 }
352
353 supported = *((u32 *)(out_obj->buffer.pointer + 4));
354 if (!(supported & 0x1)) {
355 ret = -ENODEV;
356 goto out_free;
357 }
358
359 kfree(output.pointer);
360 capabilities[0] = 0x0;
361 capabilities[1] = 0x1;
362
363 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
364 if (ACPI_FAILURE(status))
365 return -ENODEV;
366
367 if (!output.length)
368 return -ENODEV;
369
370 out_obj = output.pointer;
371 if (out_obj->type != ACPI_TYPE_BUFFER) {
372 ret = -ENODEV;
373 goto out_free;
374 }
375
376 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
377 if (errors) {
378 ret = -ENODEV;
379 goto out_free;
380 }
381
382 supported = *((u32 *)(out_obj->buffer.pointer + 4));
383 if (!(supported & 0x1)) {
384 ret = -ENODEV;
385 goto out_free;
386 }
387
388out_free:
389 kfree(output.pointer);
390 return ret;
391}
392
393static int __init pcc_cpufreq_probe(void)
394{
395 acpi_status status;
396 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
397 struct pcc_memory_resource *mem_resource;
398 struct pcc_register_resource *reg_resource;
399 union acpi_object *out_obj, *member;
400 acpi_handle handle, osc_handle;
401 int ret = 0;
402
403 status = acpi_get_handle(NULL, "\\_SB", &handle);
404 if (ACPI_FAILURE(status))
405 return -ENODEV;
406
407 status = acpi_get_handle(handle, "_OSC", &osc_handle);
408 if (ACPI_SUCCESS(status)) {
409 ret = pcc_cpufreq_do_osc(&osc_handle);
410 if (ret)
411 dprintk("probe: _OSC evaluation did not succeed\n");
412 /* Firmware's use of _OSC is optional */
413 ret = 0;
414 }
415
416 status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
417 if (ACPI_FAILURE(status))
418 return -ENODEV;
419
420 out_obj = output.pointer;
421 if (out_obj->type != ACPI_TYPE_PACKAGE) {
422 ret = -ENODEV;
423 goto out_free;
424 }
425
426 member = &out_obj->package.elements[0];
427 if (member->type != ACPI_TYPE_BUFFER) {
428 ret = -ENODEV;
429 goto out_free;
430 }
431
432 mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
433
434 dprintk("probe: mem_resource descriptor: 0x%x,"
435 " length: %d, space_id: %d, resource_usage: %d,"
436 " type_specific: %d, granularity: 0x%llx,"
437 " minimum: 0x%llx, maximum: 0x%llx,"
438 " translation_offset: 0x%llx, address_length: 0x%llx\n",
439 mem_resource->descriptor, mem_resource->length,
440 mem_resource->space_id, mem_resource->resource_usage,
441 mem_resource->type_specific, mem_resource->granularity,
442 mem_resource->minimum, mem_resource->maximum,
443 mem_resource->translation_offset,
444 mem_resource->address_length);
445
446 if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
447 ret = -ENODEV;
448 goto out_free;
449 }
450
451 pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
452 mem_resource->address_length);
453 if (pcch_virt_addr == NULL) {
454 dprintk("probe: could not map shared mem region\n");
455 goto out_free;
456 }
457 pcch_hdr = pcch_virt_addr;
458
459 dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
460 dprintk("probe: PCCH header is at physical address: 0x%llx,"
461 " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
462 " supported features: 0x%x, command field: 0x%x,"
463 " status field: 0x%x, nominal latency: %d us\n",
464 mem_resource->minimum, ioread32(&pcch_hdr->signature),
465 ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
466 ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
467 ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
468 ioread32(&pcch_hdr->latency));
469
470 dprintk("probe: min time between commands: %d us,"
471 " max time between commands: %d us,"
472 " nominal CPU frequency: %d MHz,"
473 " minimum CPU frequency: %d MHz,"
474 " minimum CPU frequency without throttling: %d MHz\n",
475 ioread32(&pcch_hdr->minimum_time),
476 ioread32(&pcch_hdr->maximum_time),
477 ioread32(&pcch_hdr->nominal),
478 ioread32(&pcch_hdr->throttled_frequency),
479 ioread32(&pcch_hdr->minimum_frequency));
480
481 member = &out_obj->package.elements[1];
482 if (member->type != ACPI_TYPE_BUFFER) {
483 ret = -ENODEV;
484 goto pcch_free;
485 }
486
487 reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
488
489 doorbell.space_id = reg_resource->space_id;
490 doorbell.bit_width = reg_resource->bit_width;
491 doorbell.bit_offset = reg_resource->bit_offset;
492 doorbell.access_width = 64;
493 doorbell.address = reg_resource->address;
494
495 dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
496 "bit_offset is %d, access_width is %d, address is 0x%llx\n",
497 doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
498 doorbell.access_width, reg_resource->address);
499
500 member = &out_obj->package.elements[2];
501 if (member->type != ACPI_TYPE_INTEGER) {
502 ret = -ENODEV;
503 goto pcch_free;
504 }
505
506 doorbell_preserve = member->integer.value;
507
508 member = &out_obj->package.elements[3];
509 if (member->type != ACPI_TYPE_INTEGER) {
510 ret = -ENODEV;
511 goto pcch_free;
512 }
513
514 doorbell_write = member->integer.value;
515
516 dprintk("probe: doorbell_preserve: 0x%llx,"
517 " doorbell_write: 0x%llx\n",
518 doorbell_preserve, doorbell_write);
519
520 pcc_cpu_info = alloc_percpu(struct pcc_cpu);
521 if (!pcc_cpu_info) {
522 ret = -ENOMEM;
523 goto pcch_free;
524 }
525
526 printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
527 " limits: %d MHz, %d MHz\n", PCC_VERSION,
528 ioread32(&pcch_hdr->minimum_frequency),
529 ioread32(&pcch_hdr->nominal));
530 kfree(output.pointer);
531 return ret;
532pcch_free:
533 pcc_clear_mapping();
534out_free:
535 kfree(output.pointer);
536 return ret;
537}
538
539static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
540{
541 unsigned int cpu = policy->cpu;
542 unsigned int result = 0;
543
544 if (!pcch_virt_addr) {
545 result = -1;
546 goto pcch_null;
547 }
548
549 result = pcc_get_offset(cpu);
550 if (result) {
551 dprintk("init: PCCP evaluation failed\n");
552 goto free;
553 }
554
555 policy->max = policy->cpuinfo.max_freq =
556 ioread32(&pcch_hdr->nominal) * 1000;
557 policy->min = policy->cpuinfo.min_freq =
558 ioread32(&pcch_hdr->minimum_frequency) * 1000;
559 policy->cur = pcc_get_freq(cpu);
560
561 dprintk("init: policy->max is %d, policy->min is %d\n",
562 policy->max, policy->min);
563
564 return 0;
565free:
566 pcc_clear_mapping();
567 free_percpu(pcc_cpu_info);
568pcch_null:
569 return result;
570}
571
572static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
573{
574 return 0;
575}
576
577static struct cpufreq_driver pcc_cpufreq_driver = {
578 .flags = CPUFREQ_CONST_LOOPS,
579 .get = pcc_get_freq,
580 .verify = pcc_cpufreq_verify,
581 .target = pcc_cpufreq_target,
582 .init = pcc_cpufreq_cpu_init,
583 .exit = pcc_cpufreq_cpu_exit,
584 .name = "pcc-cpufreq",
585 .owner = THIS_MODULE,
586};
587
588static int __init pcc_cpufreq_init(void)
589{
590 int ret;
591
592 if (acpi_disabled)
593 return 0;
594
595 ret = pcc_cpufreq_probe();
596 if (ret) {
597 dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
598 return ret;
599 }
600
601 ret = cpufreq_register_driver(&pcc_cpufreq_driver);
602
603 return ret;
604}
605
606static void __exit pcc_cpufreq_exit(void)
607{
608 cpufreq_unregister_driver(&pcc_cpufreq_driver);
609
610 pcc_clear_mapping();
611
612 free_percpu(pcc_cpu_info);
613}
614
615MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
616MODULE_VERSION(PCC_VERSION);
617MODULE_DESCRIPTION("Processor Clocking Control interface driver");
618MODULE_LICENSE("GPL");
619
620late_initcall(pcc_cpufreq_init);
621module_exit(pcc_cpufreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index cb01dac267d3..b3379d6a5c57 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -13,7 +13,6 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
15#include <linux/ioport.h> 15#include <linux/ioport.h>
16#include <linux/slab.h>
17#include <linux/timex.h> 16#include <linux/timex.h>
18#include <linux/io.h> 17#include <linux/io.h>
19 18
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6e44519960c8..b6215b9798e2 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data)
806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, 806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
807 unsigned int index) 807 unsigned int index)
808{ 808{
809 acpi_integer control; 809 u64 control;
810 810
811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
812 return; 812 return;
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
824{ 824{
825 struct cpufreq_frequency_table *powernow_table; 825 struct cpufreq_frequency_table *powernow_table;
826 int ret_val = -ENODEV; 826 int ret_val = -ENODEV;
827 acpi_integer control, status; 827 u64 control, status;
828 828
829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
830 dprintk("register performance failed: bad ACPI data\n"); 830 dprintk("register performance failed: bad ACPI data\n");
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
929 powernow_table[i].index = index; 929 powernow_table[i].index = index;
930 930
931 /* Frequency may be rounded for these */ 931 /* Frequency may be rounded for these */
932 if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { 932 if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
933 || boot_cpu_data.x86 == 0x11) {
933 powernow_table[i].frequency = 934 powernow_table[i].frequency =
934 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); 935 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
935 } else 936 } else
@@ -948,7 +949,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
948 u32 fid; 949 u32 fid;
949 u32 vid; 950 u32 vid;
950 u32 freq, index; 951 u32 freq, index;
951 acpi_integer status, control; 952 u64 status, control;
952 953
953 if (data->exttype) { 954 if (data->exttype) {
954 status = data->acpi_data.states[i].status; 955 status = data->acpi_data.states[i].status;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 8d672ef162ce..9b1ff37de46a 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -20,6 +20,7 @@
20#include <linux/sched.h> /* current */ 20#include <linux/sched.h> /* current */
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/compiler.h> 22#include <linux/compiler.h>
23#include <linux/gfp.h>
23 24
24#include <asm/msr.h> 25#include <asm/msr.h>
25#include <asm/processor.h> 26#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2ce8e0b5cc54..561758e95180 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -23,7 +23,6 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/cpufreq.h> 24#include <linux/cpufreq.h>
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <linux/slab.h>
27#include <linux/sched.h> 26#include <linux/sched.h>
28 27
29#include "speedstep-lib.h" 28#include "speedstep-lib.h"
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index ad0083abfa23..a94ec6be69fa 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -13,7 +13,6 @@
13#include <linux/moduleparam.h> 13#include <linux/moduleparam.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/cpufreq.h> 15#include <linux/cpufreq.h>
16#include <linux/slab.h>
17 16
18#include <asm/msr.h> 17#include <asm/msr.h>
19#include <asm/tsc.h> 18#include <asm/tsc.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 04d73c114e49..8abd869baabf 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -17,7 +17,6 @@
17#include <linux/moduleparam.h> 17#include <linux/moduleparam.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/cpufreq.h> 19#include <linux/cpufreq.h>
20#include <linux/slab.h>
21#include <linux/delay.h> 20#include <linux/delay.h>
22#include <linux/io.h> 21#include <linux/io.h>
23#include <asm/ist.h> 22#include <asm/ist.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 879666f4d871..1366c7cfd483 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
47 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 47 (c->x86 == 0x6 && c->x86_model >= 0x0e))
48 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 48 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
49 49
50 /*
51 * Atom erratum AAE44/AAF40/AAG38/AAH41:
52 *
53 * A race condition between speculative fetches and invalidating
54 * a large page. This is worked around in microcode, but we
55 * need the microcode to have already been loaded... so if it is
56 * not, recommend a BIOS update and disable large pages.
57 */
58 if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
59 u32 ucode, junk;
60
61 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
62 sync_core();
63 rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
64
65 if (ucode < 0x20e) {
66 printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
67 clear_cpu_cap(c, X86_FEATURE_PSE);
68 }
69 }
70
50#ifdef CONFIG_X86_64 71#ifdef CONFIG_X86_64
51 set_cpu_cap(c, X86_FEATURE_SYSENTER32); 72 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
52#else 73#else
@@ -70,7 +91,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
70 if (c->x86_power & (1 << 8)) { 91 if (c->x86_power & (1 << 8)) {
71 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 92 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
72 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 93 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
73 sched_clock_stable = 1; 94 if (!check_tsc_unstable())
95 sched_clock_stable = 1;
74 } 96 }
75 97
76 /* 98 /*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index eddb1bdd1b8f..b3eeb66c0a51 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -903,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
903 return ret; 903 return ret;
904} 904}
905 905
906static struct sysfs_ops sysfs_ops = { 906static const struct sysfs_ops sysfs_ops = {
907 .show = show, 907 .show = show,
908 .store = store, 908 .store = store,
909}; 909};
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 73734baa50f2..e7dbde7bfedb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -22,6 +22,7 @@
22#include <linux/kdebug.h> 22#include <linux/kdebug.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/sched.h> 24#include <linux/sched.h>
25#include <linux/gfp.h>
25#include <asm/mce.h> 26#include <asm/mce.h>
26#include <asm/apic.h> 27#include <asm/apic.h>
27 28
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a8aacd4b513c..8a6f0afa767e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -26,6 +26,7 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/sysfs.h> 27#include <linux/sysfs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h>
29#include <linux/init.h> 30#include <linux/init.h>
30#include <linux/kmod.h> 31#include <linux/kmod.h>
31#include <linux/poll.h> 32#include <linux/poll.h>
@@ -46,6 +47,13 @@
46 47
47#include "mce-internal.h" 48#include "mce-internal.h"
48 49
50static DEFINE_MUTEX(mce_read_mutex);
51
52#define rcu_dereference_check_mce(p) \
53 rcu_dereference_check((p), \
54 rcu_read_lock_sched_held() || \
55 lockdep_is_held(&mce_read_mutex))
56
49#define CREATE_TRACE_POINTS 57#define CREATE_TRACE_POINTS
50#include <trace/events/mce.h> 58#include <trace/events/mce.h>
51 59
@@ -158,7 +166,7 @@ void mce_log(struct mce *mce)
158 mce->finished = 0; 166 mce->finished = 0;
159 wmb(); 167 wmb();
160 for (;;) { 168 for (;;) {
161 entry = rcu_dereference(mcelog.next); 169 entry = rcu_dereference_check_mce(mcelog.next);
162 for (;;) { 170 for (;;) {
163 /* 171 /*
164 * When the buffer fills up discard new entries. 172 * When the buffer fills up discard new entries.
@@ -1485,8 +1493,6 @@ static void collect_tscs(void *data)
1485 rdtscll(cpu_tsc[smp_processor_id()]); 1493 rdtscll(cpu_tsc[smp_processor_id()]);
1486} 1494}
1487 1495
1488static DEFINE_MUTEX(mce_read_mutex);
1489
1490static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, 1496static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1491 loff_t *off) 1497 loff_t *off)
1492{ 1498{
@@ -1500,7 +1506,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1500 return -ENOMEM; 1506 return -ENOMEM;
1501 1507
1502 mutex_lock(&mce_read_mutex); 1508 mutex_lock(&mce_read_mutex);
1503 next = rcu_dereference(mcelog.next); 1509 next = rcu_dereference_check_mce(mcelog.next);
1504 1510
1505 /* Only supports full reads right now */ 1511 /* Only supports full reads right now */
1506 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 1512 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
@@ -1565,7 +1571,7 @@ timeout:
1565static unsigned int mce_poll(struct file *file, poll_table *wait) 1571static unsigned int mce_poll(struct file *file, poll_table *wait)
1566{ 1572{
1567 poll_wait(file, &mce_wait, wait); 1573 poll_wait(file, &mce_wait, wait);
1568 if (rcu_dereference(mcelog.next)) 1574 if (rcu_dereference_check_mce(mcelog.next))
1569 return POLLIN | POLLRDNORM; 1575 return POLLIN | POLLRDNORM;
1570 return 0; 1576 return 0;
1571} 1577}
@@ -2044,6 +2050,7 @@ static __init void mce_init_banks(void)
2044 struct mce_bank *b = &mce_banks[i]; 2050 struct mce_bank *b = &mce_banks[i];
2045 struct sysdev_attribute *a = &b->attr; 2051 struct sysdev_attribute *a = &b->attr;
2046 2052
2053 sysfs_attr_init(&a->attr);
2047 a->attr.name = b->attrname; 2054 a->attr.name = b->attrname;
2048 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2055 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
2049 2056
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 83a3d1f4efca..224392d8fe8c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -21,6 +21,7 @@
21#include <linux/errno.h> 21#include <linux/errno.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/sysfs.h> 23#include <linux/sysfs.h>
24#include <linux/slab.h>
24#include <linux/init.h> 25#include <linux/init.h>
25#include <linux/cpu.h> 26#include <linux/cpu.h>
26#include <linux/smp.h> 27#include <linux/smp.h>
@@ -388,7 +389,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
388 return ret; 389 return ret;
389} 390}
390 391
391static struct sysfs_ops threshold_ops = { 392static const struct sysfs_ops threshold_ops = {
392 .show = show, 393 .show = show,
393 .store = store, 394 .store = store,
394}; 395};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 7c785634af2b..62b48e40920a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -5,6 +5,7 @@
5 * Author: Andi Kleen 5 * Author: Andi Kleen
6 */ 6 */
7 7
8#include <linux/gfp.h>
8#include <linux/init.h> 9#include <linux/init.h>
9#include <linux/interrupt.h> 10#include <linux/interrupt.h>
10#include <linux/percpu.h> 11#include <linux/percpu.h>
@@ -95,7 +96,7 @@ static void cmci_discover(int banks, int boot)
95 96
96 /* Already owned by someone else? */ 97 /* Already owned by someone else? */
97 if (val & CMCI_EN) { 98 if (val & CMCI_EN) {
98 if (test_and_clear_bit(i, owned) || boot) 99 if (test_and_clear_bit(i, owned) && !boot)
99 print_update("SHD", &hdr, i); 100 print_update("SHD", &hdr, i);
100 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 101 __clear_bit(i, __get_cpu_var(mce_poll_banks));
101 continue; 102 continue;
@@ -107,7 +108,7 @@ static void cmci_discover(int banks, int boot)
107 108
108 /* Did the enable bit stick? -- the bank supports CMCI */ 109 /* Did the enable bit stick? -- the bank supports CMCI */
109 if (val & CMCI_EN) { 110 if (val & CMCI_EN) {
110 if (!test_and_set_bit(i, owned) || boot) 111 if (!test_and_set_bit(i, owned) && !boot)
111 print_update("CMCI", &hdr, i); 112 print_update("CMCI", &hdr, i);
112 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 113 __clear_bit(i, __get_cpu_var(mce_poll_banks));
113 } else { 114 } else {
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e0466..06130b52f012 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/sort.h>
26#include <linux/mutex.h> 25#include <linux/mutex.h>
27#include <linux/uaccess.h> 26#include <linux/uaccess.h>
28#include <linux/kvm_para.h> 27#include <linux/kvm_para.h>
28#include <linux/range.h>
29 29
30#include <asm/processor.h> 30#include <asm/processor.h>
31#include <asm/e820.h> 31#include <asm/e820.h>
@@ -34,11 +34,6 @@
34 34
35#include "mtrr.h" 35#include "mtrr.h"
36 36
37struct res_range {
38 unsigned long start;
39 unsigned long end;
40};
41
42struct var_mtrr_range_state { 37struct var_mtrr_range_state {
43 unsigned long base_pfn; 38 unsigned long base_pfn;
44 unsigned long size_pfn; 39 unsigned long size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
56/* Should be related to MTRR_VAR_RANGES nums */ 51/* Should be related to MTRR_VAR_RANGES nums */
57#define RANGE_NUM 256 52#define RANGE_NUM 256
58 53
59static struct res_range __initdata range[RANGE_NUM]; 54static struct range __initdata range[RANGE_NUM];
60static int __initdata nr_range; 55static int __initdata nr_range;
61 56
62static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 57static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
64static int __initdata debug_print; 59static int __initdata debug_print;
65#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) 60#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
66 61
67
68static int __init
69add_range(struct res_range *range, int nr_range,
70 unsigned long start, unsigned long end)
71{
72 /* Out of slots: */
73 if (nr_range >= RANGE_NUM)
74 return nr_range;
75
76 range[nr_range].start = start;
77 range[nr_range].end = end;
78
79 nr_range++;
80
81 return nr_range;
82}
83
84static int __init
85add_range_with_merge(struct res_range *range, int nr_range,
86 unsigned long start, unsigned long end)
87{
88 int i;
89
90 /* Try to merge it with old one: */
91 for (i = 0; i < nr_range; i++) {
92 unsigned long final_start, final_end;
93 unsigned long common_start, common_end;
94
95 if (!range[i].end)
96 continue;
97
98 common_start = max(range[i].start, start);
99 common_end = min(range[i].end, end);
100 if (common_start > common_end + 1)
101 continue;
102
103 final_start = min(range[i].start, start);
104 final_end = max(range[i].end, end);
105
106 range[i].start = final_start;
107 range[i].end = final_end;
108 return nr_range;
109 }
110
111 /* Need to add it: */
112 return add_range(range, nr_range, start, end);
113}
114
115static void __init
116subtract_range(struct res_range *range, unsigned long start, unsigned long end)
117{
118 int i, j;
119
120 for (j = 0; j < RANGE_NUM; j++) {
121 if (!range[j].end)
122 continue;
123
124 if (start <= range[j].start && end >= range[j].end) {
125 range[j].start = 0;
126 range[j].end = 0;
127 continue;
128 }
129
130 if (start <= range[j].start && end < range[j].end &&
131 range[j].start < end + 1) {
132 range[j].start = end + 1;
133 continue;
134 }
135
136
137 if (start > range[j].start && end >= range[j].end &&
138 range[j].end > start - 1) {
139 range[j].end = start - 1;
140 continue;
141 }
142
143 if (start > range[j].start && end < range[j].end) {
144 /* Find the new spare: */
145 for (i = 0; i < RANGE_NUM; i++) {
146 if (range[i].end == 0)
147 break;
148 }
149 if (i < RANGE_NUM) {
150 range[i].end = range[j].end;
151 range[i].start = end + 1;
152 } else {
153 printk(KERN_ERR "run of slot in ranges\n");
154 }
155 range[j].end = start - 1;
156 continue;
157 }
158 }
159}
160
161static int __init cmp_range(const void *x1, const void *x2)
162{
163 const struct res_range *r1 = x1;
164 const struct res_range *r2 = x2;
165 long start1, start2;
166
167 start1 = r1->start;
168 start2 = r2->start;
169
170 return start1 - start2;
171}
172
173static int __init clean_sort_range(struct res_range *range, int az)
174{
175 int i, j, k = az - 1, nr_range = 0;
176
177 for (i = 0; i < k; i++) {
178 if (range[i].end)
179 continue;
180 for (j = k; j > i; j--) {
181 if (range[j].end) {
182 k = j;
183 break;
184 }
185 }
186 if (j == i)
187 break;
188 range[i].start = range[k].start;
189 range[i].end = range[k].end;
190 range[k].start = 0;
191 range[k].end = 0;
192 k--;
193 }
194 /* count it */
195 for (i = 0; i < az; i++) {
196 if (!range[i].end) {
197 nr_range = i;
198 break;
199 }
200 }
201
202 /* sort them */
203 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
204
205 return nr_range;
206}
207
208#define BIOS_BUG_MSG KERN_WARNING \ 62#define BIOS_BUG_MSG KERN_WARNING \
209 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" 63 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
210 64
211static int __init 65static int __init
212x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 66x86_get_mtrr_mem_range(struct range *range, int nr_range,
213 unsigned long extra_remove_base, 67 unsigned long extra_remove_base,
214 unsigned long extra_remove_size) 68 unsigned long extra_remove_size)
215{ 69{
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
223 continue; 77 continue;
224 base = range_state[i].base_pfn; 78 base = range_state[i].base_pfn;
225 size = range_state[i].size_pfn; 79 size = range_state[i].size_pfn;
226 nr_range = add_range_with_merge(range, nr_range, base, 80 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
227 base + size - 1); 81 base, base + size);
228 } 82 }
229 if (debug_print) { 83 if (debug_print) {
230 printk(KERN_DEBUG "After WB checking\n"); 84 printk(KERN_DEBUG "After WB checking\n");
231 for (i = 0; i < nr_range; i++) 85 for (i = 0; i < nr_range; i++)
232 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 86 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
233 range[i].start, range[i].end + 1); 87 range[i].start, range[i].end);
234 } 88 }
235 89
236 /* Take out UC ranges: */ 90 /* Take out UC ranges: */
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
252 size -= (1<<(20-PAGE_SHIFT)) - base; 106 size -= (1<<(20-PAGE_SHIFT)) - base;
253 base = 1<<(20-PAGE_SHIFT); 107 base = 1<<(20-PAGE_SHIFT);
254 } 108 }
255 subtract_range(range, base, base + size - 1); 109 subtract_range(range, RANGE_NUM, base, base + size);
256 } 110 }
257 if (extra_remove_size) 111 if (extra_remove_size)
258 subtract_range(range, extra_remove_base, 112 subtract_range(range, RANGE_NUM, extra_remove_base,
259 extra_remove_base + extra_remove_size - 1); 113 extra_remove_base + extra_remove_size);
260 114
261 if (debug_print) { 115 if (debug_print) {
262 printk(KERN_DEBUG "After UC checking\n"); 116 printk(KERN_DEBUG "After UC checking\n");
263 for (i = 0; i < RANGE_NUM; i++) { 117 for (i = 0; i < RANGE_NUM; i++) {
264 if (!range[i].end) 118 if (!range[i].end)
265 continue; 119 continue;
266 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 120 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
267 range[i].start, range[i].end + 1); 121 range[i].start, range[i].end);
268 } 122 }
269 } 123 }
270 124
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
273 if (debug_print) { 127 if (debug_print) {
274 printk(KERN_DEBUG "After sorting\n"); 128 printk(KERN_DEBUG "After sorting\n");
275 for (i = 0; i < nr_range; i++) 129 for (i = 0; i < nr_range; i++)
276 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 130 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
277 range[i].start, range[i].end + 1); 131 range[i].start, range[i].end);
278 } 132 }
279 133
280 /* clear those is not used */
281 for (i = nr_range; i < RANGE_NUM; i++)
282 memset(&range[i], 0, sizeof(range[i]));
283
284 return nr_range; 134 return nr_range;
285} 135}
286 136
287#ifdef CONFIG_MTRR_SANITIZER 137#ifdef CONFIG_MTRR_SANITIZER
288 138
289static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 139static unsigned long __init sum_ranges(struct range *range, int nr_range)
290{ 140{
291 unsigned long sum = 0; 141 unsigned long sum = 0;
292 int i; 142 int i;
293 143
294 for (i = 0; i < nr_range; i++) 144 for (i = 0; i < nr_range; i++)
295 sum += range[i].end + 1 - range[i].start; 145 sum += range[i].end - range[i].start;
296 146
297 return sum; 147 return sum;
298} 148}
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
621early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 471early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
622 472
623static int __init 473static int __init
624x86_setup_var_mtrrs(struct res_range *range, int nr_range, 474x86_setup_var_mtrrs(struct range *range, int nr_range,
625 u64 chunk_size, u64 gran_size) 475 u64 chunk_size, u64 gran_size)
626{ 476{
627 struct var_mtrr_state var_state; 477 struct var_mtrr_state var_state;
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
639 /* Write the range: */ 489 /* Write the range: */
640 for (i = 0; i < nr_range; i++) { 490 for (i = 0; i < nr_range; i++) {
641 set_var_mtrr_range(&var_state, range[i].start, 491 set_var_mtrr_range(&var_state, range[i].start,
642 range[i].end - range[i].start + 1); 492 range[i].end - range[i].start);
643 } 493 }
644 494
645 /* Write the last range: */ 495 /* Write the last range: */
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
742 unsigned long x_remove_base, 592 unsigned long x_remove_base,
743 unsigned long x_remove_size, int i) 593 unsigned long x_remove_size, int i)
744{ 594{
745 static struct res_range range_new[RANGE_NUM]; 595 static struct range range_new[RANGE_NUM];
746 unsigned long range_sums_new; 596 unsigned long range_sums_new;
747 static int nr_range_new; 597 static int nr_range_new;
748 int num_reg; 598 int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
869 * [0, 1M) should always be covered by var mtrr with WB 719 * [0, 1M) should always be covered by var mtrr with WB
870 * and fixed mtrrs should take effect before var mtrr for it: 720 * and fixed mtrrs should take effect before var mtrr for it:
871 */ 721 */
872 nr_range = add_range_with_merge(range, nr_range, 0, 722 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
873 (1ULL<<(20 - PAGE_SHIFT)) - 1); 723 1ULL<<(20 - PAGE_SHIFT));
874 /* Sort the ranges: */ 724 /* Sort the ranges: */
875 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 725 sort_range(range, nr_range);
876 726
877 range_sums = sum_ranges(range, nr_range); 727 range_sums = sum_ranges(range, nr_range);
878 printk(KERN_INFO "total RAM covered: %ldM\n", 728 printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1089 nr_range = 0; 939 nr_range = 0;
1090 if (mtrr_tom2) { 940 if (mtrr_tom2) {
1091 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); 941 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1092 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; 942 range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
1093 if (highest_pfn < range[nr_range].end + 1) 943 if (highest_pfn < range[nr_range].end)
1094 highest_pfn = range[nr_range].end + 1; 944 highest_pfn = range[nr_range].end;
1095 nr_range++; 945 nr_range++;
1096 } 946 }
1097 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 947 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1103 953
1104 /* Check the holes: */ 954 /* Check the holes: */
1105 for (i = 0; i < nr_range - 1; i++) { 955 for (i = 0; i < nr_range - 1; i++) {
1106 if (range[i].end + 1 < range[i+1].start) 956 if (range[i].end < range[i+1].start)
1107 total_trim_size += real_trim_memory(range[i].end + 1, 957 total_trim_size += real_trim_memory(range[i].end,
1108 range[i+1].start); 958 range[i+1].start);
1109 } 959 }
1110 960
1111 /* Check the top: */ 961 /* Check the top: */
1112 i = nr_range - 1; 962 i = nr_range - 1;
1113 if (range[i].end + 1 < end_pfn) 963 if (range[i].end < end_pfn)
1114 total_trim_size += real_trim_memory(range[i].end + 1, 964 total_trim_size += real_trim_memory(range[i].end,
1115 end_pfn); 965 end_pfn);
1116 966
1117 if (total_trim_size) { 967 if (total_trim_size) {
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9aa5dc76ff4a..fd31a441c61c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -6,7 +6,6 @@
6 6
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/io.h> 9#include <linux/io.h>
11#include <linux/mm.h> 10#include <linux/mm.h>
12 11
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index e006e56f699c..79289632cb27 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -5,6 +5,7 @@
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/ctype.h> 6#include <linux/ctype.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/slab.h>
8#include <linux/init.h> 9#include <linux/init.h>
9 10
10#define LINE_SIZE 80 11#define LINE_SIZE 80
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index fe4622e8c837..79556bd9b602 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -145,6 +145,7 @@ struct set_mtrr_data {
145 145
146/** 146/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data
148 * 149 *
149 * Returns nothing. 150 * Returns nothing.
150 */ 151 */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 641ccb9dddbc..db5bdc8addf8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -21,6 +21,7 @@
21#include <linux/kdebug.h> 21#include <linux/kdebug.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/slab.h>
24#include <linux/highmem.h> 25#include <linux/highmem.h>
25#include <linux/cpu.h> 26#include <linux/cpu.h>
26#include <linux/bitops.h> 27#include <linux/bitops.h>
@@ -28,6 +29,7 @@
28#include <asm/apic.h> 29#include <asm/apic.h>
29#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
30#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h>
31 33
32static u64 perf_event_mask __read_mostly; 34static u64 perf_event_mask __read_mostly;
33 35
@@ -73,10 +75,10 @@ struct debug_store {
73struct event_constraint { 75struct event_constraint {
74 union { 76 union {
75 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 77 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
76 u64 idxmsk64[1]; 78 u64 idxmsk64;
77 }; 79 };
78 int code; 80 u64 code;
79 int cmask; 81 u64 cmask;
80 int weight; 82 int weight;
81}; 83};
82 84
@@ -103,7 +105,7 @@ struct cpu_hw_events {
103}; 105};
104 106
105#define __EVENT_CONSTRAINT(c, n, m, w) {\ 107#define __EVENT_CONSTRAINT(c, n, m, w) {\
106 { .idxmsk64[0] = (n) }, \ 108 { .idxmsk64 = (n) }, \
107 .code = (c), \ 109 .code = (c), \
108 .cmask = (m), \ 110 .cmask = (m), \
109 .weight = (w), \ 111 .weight = (w), \
@@ -116,7 +118,7 @@ struct cpu_hw_events {
116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) 118 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
117 119
118#define FIXED_EVENT_CONSTRAINT(c, n) \ 120#define FIXED_EVENT_CONSTRAINT(c, n) \
119 EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) 121 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
120 122
121#define EVENT_CONSTRAINT_END \ 123#define EVENT_CONSTRAINT_END \
122 EVENT_CONSTRAINT(0, 0, 0) 124 EVENT_CONSTRAINT(0, 0, 0)
@@ -133,8 +135,8 @@ struct x86_pmu {
133 int (*handle_irq)(struct pt_regs *); 135 int (*handle_irq)(struct pt_regs *);
134 void (*disable_all)(void); 136 void (*disable_all)(void);
135 void (*enable_all)(void); 137 void (*enable_all)(void);
136 void (*enable)(struct hw_perf_event *, int); 138 void (*enable)(struct perf_event *);
137 void (*disable)(struct hw_perf_event *, int); 139 void (*disable)(struct perf_event *);
138 unsigned eventsel; 140 unsigned eventsel;
139 unsigned perfctr; 141 unsigned perfctr;
140 u64 (*event_map)(int); 142 u64 (*event_map)(int);
@@ -157,6 +159,11 @@ struct x86_pmu {
157 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 159 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
158 struct perf_event *event); 160 struct perf_event *event);
159 struct event_constraint *event_constraints; 161 struct event_constraint *event_constraints;
162
163 int (*cpu_prepare)(int cpu);
164 void (*cpu_starting)(int cpu);
165 void (*cpu_dying)(int cpu);
166 void (*cpu_dead)(int cpu);
160}; 167};
161 168
162static struct x86_pmu x86_pmu __read_mostly; 169static struct x86_pmu x86_pmu __read_mostly;
@@ -165,8 +172,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
165 .enabled = 1, 172 .enabled = 1,
166}; 173};
167 174
168static int x86_perf_event_set_period(struct perf_event *event, 175static int x86_perf_event_set_period(struct perf_event *event);
169 struct hw_perf_event *hwc, int idx);
170 176
171/* 177/*
172 * Generalized hw caching related hw_event table, filled 178 * Generalized hw caching related hw_event table, filled
@@ -189,11 +195,12 @@ static u64 __read_mostly hw_cache_event_ids
189 * Returns the delta events processed. 195 * Returns the delta events processed.
190 */ 196 */
191static u64 197static u64
192x86_perf_event_update(struct perf_event *event, 198x86_perf_event_update(struct perf_event *event)
193 struct hw_perf_event *hwc, int idx)
194{ 199{
200 struct hw_perf_event *hwc = &event->hw;
195 int shift = 64 - x86_pmu.event_bits; 201 int shift = 64 - x86_pmu.event_bits;
196 u64 prev_raw_count, new_raw_count; 202 u64 prev_raw_count, new_raw_count;
203 int idx = hwc->idx;
197 s64 delta; 204 s64 delta;
198 205
199 if (idx == X86_PMC_IDX_FIXED_BTS) 206 if (idx == X86_PMC_IDX_FIXED_BTS)
@@ -293,7 +300,7 @@ static inline bool bts_available(void)
293 return x86_pmu.enable_bts != NULL; 300 return x86_pmu.enable_bts != NULL;
294} 301}
295 302
296static inline void init_debug_store_on_cpu(int cpu) 303static void init_debug_store_on_cpu(int cpu)
297{ 304{
298 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 305 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
299 306
@@ -305,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu)
305 (u32)((u64)(unsigned long)ds >> 32)); 312 (u32)((u64)(unsigned long)ds >> 32));
306} 313}
307 314
308static inline void fini_debug_store_on_cpu(int cpu) 315static void fini_debug_store_on_cpu(int cpu)
309{ 316{
310 if (!per_cpu(cpu_hw_events, cpu).ds) 317 if (!per_cpu(cpu_hw_events, cpu).ds)
311 return; 318 return;
@@ -503,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event)
503 */ 510 */
504 if (attr->type == PERF_TYPE_RAW) { 511 if (attr->type == PERF_TYPE_RAW) {
505 hwc->config |= x86_pmu.raw_event(attr->config); 512 hwc->config |= x86_pmu.raw_event(attr->config);
513 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
514 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
515 return -EACCES;
506 return 0; 516 return 0;
507 } 517 }
508 518
@@ -553,9 +563,9 @@ static void x86_pmu_disable_all(void)
553 if (!test_bit(idx, cpuc->active_mask)) 563 if (!test_bit(idx, cpuc->active_mask))
554 continue; 564 continue;
555 rdmsrl(x86_pmu.eventsel + idx, val); 565 rdmsrl(x86_pmu.eventsel + idx, val);
556 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 566 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
557 continue; 567 continue;
558 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 568 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
559 wrmsrl(x86_pmu.eventsel + idx, val); 569 wrmsrl(x86_pmu.eventsel + idx, val);
560 } 570 }
561} 571}
@@ -590,7 +600,7 @@ static void x86_pmu_enable_all(void)
590 continue; 600 continue;
591 601
592 val = event->hw.config; 602 val = event->hw.config;
593 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 603 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
594 wrmsrl(x86_pmu.eventsel + idx, val); 604 wrmsrl(x86_pmu.eventsel + idx, val);
595 } 605 }
596} 606}
@@ -612,8 +622,8 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
612 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 622 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
613 623
614 for (i = 0; i < n; i++) { 624 for (i = 0; i < n; i++) {
615 constraints[i] = 625 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
616 x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 626 constraints[i] = c;
617 } 627 }
618 628
619 /* 629 /*
@@ -635,7 +645,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
635 if (test_bit(hwc->idx, used_mask)) 645 if (test_bit(hwc->idx, used_mask))
636 break; 646 break;
637 647
638 set_bit(hwc->idx, used_mask); 648 __set_bit(hwc->idx, used_mask);
639 if (assign) 649 if (assign)
640 assign[i] = hwc->idx; 650 assign[i] = hwc->idx;
641 } 651 }
@@ -676,7 +686,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
676 if (c->weight != w) 686 if (c->weight != w)
677 continue; 687 continue;
678 688
679 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { 689 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
680 if (!test_bit(j, used_mask)) 690 if (!test_bit(j, used_mask))
681 break; 691 break;
682 } 692 }
@@ -684,7 +694,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
684 if (j == X86_PMC_IDX_MAX) 694 if (j == X86_PMC_IDX_MAX)
685 break; 695 break;
686 696
687 set_bit(j, used_mask); 697 __set_bit(j, used_mask);
688 698
689 if (assign) 699 if (assign)
690 assign[i] = j; 700 assign[i] = j;
@@ -777,6 +787,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
777 hwc->last_tag == cpuc->tags[i]; 787 hwc->last_tag == cpuc->tags[i];
778} 788}
779 789
790static int x86_pmu_start(struct perf_event *event);
780static void x86_pmu_stop(struct perf_event *event); 791static void x86_pmu_stop(struct perf_event *event);
781 792
782void hw_perf_enable(void) 793void hw_perf_enable(void)
@@ -793,6 +804,7 @@ void hw_perf_enable(void)
793 return; 804 return;
794 805
795 if (cpuc->n_added) { 806 if (cpuc->n_added) {
807 int n_running = cpuc->n_events - cpuc->n_added;
796 /* 808 /*
797 * apply assignment obtained either from 809 * apply assignment obtained either from
798 * hw_perf_group_sched_in() or x86_pmu_enable() 810 * hw_perf_group_sched_in() or x86_pmu_enable()
@@ -800,8 +812,7 @@ void hw_perf_enable(void)
800 * step1: save events moving to new counters 812 * step1: save events moving to new counters
801 * step2: reprogram moved events into new counters 813 * step2: reprogram moved events into new counters
802 */ 814 */
803 for (i = 0; i < cpuc->n_events; i++) { 815 for (i = 0; i < n_running; i++) {
804
805 event = cpuc->event_list[i]; 816 event = cpuc->event_list[i];
806 hwc = &event->hw; 817 hwc = &event->hw;
807 818
@@ -816,29 +827,18 @@ void hw_perf_enable(void)
816 continue; 827 continue;
817 828
818 x86_pmu_stop(event); 829 x86_pmu_stop(event);
819
820 hwc->idx = -1;
821 } 830 }
822 831
823 for (i = 0; i < cpuc->n_events; i++) { 832 for (i = 0; i < cpuc->n_events; i++) {
824
825 event = cpuc->event_list[i]; 833 event = cpuc->event_list[i];
826 hwc = &event->hw; 834 hwc = &event->hw;
827 835
828 if (hwc->idx == -1) { 836 if (!match_prev_assignment(hwc, cpuc, i))
829 x86_assign_hw_event(event, cpuc, i); 837 x86_assign_hw_event(event, cpuc, i);
830 x86_perf_event_set_period(event, hwc, hwc->idx); 838 else if (i < n_running)
831 } 839 continue;
832 /*
833 * need to mark as active because x86_pmu_disable()
834 * clear active_mask and events[] yet it preserves
835 * idx
836 */
837 set_bit(hwc->idx, cpuc->active_mask);
838 cpuc->events[hwc->idx] = event;
839 840
840 x86_pmu.enable(hwc, hwc->idx); 841 x86_pmu_start(event);
841 perf_event_update_userpage(event);
842 } 842 }
843 cpuc->n_added = 0; 843 cpuc->n_added = 0;
844 perf_events_lapic_init(); 844 perf_events_lapic_init();
@@ -850,15 +850,16 @@ void hw_perf_enable(void)
850 x86_pmu.enable_all(); 850 x86_pmu.enable_all();
851} 851}
852 852
853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
854{ 854{
855 (void)checking_wrmsrl(hwc->config_base + idx, 855 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); 856 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
857} 857}
858 858
859static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 859static inline void x86_pmu_disable_event(struct perf_event *event)
860{ 860{
861 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 861 struct hw_perf_event *hwc = &event->hw;
862 (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
862} 863}
863 864
864static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 865static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -868,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
868 * To be called with the event disabled in hw: 869 * To be called with the event disabled in hw:
869 */ 870 */
870static int 871static int
871x86_perf_event_set_period(struct perf_event *event, 872x86_perf_event_set_period(struct perf_event *event)
872 struct hw_perf_event *hwc, int idx)
873{ 873{
874 struct hw_perf_event *hwc = &event->hw;
874 s64 left = atomic64_read(&hwc->period_left); 875 s64 left = atomic64_read(&hwc->period_left);
875 s64 period = hwc->sample_period; 876 s64 period = hwc->sample_period;
876 int err, ret = 0; 877 int err, ret = 0, idx = hwc->idx;
877 878
878 if (idx == X86_PMC_IDX_FIXED_BTS) 879 if (idx == X86_PMC_IDX_FIXED_BTS)
879 return 0; 880 return 0;
@@ -919,11 +920,11 @@ x86_perf_event_set_period(struct perf_event *event,
919 return ret; 920 return ret;
920} 921}
921 922
922static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 923static void x86_pmu_enable_event(struct perf_event *event)
923{ 924{
924 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 925 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
925 if (cpuc->enabled) 926 if (cpuc->enabled)
926 __x86_pmu_enable_event(hwc, idx); 927 __x86_pmu_enable_event(&event->hw);
927} 928}
928 929
929/* 930/*
@@ -959,34 +960,32 @@ static int x86_pmu_enable(struct perf_event *event)
959 memcpy(cpuc->assign, assign, n*sizeof(int)); 960 memcpy(cpuc->assign, assign, n*sizeof(int));
960 961
961 cpuc->n_events = n; 962 cpuc->n_events = n;
962 cpuc->n_added = n - n0; 963 cpuc->n_added += n - n0;
963 964
964 return 0; 965 return 0;
965} 966}
966 967
967static int x86_pmu_start(struct perf_event *event) 968static int x86_pmu_start(struct perf_event *event)
968{ 969{
969 struct hw_perf_event *hwc = &event->hw; 970 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
971 int idx = event->hw.idx;
970 972
971 if (hwc->idx == -1) 973 if (idx == -1)
972 return -EAGAIN; 974 return -EAGAIN;
973 975
974 x86_perf_event_set_period(event, hwc, hwc->idx); 976 x86_perf_event_set_period(event);
975 x86_pmu.enable(hwc, hwc->idx); 977 cpuc->events[idx] = event;
978 __set_bit(idx, cpuc->active_mask);
979 x86_pmu.enable(event);
980 perf_event_update_userpage(event);
976 981
977 return 0; 982 return 0;
978} 983}
979 984
980static void x86_pmu_unthrottle(struct perf_event *event) 985static void x86_pmu_unthrottle(struct perf_event *event)
981{ 986{
982 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 987 int ret = x86_pmu_start(event);
983 struct hw_perf_event *hwc = &event->hw; 988 WARN_ON_ONCE(ret);
984
985 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
986 cpuc->events[hwc->idx] != event))
987 return;
988
989 x86_pmu.enable(hwc, hwc->idx);
990} 989}
991 990
992void perf_event_print_debug(void) 991void perf_event_print_debug(void)
@@ -1046,18 +1045,16 @@ static void x86_pmu_stop(struct perf_event *event)
1046 struct hw_perf_event *hwc = &event->hw; 1045 struct hw_perf_event *hwc = &event->hw;
1047 int idx = hwc->idx; 1046 int idx = hwc->idx;
1048 1047
1049 /* 1048 if (!__test_and_clear_bit(idx, cpuc->active_mask))
1050 * Must be done before we disable, otherwise the nmi handler 1049 return;
1051 * could reenable again: 1050
1052 */ 1051 x86_pmu.disable(event);
1053 clear_bit(idx, cpuc->active_mask);
1054 x86_pmu.disable(hwc, idx);
1055 1052
1056 /* 1053 /*
1057 * Drain the remaining delta count out of a event 1054 * Drain the remaining delta count out of a event
1058 * that we are disabling: 1055 * that we are disabling:
1059 */ 1056 */
1060 x86_perf_event_update(event, hwc, idx); 1057 x86_perf_event_update(event);
1061 1058
1062 cpuc->events[idx] = NULL; 1059 cpuc->events[idx] = NULL;
1063} 1060}
@@ -1094,8 +1091,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1094 int idx, handled = 0; 1091 int idx, handled = 0;
1095 u64 val; 1092 u64 val;
1096 1093
1097 data.addr = 0; 1094 perf_sample_data_init(&data, 0);
1098 data.raw = NULL;
1099 1095
1100 cpuc = &__get_cpu_var(cpu_hw_events); 1096 cpuc = &__get_cpu_var(cpu_hw_events);
1101 1097
@@ -1106,7 +1102,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1106 event = cpuc->events[idx]; 1102 event = cpuc->events[idx];
1107 hwc = &event->hw; 1103 hwc = &event->hw;
1108 1104
1109 val = x86_perf_event_update(event, hwc, idx); 1105 val = x86_perf_event_update(event);
1110 if (val & (1ULL << (x86_pmu.event_bits - 1))) 1106 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1111 continue; 1107 continue;
1112 1108
@@ -1116,11 +1112,11 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1116 handled = 1; 1112 handled = 1;
1117 data.period = event->hw.last_period; 1113 data.period = event->hw.last_period;
1118 1114
1119 if (!x86_perf_event_set_period(event, hwc, idx)) 1115 if (!x86_perf_event_set_period(event))
1120 continue; 1116 continue;
1121 1117
1122 if (perf_event_overflow(event, 1, &data, regs)) 1118 if (perf_event_overflow(event, 1, &data, regs))
1123 x86_pmu.disable(hwc, idx); 1119 x86_pmu_stop(event);
1124 } 1120 }
1125 1121
1126 if (handled) 1122 if (handled)
@@ -1307,7 +1303,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
1307 memcpy(cpuc->assign, assign, n0*sizeof(int)); 1303 memcpy(cpuc->assign, assign, n0*sizeof(int));
1308 1304
1309 cpuc->n_events = n0; 1305 cpuc->n_events = n0;
1310 cpuc->n_added = n1; 1306 cpuc->n_added += n1;
1311 ctx->nr_active += n1; 1307 ctx->nr_active += n1;
1312 1308
1313 /* 1309 /*
@@ -1335,6 +1331,41 @@ undo:
1335#include "perf_event_p6.c" 1331#include "perf_event_p6.c"
1336#include "perf_event_intel.c" 1332#include "perf_event_intel.c"
1337 1333
1334static int __cpuinit
1335x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1336{
1337 unsigned int cpu = (long)hcpu;
1338 int ret = NOTIFY_OK;
1339
1340 switch (action & ~CPU_TASKS_FROZEN) {
1341 case CPU_UP_PREPARE:
1342 if (x86_pmu.cpu_prepare)
1343 ret = x86_pmu.cpu_prepare(cpu);
1344 break;
1345
1346 case CPU_STARTING:
1347 if (x86_pmu.cpu_starting)
1348 x86_pmu.cpu_starting(cpu);
1349 break;
1350
1351 case CPU_DYING:
1352 if (x86_pmu.cpu_dying)
1353 x86_pmu.cpu_dying(cpu);
1354 break;
1355
1356 case CPU_UP_CANCELED:
1357 case CPU_DEAD:
1358 if (x86_pmu.cpu_dead)
1359 x86_pmu.cpu_dead(cpu);
1360 break;
1361
1362 default:
1363 break;
1364 }
1365
1366 return ret;
1367}
1368
1338static void __init pmu_check_apic(void) 1369static void __init pmu_check_apic(void)
1339{ 1370{
1340 if (cpu_has_apic) 1371 if (cpu_has_apic)
@@ -1347,6 +1378,7 @@ static void __init pmu_check_apic(void)
1347 1378
1348void __init init_hw_perf_events(void) 1379void __init init_hw_perf_events(void)
1349{ 1380{
1381 struct event_constraint *c;
1350 int err; 1382 int err;
1351 1383
1352 pr_info("Performance Events: "); 1384 pr_info("Performance Events: ");
@@ -1395,6 +1427,16 @@ void __init init_hw_perf_events(void)
1395 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 1427 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1396 0, x86_pmu.num_events); 1428 0, x86_pmu.num_events);
1397 1429
1430 if (x86_pmu.event_constraints) {
1431 for_each_event_constraint(c, x86_pmu.event_constraints) {
1432 if (c->cmask != INTEL_ARCH_FIXED_MASK)
1433 continue;
1434
1435 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
1436 c->weight += x86_pmu.num_events;
1437 }
1438 }
1439
1398 pr_info("... version: %d\n", x86_pmu.version); 1440 pr_info("... version: %d\n", x86_pmu.version);
1399 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1441 pr_info("... bit width: %d\n", x86_pmu.event_bits);
1400 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1442 pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -1402,11 +1444,13 @@ void __init init_hw_perf_events(void)
1402 pr_info("... max period: %016Lx\n", x86_pmu.max_period); 1444 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1403 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); 1445 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
1404 pr_info("... event mask: %016Lx\n", perf_event_mask); 1446 pr_info("... event mask: %016Lx\n", perf_event_mask);
1447
1448 perf_cpu_notifier(x86_pmu_notifier);
1405} 1449}
1406 1450
1407static inline void x86_pmu_read(struct perf_event *event) 1451static inline void x86_pmu_read(struct perf_event *event)
1408{ 1452{
1409 x86_perf_event_update(event, &event->hw, event->hw.idx); 1453 x86_perf_event_update(event);
1410} 1454}
1411 1455
1412static const struct pmu pmu = { 1456static const struct pmu pmu = {
@@ -1588,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1588 return len; 1632 return len;
1589} 1633}
1590 1634
1591static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 1635#ifdef CONFIG_COMPAT
1636static inline int
1637perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1592{ 1638{
1593 unsigned long bytes; 1639 /* 32-bit process in 64-bit kernel. */
1640 struct stack_frame_ia32 frame;
1641 const void __user *fp;
1594 1642
1595 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); 1643 if (!test_thread_flag(TIF_IA32))
1644 return 0;
1596 1645
1597 return bytes == sizeof(*frame); 1646 fp = compat_ptr(regs->bp);
1647 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1648 unsigned long bytes;
1649 frame.next_frame = 0;
1650 frame.return_address = 0;
1651
1652 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1653 if (bytes != sizeof(frame))
1654 break;
1655
1656 if (fp < compat_ptr(regs->sp))
1657 break;
1658
1659 callchain_store(entry, frame.return_address);
1660 fp = compat_ptr(frame.next_frame);
1661 }
1662 return 1;
1663}
1664#else
1665static inline int
1666perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1667{
1668 return 0;
1598} 1669}
1670#endif
1599 1671
1600static void 1672static void
1601perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1673perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1611,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1611 callchain_store(entry, PERF_CONTEXT_USER); 1683 callchain_store(entry, PERF_CONTEXT_USER);
1612 callchain_store(entry, regs->ip); 1684 callchain_store(entry, regs->ip);
1613 1685
1686 if (perf_callchain_user32(regs, entry))
1687 return;
1688
1614 while (entry->nr < PERF_MAX_STACK_DEPTH) { 1689 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1690 unsigned long bytes;
1615 frame.next_frame = NULL; 1691 frame.next_frame = NULL;
1616 frame.return_address = 0; 1692 frame.return_address = 0;
1617 1693
1618 if (!copy_stack_frame(fp, &frame)) 1694 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1695 if (bytes != sizeof(frame))
1619 break; 1696 break;
1620 1697
1621 if ((unsigned long)fp < regs->sp) 1698 if ((unsigned long)fp < regs->sp)
@@ -1662,28 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1662 return entry; 1739 return entry;
1663} 1740}
1664 1741
1665void hw_perf_event_setup_online(int cpu) 1742void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1666{
1667 init_debug_store_on_cpu(cpu);
1668
1669 switch (boot_cpu_data.x86_vendor) {
1670 case X86_VENDOR_AMD:
1671 amd_pmu_cpu_online(cpu);
1672 break;
1673 default:
1674 return;
1675 }
1676}
1677
1678void hw_perf_event_setup_offline(int cpu)
1679{ 1743{
1680 init_debug_store_on_cpu(cpu); 1744 regs->ip = ip;
1681 1745 /*
1682 switch (boot_cpu_data.x86_vendor) { 1746 * perf_arch_fetch_caller_regs adds another call, we need to increment
1683 case X86_VENDOR_AMD: 1747 * the skip level
1684 amd_pmu_cpu_offline(cpu); 1748 */
1685 break; 1749 regs->bp = rewind_frame_pointer(skip + 1);
1686 default: 1750 regs->cs = __KERNEL_CS;
1687 return; 1751 local_save_flags(regs->flags);
1688 }
1689} 1752}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 8f3dbfda3c4f..db6f7d4056e1 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
137 return (hwc->config & 0xe0) == 0xe0; 137 return (hwc->config & 0xe0) == 0xe0;
138} 138}
139 139
140static inline int amd_has_nb(struct cpu_hw_events *cpuc)
141{
142 struct amd_nb *nb = cpuc->amd_nb;
143
144 return nb && nb->nb_id != -1;
145}
146
140static void amd_put_event_constraints(struct cpu_hw_events *cpuc, 147static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
141 struct perf_event *event) 148 struct perf_event *event)
142{ 149{
@@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
147 /* 154 /*
148 * only care about NB events 155 * only care about NB events
149 */ 156 */
150 if (!(nb && amd_is_nb_event(hwc))) 157 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
151 return; 158 return;
152 159
153 /* 160 /*
@@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
214 /* 221 /*
215 * if not NB event or no NB, then no constraints 222 * if not NB event or no NB, then no constraints
216 */ 223 */
217 if (!(nb && amd_is_nb_event(hwc))) 224 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
218 return &unconstrained; 225 return &unconstrained;
219 226
220 /* 227 /*
@@ -271,28 +278,6 @@ done:
271 return &emptyconstraint; 278 return &emptyconstraint;
272} 279}
273 280
274static __initconst struct x86_pmu amd_pmu = {
275 .name = "AMD",
276 .handle_irq = x86_pmu_handle_irq,
277 .disable_all = x86_pmu_disable_all,
278 .enable_all = x86_pmu_enable_all,
279 .enable = x86_pmu_enable_event,
280 .disable = x86_pmu_disable_event,
281 .eventsel = MSR_K7_EVNTSEL0,
282 .perfctr = MSR_K7_PERFCTR0,
283 .event_map = amd_pmu_event_map,
284 .raw_event = amd_pmu_raw_event,
285 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
286 .num_events = 4,
287 .event_bits = 48,
288 .event_mask = (1ULL << 48) - 1,
289 .apic = 1,
290 /* use highest bit to detect overflow */
291 .max_period = (1ULL << 47) - 1,
292 .get_event_constraints = amd_get_event_constraints,
293 .put_event_constraints = amd_put_event_constraints
294};
295
296static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 281static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
297{ 282{
298 struct amd_nb *nb; 283 struct amd_nb *nb;
@@ -309,57 +294,61 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
309 * initialize all possible NB constraints 294 * initialize all possible NB constraints
310 */ 295 */
311 for (i = 0; i < x86_pmu.num_events; i++) { 296 for (i = 0; i < x86_pmu.num_events; i++) {
312 set_bit(i, nb->event_constraints[i].idxmsk); 297 __set_bit(i, nb->event_constraints[i].idxmsk);
313 nb->event_constraints[i].weight = 1; 298 nb->event_constraints[i].weight = 1;
314 } 299 }
315 return nb; 300 return nb;
316} 301}
317 302
318static void amd_pmu_cpu_online(int cpu) 303static int amd_pmu_cpu_prepare(int cpu)
319{ 304{
320 struct cpu_hw_events *cpu1, *cpu2; 305 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
321 struct amd_nb *nb = NULL; 306
307 WARN_ON_ONCE(cpuc->amd_nb);
308
309 if (boot_cpu_data.x86_max_cores < 2)
310 return NOTIFY_OK;
311
312 cpuc->amd_nb = amd_alloc_nb(cpu, -1);
313 if (!cpuc->amd_nb)
314 return NOTIFY_BAD;
315
316 return NOTIFY_OK;
317}
318
319static void amd_pmu_cpu_starting(int cpu)
320{
321 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
322 struct amd_nb *nb;
322 int i, nb_id; 323 int i, nb_id;
323 324
324 if (boot_cpu_data.x86_max_cores < 2) 325 if (boot_cpu_data.x86_max_cores < 2)
325 return; 326 return;
326 327
327 /*
328 * function may be called too early in the
329 * boot process, in which case nb_id is bogus
330 */
331 nb_id = amd_get_nb_id(cpu); 328 nb_id = amd_get_nb_id(cpu);
332 if (nb_id == BAD_APICID) 329 WARN_ON_ONCE(nb_id == BAD_APICID);
333 return;
334
335 cpu1 = &per_cpu(cpu_hw_events, cpu);
336 cpu1->amd_nb = NULL;
337 330
338 raw_spin_lock(&amd_nb_lock); 331 raw_spin_lock(&amd_nb_lock);
339 332
340 for_each_online_cpu(i) { 333 for_each_online_cpu(i) {
341 cpu2 = &per_cpu(cpu_hw_events, i); 334 nb = per_cpu(cpu_hw_events, i).amd_nb;
342 nb = cpu2->amd_nb; 335 if (WARN_ON_ONCE(!nb))
343 if (!nb)
344 continue; 336 continue;
345 if (nb->nb_id == nb_id)
346 goto found;
347 }
348 337
349 nb = amd_alloc_nb(cpu, nb_id); 338 if (nb->nb_id == nb_id) {
350 if (!nb) { 339 kfree(cpuc->amd_nb);
351 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); 340 cpuc->amd_nb = nb;
352 raw_spin_unlock(&amd_nb_lock); 341 break;
353 return; 342 }
354 } 343 }
355found: 344
356 nb->refcnt++; 345 cpuc->amd_nb->nb_id = nb_id;
357 cpu1->amd_nb = nb; 346 cpuc->amd_nb->refcnt++;
358 347
359 raw_spin_unlock(&amd_nb_lock); 348 raw_spin_unlock(&amd_nb_lock);
360} 349}
361 350
362static void amd_pmu_cpu_offline(int cpu) 351static void amd_pmu_cpu_dead(int cpu)
363{ 352{
364 struct cpu_hw_events *cpuhw; 353 struct cpu_hw_events *cpuhw;
365 354
@@ -370,14 +359,44 @@ static void amd_pmu_cpu_offline(int cpu)
370 359
371 raw_spin_lock(&amd_nb_lock); 360 raw_spin_lock(&amd_nb_lock);
372 361
373 if (--cpuhw->amd_nb->refcnt == 0) 362 if (cpuhw->amd_nb) {
374 kfree(cpuhw->amd_nb); 363 struct amd_nb *nb = cpuhw->amd_nb;
364
365 if (nb->nb_id == -1 || --nb->refcnt == 0)
366 kfree(nb);
375 367
376 cpuhw->amd_nb = NULL; 368 cpuhw->amd_nb = NULL;
369 }
377 370
378 raw_spin_unlock(&amd_nb_lock); 371 raw_spin_unlock(&amd_nb_lock);
379} 372}
380 373
374static __initconst struct x86_pmu amd_pmu = {
375 .name = "AMD",
376 .handle_irq = x86_pmu_handle_irq,
377 .disable_all = x86_pmu_disable_all,
378 .enable_all = x86_pmu_enable_all,
379 .enable = x86_pmu_enable_event,
380 .disable = x86_pmu_disable_event,
381 .eventsel = MSR_K7_EVNTSEL0,
382 .perfctr = MSR_K7_PERFCTR0,
383 .event_map = amd_pmu_event_map,
384 .raw_event = amd_pmu_raw_event,
385 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
386 .num_events = 4,
387 .event_bits = 48,
388 .event_mask = (1ULL << 48) - 1,
389 .apic = 1,
390 /* use highest bit to detect overflow */
391 .max_period = (1ULL << 47) - 1,
392 .get_event_constraints = amd_get_event_constraints,
393 .put_event_constraints = amd_put_event_constraints,
394
395 .cpu_prepare = amd_pmu_cpu_prepare,
396 .cpu_starting = amd_pmu_cpu_starting,
397 .cpu_dead = amd_pmu_cpu_dead,
398};
399
381static __init int amd_pmu_init(void) 400static __init int amd_pmu_init(void)
382{ 401{
383 /* Performance-monitoring supported from K7 and later: */ 402 /* Performance-monitoring supported from K7 and later: */
@@ -390,11 +409,6 @@ static __init int amd_pmu_init(void)
390 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 409 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
391 sizeof(hw_cache_event_ids)); 410 sizeof(hw_cache_event_ids));
392 411
393 /*
394 * explicitly initialize the boot cpu, other cpus will get
395 * the cpu hotplug callbacks from smp_init()
396 */
397 amd_pmu_cpu_online(smp_processor_id());
398 return 0; 412 return 0;
399} 413}
400 414
@@ -405,12 +419,4 @@ static int amd_pmu_init(void)
405 return 0; 419 return 0;
406} 420}
407 421
408static void amd_pmu_cpu_online(int cpu)
409{
410}
411
412static void amd_pmu_cpu_offline(int cpu)
413{
414}
415
416#endif 422#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cf6590cf4a5f..9c794ac87837 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,7 +1,7 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#ifdef CONFIG_CPU_SUP_INTEL
2 2
3/* 3/*
4 * Intel PerfMon v3. Used on Core2 and later. 4 * Intel PerfMon, used on Core and later.
5 */ 5 */
6static const u64 intel_perfmon_event_map[] = 6static const u64 intel_perfmon_event_map[] =
7{ 7{
@@ -27,8 +27,14 @@ static struct event_constraint intel_core_event_constraints[] =
27 27
28static struct event_constraint intel_core2_event_constraints[] = 28static struct event_constraint intel_core2_event_constraints[] =
29{ 29{
30 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 30 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
31 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 31 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
32 /*
33 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
34 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
35 * ratio between these counters.
36 */
37 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
32 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 38 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
33 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 39 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
34 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 40 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -37,14 +43,16 @@ static struct event_constraint intel_core2_event_constraints[] =
37 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ 43 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
38 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ 44 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
39 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ 45 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
46 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
40 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ 47 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
41 EVENT_CONSTRAINT_END 48 EVENT_CONSTRAINT_END
42}; 49};
43 50
44static struct event_constraint intel_nehalem_event_constraints[] = 51static struct event_constraint intel_nehalem_event_constraints[] =
45{ 52{
46 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 53 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 54 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
55 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
48 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 56 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
49 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 57 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
50 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 58 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -58,8 +66,9 @@ static struct event_constraint intel_nehalem_event_constraints[] =
58 66
59static struct event_constraint intel_westmere_event_constraints[] = 67static struct event_constraint intel_westmere_event_constraints[] =
60{ 68{
61 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
62 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
63 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 72 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
64 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 73 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
65 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 74 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -68,8 +77,9 @@ static struct event_constraint intel_westmere_event_constraints[] =
68 77
69static struct event_constraint intel_gen_event_constraints[] = 78static struct event_constraint intel_gen_event_constraints[] =
70{ 79{
71 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ 80 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
72 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ 81 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
82 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
73 EVENT_CONSTRAINT_END 83 EVENT_CONSTRAINT_END
74}; 84};
75 85
@@ -538,9 +548,9 @@ static inline void intel_pmu_ack_status(u64 ack)
538} 548}
539 549
540static inline void 550static inline void
541intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) 551intel_pmu_disable_fixed(struct hw_perf_event *hwc)
542{ 552{
543 int idx = __idx - X86_PMC_IDX_FIXED; 553 int idx = hwc->idx - X86_PMC_IDX_FIXED;
544 u64 ctrl_val, mask; 554 u64 ctrl_val, mask;
545 555
546 mask = 0xfULL << (idx * 4); 556 mask = 0xfULL << (idx * 4);
@@ -580,10 +590,9 @@ static void intel_pmu_drain_bts_buffer(void)
580 590
581 ds->bts_index = ds->bts_buffer_base; 591 ds->bts_index = ds->bts_buffer_base;
582 592
593 perf_sample_data_init(&data, 0);
583 594
584 data.period = event->hw.last_period; 595 data.period = event->hw.last_period;
585 data.addr = 0;
586 data.raw = NULL;
587 regs.ip = 0; 596 regs.ip = 0;
588 597
589 /* 598 /*
@@ -612,26 +621,28 @@ static void intel_pmu_drain_bts_buffer(void)
612} 621}
613 622
614static inline void 623static inline void
615intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) 624intel_pmu_disable_event(struct perf_event *event)
616{ 625{
617 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 626 struct hw_perf_event *hwc = &event->hw;
627
628 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
618 intel_pmu_disable_bts(); 629 intel_pmu_disable_bts();
619 intel_pmu_drain_bts_buffer(); 630 intel_pmu_drain_bts_buffer();
620 return; 631 return;
621 } 632 }
622 633
623 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 634 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
624 intel_pmu_disable_fixed(hwc, idx); 635 intel_pmu_disable_fixed(hwc);
625 return; 636 return;
626 } 637 }
627 638
628 x86_pmu_disable_event(hwc, idx); 639 x86_pmu_disable_event(event);
629} 640}
630 641
631static inline void 642static inline void
632intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) 643intel_pmu_enable_fixed(struct hw_perf_event *hwc)
633{ 644{
634 int idx = __idx - X86_PMC_IDX_FIXED; 645 int idx = hwc->idx - X86_PMC_IDX_FIXED;
635 u64 ctrl_val, bits, mask; 646 u64 ctrl_val, bits, mask;
636 int err; 647 int err;
637 648
@@ -661,9 +672,11 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
661 err = checking_wrmsrl(hwc->config_base, ctrl_val); 672 err = checking_wrmsrl(hwc->config_base, ctrl_val);
662} 673}
663 674
664static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) 675static void intel_pmu_enable_event(struct perf_event *event)
665{ 676{
666 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 677 struct hw_perf_event *hwc = &event->hw;
678
679 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
667 if (!__get_cpu_var(cpu_hw_events).enabled) 680 if (!__get_cpu_var(cpu_hw_events).enabled)
668 return; 681 return;
669 682
@@ -672,11 +685,11 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
672 } 685 }
673 686
674 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 687 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
675 intel_pmu_enable_fixed(hwc, idx); 688 intel_pmu_enable_fixed(hwc);
676 return; 689 return;
677 } 690 }
678 691
679 __x86_pmu_enable_event(hwc, idx); 692 __x86_pmu_enable_event(hwc);
680} 693}
681 694
682/* 695/*
@@ -685,14 +698,8 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
685 */ 698 */
686static int intel_pmu_save_and_restart(struct perf_event *event) 699static int intel_pmu_save_and_restart(struct perf_event *event)
687{ 700{
688 struct hw_perf_event *hwc = &event->hw; 701 x86_perf_event_update(event);
689 int idx = hwc->idx; 702 return x86_perf_event_set_period(event);
690 int ret;
691
692 x86_perf_event_update(event, hwc, idx);
693 ret = x86_perf_event_set_period(event, hwc, idx);
694
695 return ret;
696} 703}
697 704
698static void intel_pmu_reset(void) 705static void intel_pmu_reset(void)
@@ -732,16 +739,15 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
732 int bit, loops; 739 int bit, loops;
733 u64 ack, status; 740 u64 ack, status;
734 741
735 data.addr = 0; 742 perf_sample_data_init(&data, 0);
736 data.raw = NULL;
737 743
738 cpuc = &__get_cpu_var(cpu_hw_events); 744 cpuc = &__get_cpu_var(cpu_hw_events);
739 745
740 perf_disable(); 746 intel_pmu_disable_all();
741 intel_pmu_drain_bts_buffer(); 747 intel_pmu_drain_bts_buffer();
742 status = intel_pmu_get_status(); 748 status = intel_pmu_get_status();
743 if (!status) { 749 if (!status) {
744 perf_enable(); 750 intel_pmu_enable_all();
745 return 0; 751 return 0;
746 } 752 }
747 753
@@ -751,16 +757,14 @@ again:
751 WARN_ONCE(1, "perfevents: irq loop stuck!\n"); 757 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
752 perf_event_print_debug(); 758 perf_event_print_debug();
753 intel_pmu_reset(); 759 intel_pmu_reset();
754 perf_enable(); 760 goto done;
755 return 1;
756 } 761 }
757 762
758 inc_irq_stat(apic_perf_irqs); 763 inc_irq_stat(apic_perf_irqs);
759 ack = status; 764 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 765 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit]; 766 struct perf_event *event = cpuc->events[bit];
762 767
763 clear_bit(bit, (unsigned long *) &status);
764 if (!test_bit(bit, cpuc->active_mask)) 768 if (!test_bit(bit, cpuc->active_mask))
765 continue; 769 continue;
766 770
@@ -770,7 +774,7 @@ again:
770 data.period = event->hw.last_period; 774 data.period = event->hw.last_period;
771 775
772 if (perf_event_overflow(event, 1, &data, regs)) 776 if (perf_event_overflow(event, 1, &data, regs))
773 intel_pmu_disable_event(&event->hw, bit); 777 x86_pmu_stop(event);
774 } 778 }
775 779
776 intel_pmu_ack_status(ack); 780 intel_pmu_ack_status(ack);
@@ -782,8 +786,8 @@ again:
782 if (status) 786 if (status)
783 goto again; 787 goto again;
784 788
785 perf_enable(); 789done:
786 790 intel_pmu_enable_all();
787 return 1; 791 return 1;
788} 792}
789 793
@@ -862,7 +866,10 @@ static __initconst struct x86_pmu intel_pmu = {
862 .max_period = (1ULL << 31) - 1, 866 .max_period = (1ULL << 31) - 1,
863 .enable_bts = intel_pmu_enable_bts, 867 .enable_bts = intel_pmu_enable_bts,
864 .disable_bts = intel_pmu_disable_bts, 868 .disable_bts = intel_pmu_disable_bts,
865 .get_event_constraints = intel_get_event_constraints 869 .get_event_constraints = intel_get_event_constraints,
870
871 .cpu_starting = init_debug_store_on_cpu,
872 .cpu_dying = fini_debug_store_on_cpu,
866}; 873};
867 874
868static __init int intel_pmu_init(void) 875static __init int intel_pmu_init(void)
@@ -929,13 +936,14 @@ static __init int intel_pmu_init(void)
929 936
930 case 26: /* 45 nm nehalem, "Bloomfield" */ 937 case 26: /* 45 nm nehalem, "Bloomfield" */
931 case 30: /* 45 nm nehalem, "Lynnfield" */ 938 case 30: /* 45 nm nehalem, "Lynnfield" */
939 case 46: /* 45 nm nehalem-ex, "Beckton" */
932 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 940 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
933 sizeof(hw_cache_event_ids)); 941 sizeof(hw_cache_event_ids));
934 942
935 x86_pmu.event_constraints = intel_nehalem_event_constraints; 943 x86_pmu.event_constraints = intel_nehalem_event_constraints;
936 pr_cont("Nehalem/Corei7 events, "); 944 pr_cont("Nehalem/Corei7 events, ");
937 break; 945 break;
938 case 28: 946 case 28: /* Atom */
939 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 947 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids)); 948 sizeof(hw_cache_event_ids));
941 949
@@ -951,6 +959,7 @@ static __init int intel_pmu_init(void)
951 x86_pmu.event_constraints = intel_westmere_event_constraints; 959 x86_pmu.event_constraints = intel_westmere_event_constraints;
952 pr_cont("Westmere events, "); 960 pr_cont("Westmere events, ");
953 break; 961 break;
962
954 default: 963 default:
955 /* 964 /*
956 * default constraints for v2 and up 965 * default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 1ca5ba078afd..a330485d14da 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -62,7 +62,7 @@ static void p6_pmu_disable_all(void)
62 62
63 /* p6 only has one enable register */ 63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val); 64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 65 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val); 66 wrmsrl(MSR_P6_EVNTSEL0, val);
67} 67}
68 68
@@ -72,32 +72,34 @@ static void p6_pmu_enable_all(void)
72 72
73 /* p6 only has one enable register */ 73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val); 74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 75 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val); 76 wrmsrl(MSR_P6_EVNTSEL0, val);
77} 77}
78 78
79static inline void 79static inline void
80p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) 80p6_pmu_disable_event(struct perf_event *event)
81{ 81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 struct hw_perf_event *hwc = &event->hw;
83 u64 val = P6_NOP_EVENT; 84 u64 val = P6_NOP_EVENT;
84 85
85 if (cpuc->enabled) 86 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 87 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
87 88
88 (void)checking_wrmsrl(hwc->config_base + idx, val); 89 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
89} 90}
90 91
91static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) 92static void p6_pmu_enable_event(struct perf_event *event)
92{ 93{
93 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 94 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
95 struct hw_perf_event *hwc = &event->hw;
94 u64 val; 96 u64 val;
95 97
96 val = hwc->config; 98 val = hwc->config;
97 if (cpuc->enabled) 99 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 100 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
99 101
100 (void)checking_wrmsrl(hwc->config_base + idx, val); 102 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
101} 103}
102 104
103static __initconst struct x86_pmu p6_pmu = { 105static __initconst struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 74f4e85a5727..fb329e9f8494 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -680,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
680 cpu_nmi_set_wd_enabled(); 680 cpu_nmi_set_wd_enabled();
681 681
682 apic_write(APIC_LVTPC, APIC_DM_NMI); 682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0); 684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1; 686 return 1;
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59cf..dfdb4dba2320 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
22 */ 22 */
23 23
24#include <linux/dmi.h> 24#include <linux/dmi.h>
25#include <linux/module.h>
25#include <asm/div64.h> 26#include <asm/div64.h>
26#include <asm/vmware.h> 27#include <asm/vmware.h>
27#include <asm/x86_init.h> 28#include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
101 102
102 return 0; 103 return 0;
103} 104}
105EXPORT_SYMBOL(vmware_platform);
104 106
105/* 107/*
106 * VMware hypervisor takes care of exporting a reliable TSC to the guest. 108 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 83e5e628de73..8b862d5900fe 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/uaccess.h> 42#include <linux/uaccess.h>
43#include <linux/gfp.h>
43 44
44#include <asm/processor.h> 45#include <asm/processor.h>
45#include <asm/msr.h> 46#include <asm/msr.h>
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a4849c10a77e..ebd4c51d096a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,7 +27,6 @@
27#include <asm/cpu.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30#include <asm/x86_init.h>
31 30
32#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 31#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
33 32
@@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
103#ifdef CONFIG_HPET_TIMER 102#ifdef CONFIG_HPET_TIMER
104 hpet_disable(); 103 hpet_disable();
105#endif 104#endif
106
107#ifdef CONFIG_X86_64
108 x86_platform.iommu_shutdown();
109#endif
110
111 crash_save_cpu(regs, safe_smp_processor_id()); 105 crash_save_cpu(regs, safe_smp_processor_id());
112} 106}
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index cd97ce18c29d..67414550c3cc 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -5,6 +5,7 @@
5 * Copyright (C) IBM Corporation, 2004. All rights reserved 5 * Copyright (C) IBM Corporation, 2004. All rights reserved
6 */ 6 */
7 7
8#include <linux/slab.h>
8#include <linux/errno.h> 9#include <linux/errno.h>
9#include <linux/highmem.h> 10#include <linux/highmem.h>
10#include <linux/crash_dump.h> 11#include <linux/crash_dump.h>
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 4fd1420faffa..e1a93be4fd44 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -14,6 +14,8 @@
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) 14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif 15#endif
16 16
17#include <linux/uaccess.h>
18
17extern void 19extern void
18show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
19 unsigned long *stack, unsigned long bp, char *log_lvl); 21 unsigned long *stack, unsigned long bp, char *log_lvl);
@@ -29,4 +31,26 @@ struct stack_frame {
29 struct stack_frame *next_frame; 31 struct stack_frame *next_frame;
30 unsigned long return_address; 32 unsigned long return_address;
31}; 33};
34
35struct stack_frame_ia32 {
36 u32 next_frame;
37 u32 return_address;
38};
39
40static inline unsigned long rewind_frame_pointer(int n)
41{
42 struct stack_frame *frame;
43
44 get_bp(frame);
45
46#ifdef CONFIG_FRAME_POINTER
47 while (n--) {
48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
32#endif 51#endif
52
53 return (unsigned long)frame;
54}
55
56#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index dce99abb4496..272c9f1f05f3 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -120,9 +120,15 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
120{ 120{
121#ifdef CONFIG_FRAME_POINTER 121#ifdef CONFIG_FRAME_POINTER
122 struct stack_frame *frame = (struct stack_frame *)bp; 122 struct stack_frame *frame = (struct stack_frame *)bp;
123 unsigned long next;
123 124
124 if (!in_irq_stack(stack, irq_stack, irq_stack_end)) 125 if (!in_irq_stack(stack, irq_stack, irq_stack_end)) {
125 return (unsigned long)frame->next_frame; 126 if (!probe_kernel_address(&frame->next_frame, next))
127 return next;
128 else
129 WARN_ONCE(1, "Perf: bad frame pointer = %p in "
130 "callchain\n", &frame->next_frame);
131 }
126#endif 132#endif
127 return bp; 133 return bp;
128} 134}
@@ -202,7 +208,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
202 if (in_irq_stack(stack, irq_stack, irq_stack_end)) { 208 if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
203 if (ops->stack(data, "IRQ") < 0) 209 if (ops->stack(data, "IRQ") < 0)
204 break; 210 break;
205 bp = print_context_stack(tinfo, stack, bp, 211 bp = ops->walk_stack(tinfo, stack, bp,
206 ops, data, irq_stack_end, &graph); 212 ops, data, irq_stack_end, &graph);
207 /* 213 /*
208 * We link to the next stack (which would be 214 * We link to the next stack (which would be
@@ -223,7 +229,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
223 /* 229 /*
224 * This handles the process stack: 230 * This handles the process stack:
225 */ 231 */
226 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); 232 bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
227 put_cpu(); 233 put_cpu();
228} 234}
229EXPORT_SYMBOL(dump_trace); 235EXPORT_SYMBOL(dump_trace);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a966b753e496..7bca3c6a02fb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -12,21 +12,13 @@
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/bootmem.h> 14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/pfn.h> 15#include <linux/pfn.h>
21#include <linux/suspend.h> 16#include <linux/suspend.h>
22#include <linux/firmware-map.h> 17#include <linux/firmware-map.h>
23 18
24#include <asm/pgtable.h>
25#include <asm/page.h>
26#include <asm/e820.h> 19#include <asm/e820.h>
27#include <asm/proto.h> 20#include <asm/proto.h>
28#include <asm/setup.h> 21#include <asm/setup.h>
29#include <asm/trampoline.h>
30 22
31/* 23/*
32 * The e820 map is the map that gets modified e.g. with command line parameters 24 * The e820 map is the map that gets modified e.g. with command line parameters
@@ -527,29 +519,45 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
527 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", 519 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
528 (unsigned long long) start, 520 (unsigned long long) start,
529 (unsigned long long) end); 521 (unsigned long long) end);
530 e820_print_type(old_type); 522 if (checktype)
523 e820_print_type(old_type);
531 printk(KERN_CONT "\n"); 524 printk(KERN_CONT "\n");
532 525
533 for (i = 0; i < e820.nr_map; i++) { 526 for (i = 0; i < e820.nr_map; i++) {
534 struct e820entry *ei = &e820.map[i]; 527 struct e820entry *ei = &e820.map[i];
535 u64 final_start, final_end; 528 u64 final_start, final_end;
529 u64 ei_end;
536 530
537 if (checktype && ei->type != old_type) 531 if (checktype && ei->type != old_type)
538 continue; 532 continue;
533
534 ei_end = ei->addr + ei->size;
539 /* totally covered? */ 535 /* totally covered? */
540 if (ei->addr >= start && 536 if (ei->addr >= start && ei_end <= end) {
541 (ei->addr + ei->size) <= (start + size)) {
542 real_removed_size += ei->size; 537 real_removed_size += ei->size;
543 memset(ei, 0, sizeof(struct e820entry)); 538 memset(ei, 0, sizeof(struct e820entry));
544 continue; 539 continue;
545 } 540 }
541
542 /* new range is totally covered? */
543 if (ei->addr < start && ei_end > end) {
544 e820_add_region(end, ei_end - end, ei->type);
545 ei->size = start - ei->addr;
546 real_removed_size += size;
547 continue;
548 }
549
546 /* partially covered */ 550 /* partially covered */
547 final_start = max(start, ei->addr); 551 final_start = max(start, ei->addr);
548 final_end = min(start + size, ei->addr + ei->size); 552 final_end = min(end, ei_end);
549 if (final_start >= final_end) 553 if (final_start >= final_end)
550 continue; 554 continue;
551 real_removed_size += final_end - final_start; 555 real_removed_size += final_end - final_start;
552 556
557 /*
558 * left range could be head or tail, so need to update
559 * size at first.
560 */
553 ei->size -= final_end - final_start; 561 ei->size -= final_end - final_start;
554 if (ei->addr < final_start) 562 if (ei->addr < final_start)
555 continue; 563 continue;
@@ -730,319 +738,44 @@ core_initcall(e820_mark_nvs_memory);
730#endif 738#endif
731 739
732/* 740/*
733 * Early reserved memory areas. 741 * Find a free area with specified alignment in a specific range.
734 */
735#define MAX_EARLY_RES 32
736
737struct early_res {
738 u64 start, end;
739 char name[16];
740 char overlap_ok;
741};
742static struct early_res early_res[MAX_EARLY_RES] __initdata = {
743 { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
744#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
745 /*
746 * But first pinch a few for the stack/trampoline stuff
747 * FIXME: Don't need the extra page at 4K, but need to fix
748 * trampoline before removing it. (see the GDT stuff)
749 */
750 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
751#endif
752
753 {}
754};
755
756static int __init find_overlapped_early(u64 start, u64 end)
757{
758 int i;
759 struct early_res *r;
760
761 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
762 r = &early_res[i];
763 if (end > r->start && start < r->end)
764 break;
765 }
766
767 return i;
768}
769
770/*
771 * Drop the i-th range from the early reservation map,
772 * by copying any higher ranges down one over it, and
773 * clearing what had been the last slot.
774 */
775static void __init drop_range(int i)
776{
777 int j;
778
779 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
780 ;
781
782 memmove(&early_res[i], &early_res[i + 1],
783 (j - 1 - i) * sizeof(struct early_res));
784
785 early_res[j - 1].end = 0;
786}
787
788/*
789 * Split any existing ranges that:
790 * 1) are marked 'overlap_ok', and
791 * 2) overlap with the stated range [start, end)
792 * into whatever portion (if any) of the existing range is entirely
793 * below or entirely above the stated range. Drop the portion
794 * of the existing range that overlaps with the stated range,
795 * which will allow the caller of this routine to then add that
796 * stated range without conflicting with any existing range.
797 */ 742 */
798static void __init drop_overlaps_that_are_ok(u64 start, u64 end) 743u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
799{ 744{
800 int i; 745 int i;
801 struct early_res *r;
802 u64 lower_start, lower_end;
803 u64 upper_start, upper_end;
804 char name[16];
805 746
806 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { 747 for (i = 0; i < e820.nr_map; i++) {
807 r = &early_res[i]; 748 struct e820entry *ei = &e820.map[i];
749 u64 addr;
750 u64 ei_start, ei_last;
808 751
809 /* Continue past non-overlapping ranges */ 752 if (ei->type != E820_RAM)
810 if (end <= r->start || start >= r->end)
811 continue; 753 continue;
812 754
813 /* 755 ei_last = ei->addr + ei->size;
814 * Leave non-ok overlaps as is; let caller 756 ei_start = ei->addr;
815 * panic "Overlapping early reservations" 757 addr = find_early_area(ei_start, ei_last, start, end,
816 * when it hits this overlap. 758 size, align);
817 */
818 if (!r->overlap_ok)
819 return;
820
821 /*
822 * We have an ok overlap. We will drop it from the early
823 * reservation map, and add back in any non-overlapping
824 * portions (lower or upper) as separate, overlap_ok,
825 * non-overlapping ranges.
826 */
827
828 /* 1. Note any non-overlapping (lower or upper) ranges. */
829 strncpy(name, r->name, sizeof(name) - 1);
830
831 lower_start = lower_end = 0;
832 upper_start = upper_end = 0;
833 if (r->start < start) {
834 lower_start = r->start;
835 lower_end = start;
836 }
837 if (r->end > end) {
838 upper_start = end;
839 upper_end = r->end;
840 }
841
842 /* 2. Drop the original ok overlapping range */
843 drop_range(i);
844
845 i--; /* resume for-loop on copied down entry */
846
847 /* 3. Add back in any non-overlapping ranges. */
848 if (lower_end)
849 reserve_early_overlap_ok(lower_start, lower_end, name);
850 if (upper_end)
851 reserve_early_overlap_ok(upper_start, upper_end, name);
852 }
853}
854
855static void __init __reserve_early(u64 start, u64 end, char *name,
856 int overlap_ok)
857{
858 int i;
859 struct early_res *r;
860
861 i = find_overlapped_early(start, end);
862 if (i >= MAX_EARLY_RES)
863 panic("Too many early reservations");
864 r = &early_res[i];
865 if (r->end)
866 panic("Overlapping early reservations "
867 "%llx-%llx %s to %llx-%llx %s\n",
868 start, end - 1, name?name:"", r->start,
869 r->end - 1, r->name);
870 r->start = start;
871 r->end = end;
872 r->overlap_ok = overlap_ok;
873 if (name)
874 strncpy(r->name, name, sizeof(r->name) - 1);
875}
876
877/*
878 * A few early reservtations come here.
879 *
880 * The 'overlap_ok' in the name of this routine does -not- mean it
881 * is ok for these reservations to overlap an earlier reservation.
882 * Rather it means that it is ok for subsequent reservations to
883 * overlap this one.
884 *
885 * Use this entry point to reserve early ranges when you are doing
886 * so out of "Paranoia", reserving perhaps more memory than you need,
887 * just in case, and don't mind a subsequent overlapping reservation
888 * that is known to be needed.
889 *
890 * The drop_overlaps_that_are_ok() call here isn't really needed.
891 * It would be needed if we had two colliding 'overlap_ok'
892 * reservations, so that the second such would not panic on the
893 * overlap with the first. We don't have any such as of this
894 * writing, but might as well tolerate such if it happens in
895 * the future.
896 */
897void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
898{
899 drop_overlaps_that_are_ok(start, end);
900 __reserve_early(start, end, name, 1);
901}
902
903/*
904 * Most early reservations come here.
905 *
906 * We first have drop_overlaps_that_are_ok() drop any pre-existing
907 * 'overlap_ok' ranges, so that we can then reserve this memory
908 * range without risk of panic'ing on an overlapping overlap_ok
909 * early reservation.
910 */
911void __init reserve_early(u64 start, u64 end, char *name)
912{
913 if (start >= end)
914 return;
915
916 drop_overlaps_that_are_ok(start, end);
917 __reserve_early(start, end, name, 0);
918}
919
920void __init free_early(u64 start, u64 end)
921{
922 struct early_res *r;
923 int i;
924
925 i = find_overlapped_early(start, end);
926 r = &early_res[i];
927 if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
928 panic("free_early on not reserved area: %llx-%llx!",
929 start, end - 1);
930
931 drop_range(i);
932}
933 759
934void __init early_res_to_bootmem(u64 start, u64 end) 760 if (addr != -1ULL)
935{ 761 return addr;
936 int i, count;
937 u64 final_start, final_end;
938
939 count = 0;
940 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
941 count++;
942
943 printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
944 count, start, end);
945 for (i = 0; i < count; i++) {
946 struct early_res *r = &early_res[i];
947 printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
948 r->start, r->end, r->name);
949 final_start = max(start, r->start);
950 final_end = min(end, r->end);
951 if (final_start >= final_end) {
952 printk(KERN_CONT "\n");
953 continue;
954 }
955 printk(KERN_CONT " ==> [%010llx - %010llx]\n",
956 final_start, final_end);
957 reserve_bootmem_generic(final_start, final_end - final_start,
958 BOOTMEM_DEFAULT);
959 } 762 }
763 return -1ULL;
960} 764}
961 765
962/* Check for already reserved areas */ 766u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
963static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
964{
965 int i;
966 u64 addr = *addrp;
967 int changed = 0;
968 struct early_res *r;
969again:
970 i = find_overlapped_early(addr, addr + size);
971 r = &early_res[i];
972 if (i < MAX_EARLY_RES && r->end) {
973 *addrp = addr = round_up(r->end, align);
974 changed = 1;
975 goto again;
976 }
977 return changed;
978}
979
980/* Check for already reserved areas */
981static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
982{ 767{
983 int i; 768 return find_e820_area(start, end, size, align);
984 u64 addr = *addrp, last;
985 u64 size = *sizep;
986 int changed = 0;
987again:
988 last = addr + size;
989 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
990 struct early_res *r = &early_res[i];
991 if (last > r->start && addr < r->start) {
992 size = r->start - addr;
993 changed = 1;
994 goto again;
995 }
996 if (last > r->end && addr < r->end) {
997 addr = round_up(r->end, align);
998 size = last - addr;
999 changed = 1;
1000 goto again;
1001 }
1002 if (last <= r->end && addr >= r->start) {
1003 (*sizep)++;
1004 return 0;
1005 }
1006 }
1007 if (changed) {
1008 *addrp = addr;
1009 *sizep = size;
1010 }
1011 return changed;
1012} 769}
1013 770
1014/* 771u64 __init get_max_mapped(void)
1015 * Find a free area with specified alignment in a specific range.
1016 */
1017u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1018{ 772{
1019 int i; 773 u64 end = max_pfn_mapped;
1020 774
1021 for (i = 0; i < e820.nr_map; i++) { 775 end <<= PAGE_SHIFT;
1022 struct e820entry *ei = &e820.map[i];
1023 u64 addr, last;
1024 u64 ei_last;
1025 776
1026 if (ei->type != E820_RAM) 777 return end;
1027 continue;
1028 addr = round_up(ei->addr, align);
1029 ei_last = ei->addr + ei->size;
1030 if (addr < start)
1031 addr = round_up(start, align);
1032 if (addr >= ei_last)
1033 continue;
1034 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1035 ;
1036 last = addr + size;
1037 if (last > ei_last)
1038 continue;
1039 if (last > end)
1040 continue;
1041 return addr;
1042 }
1043 return -1ULL;
1044} 778}
1045
1046/* 779/*
1047 * Find next free range after *start 780 * Find next free range after *start
1048 */ 781 */
@@ -1052,25 +785,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
1052 785
1053 for (i = 0; i < e820.nr_map; i++) { 786 for (i = 0; i < e820.nr_map; i++) {
1054 struct e820entry *ei = &e820.map[i]; 787 struct e820entry *ei = &e820.map[i];
1055 u64 addr, last; 788 u64 addr;
1056 u64 ei_last; 789 u64 ei_start, ei_last;
1057 790
1058 if (ei->type != E820_RAM) 791 if (ei->type != E820_RAM)
1059 continue; 792 continue;
1060 addr = round_up(ei->addr, align); 793
1061 ei_last = ei->addr + ei->size; 794 ei_last = ei->addr + ei->size;
1062 if (addr < start) 795 ei_start = ei->addr;
1063 addr = round_up(start, align); 796 addr = find_early_area_size(ei_start, ei_last, start,
1064 if (addr >= ei_last) 797 sizep, align);
1065 continue; 798
1066 *sizep = ei_last - addr; 799 if (addr != -1ULL)
1067 while (bad_addr_size(&addr, sizep, align) && 800 return addr;
1068 addr + *sizep <= ei_last)
1069 ;
1070 last = addr + *sizep;
1071 if (last > ei_last)
1072 continue;
1073 return addr;
1074 } 801 }
1075 802
1076 return -1ULL; 803 return -1ULL;
@@ -1429,6 +1156,8 @@ void __init e820_reserve_resources_late(void)
1429 end = MAX_RESOURCE_SIZE; 1156 end = MAX_RESOURCE_SIZE;
1430 if (start >= end) 1157 if (start >= end)
1431 continue; 1158 continue;
1159 printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1160 start, end);
1432 reserve_region_with_split(&iomem_resource, start, end, 1161 reserve_region_with_split(&iomem_resource, start, end,
1433 "RAM buffer"); 1162 "RAM buffer");
1434 } 1163 }
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 5051b94c9069..b2e246037392 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/start_kernel.h> 9#include <linux/start_kernel.h>
10#include <linux/mm.h>
10 11
11#include <asm/setup.h> 12#include <asm/setup.h>
12#include <asm/sections.h> 13#include <asm/sections.h>
@@ -29,14 +30,25 @@ static void __init i386_default_early_setup(void)
29 30
30void __init i386_start_kernel(void) 31void __init i386_start_kernel(void)
31{ 32{
33#ifdef CONFIG_X86_TRAMPOLINE
34 /*
35 * But first pinch a few for the stack/trampoline stuff
36 * FIXME: Don't need the extra page at 4K, but need to fix
37 * trampoline before removing it. (see the GDT stuff)
38 */
39 reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
40 "EX TRAMPOLINE");
41#endif
42
32 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); 43 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
33 44
34#ifdef CONFIG_BLK_DEV_INITRD 45#ifdef CONFIG_BLK_DEV_INITRD
35 /* Reserve INITRD */ 46 /* Reserve INITRD */
36 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { 47 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
48 /* Assume only end is not page aligned */
37 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 49 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
38 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 50 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
39 u64 ramdisk_end = ramdisk_image + ramdisk_size; 51 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
40 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); 52 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
41 } 53 }
42#endif 54#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b5a9896ca1e7..7147143fd614 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -103,9 +103,10 @@ void __init x86_64_start_reservations(char *real_mode_data)
103#ifdef CONFIG_BLK_DEV_INITRD 103#ifdef CONFIG_BLK_DEV_INITRD
104 /* Reserve INITRD */ 104 /* Reserve INITRD */
105 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { 105 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
106 /* Assume only end is not page aligned */
106 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 107 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
107 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 108 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
108 unsigned long ramdisk_end = ramdisk_image + ramdisk_size; 109 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
109 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); 110 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
110 } 111 }
111#endif 112#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7fd318bac59c..37c3d4b17d85 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -442,8 +442,8 @@ is386: movl $2,%ecx # set MP
442 */ 442 */
443 cmpb $0,ready 443 cmpb $0,ready
444 jne 1f 444 jne 1f
445 movl $per_cpu__gdt_page,%eax 445 movl $gdt_page,%eax
446 movl $per_cpu__stack_canary,%ecx 446 movl $stack_canary,%ecx
447 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 447 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
448 shrl $16, %ecx 448 shrl $16, %ecx
449 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) 449 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -706,7 +706,7 @@ idt_descr:
706 .word 0 # 32 bit align gdt_desc.address 706 .word 0 # 32 bit align gdt_desc.address
707ENTRY(early_gdt_descr) 707ENTRY(early_gdt_descr)
708 .word GDT_ENTRIES*8-1 708 .word GDT_ENTRIES*8-1
709 .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ 709 .long gdt_page /* Overwritten for secondary CPUs */
710 710
711/* 711/*
712 * The boot_gdt must mirror the equivalent in setup.S and is 712 * The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2d8b5035371c..3d1e6f16b7a6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -27,7 +27,7 @@
27#define GET_CR2_INTO_RCX movq %cr2, %rcx 27#define GET_CR2_INTO_RCX movq %cr2, %rcx
28#endif 28#endif
29 29
30/* we are not able to switch in one step to the final KERNEL ADRESS SPACE 30/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
31 * because we need identity-mapped pages. 31 * because we need identity-mapped pages.
32 * 32 *
33 */ 33 */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad80a1c718c6..23b4ecdffa9b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -4,6 +4,7 @@
4#include <linux/sysdev.h> 4#include <linux/sysdev.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/slab.h>
7#include <linux/hpet.h> 8#include <linux/hpet.h>
8#include <linux/init.h> 9#include <linux/init.h>
9#include <linux/cpu.h> 10#include <linux/cpu.h>
@@ -266,7 +267,7 @@ static void hpet_resume_device(void)
266 force_hpet_resume(); 267 force_hpet_resume();
267} 268}
268 269
269static void hpet_resume_counter(void) 270static void hpet_resume_counter(struct clocksource *cs)
270{ 271{
271 hpet_resume_device(); 272 hpet_resume_device();
272 hpet_restart_counter(); 273 hpet_restart_counter();
@@ -399,9 +400,15 @@ static int hpet_next_event(unsigned long delta,
399 * then we might have a real hardware problem. We can not do 400 * then we might have a real hardware problem. We can not do
400 * much about it here, but at least alert the user/admin with 401 * much about it here, but at least alert the user/admin with
401 * a prominent warning. 402 * a prominent warning.
403 * An erratum on some chipsets (ICH9,..), results in comparator read
404 * immediately following a write returning old value. Workaround
405 * for this is to read this value second time, when first
406 * read returns old value.
402 */ 407 */
403 WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, 408 if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
409 WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
404 KERN_WARNING "hpet: compare register read back failed.\n"); 410 KERN_WARNING "hpet: compare register read back failed.\n");
411 }
405 412
406 return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 413 return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
407} 414}
@@ -1143,6 +1150,7 @@ int hpet_set_periodic_freq(unsigned long freq)
1143 do_div(clc, freq); 1150 do_div(clc, freq);
1144 clc >>= hpet_clockevent.shift; 1151 clc >>= hpet_clockevent.shift;
1145 hpet_pie_delta = clc; 1152 hpet_pie_delta = clc;
1153 hpet_pie_limit = 0;
1146 } 1154 }
1147 return 1; 1155 return 1;
1148} 1156}
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index dca2802c666f..d6cc065f519f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -344,13 +344,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
344 } 344 }
345 345
346 /* 346 /*
347 * For kernel-addresses, either the address or symbol name can be
348 * specified.
349 */
350 if (info->name)
351 info->address = (unsigned long)
352 kallsyms_lookup_name(info->name);
353 /*
354 * Check that the low-order bits of the address are appropriate 347 * Check that the low-order bits of the address are appropriate
355 * for the alignment implied by len. 348 * for the alignment implied by len.
356 */ 349 */
@@ -535,8 +528,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
535{ 528{
536 /* TODO */ 529 /* TODO */
537} 530}
538
539void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
540{
541 /* TODO */
542}
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index c01a2b846d47..54c31c285488 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -8,6 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/regset.h> 9#include <linux/regset.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/slab.h>
11 12
12#include <asm/sigcontext.h> 13#include <asm/sigcontext.h>
13#include <asm/processor.h> 14#include <asm/processor.h>
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index df89102bef80..7c9f02c130f3 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -5,7 +5,6 @@
5#include <linux/ioport.h> 5#include <linux/ioport.h>
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/timex.h> 7#include <linux/timex.h>
8#include <linux/slab.h>
9#include <linux/random.h> 8#include <linux/random.h>
10#include <linux/init.h> 9#include <linux/init.h>
11#include <linux/kernel_stat.h> 10#include <linux/kernel_stat.h>
@@ -32,8 +31,14 @@
32 */ 31 */
33 32
34static int i8259A_auto_eoi; 33static int i8259A_auto_eoi;
35DEFINE_SPINLOCK(i8259A_lock); 34DEFINE_RAW_SPINLOCK(i8259A_lock);
36static void mask_and_ack_8259A(unsigned int); 35static void mask_and_ack_8259A(unsigned int);
36static void mask_8259A(void);
37static void unmask_8259A(void);
38static void disable_8259A_irq(unsigned int irq);
39static void enable_8259A_irq(unsigned int irq);
40static void init_8259A(int auto_eoi);
41static int i8259A_irq_pending(unsigned int irq);
37 42
38struct irq_chip i8259A_chip = { 43struct irq_chip i8259A_chip = {
39 .name = "XT-PIC", 44 .name = "XT-PIC",
@@ -63,51 +68,51 @@ unsigned int cached_irq_mask = 0xffff;
63 */ 68 */
64unsigned long io_apic_irqs; 69unsigned long io_apic_irqs;
65 70
66void disable_8259A_irq(unsigned int irq) 71static void disable_8259A_irq(unsigned int irq)
67{ 72{
68 unsigned int mask = 1 << irq; 73 unsigned int mask = 1 << irq;
69 unsigned long flags; 74 unsigned long flags;
70 75
71 spin_lock_irqsave(&i8259A_lock, flags); 76 raw_spin_lock_irqsave(&i8259A_lock, flags);
72 cached_irq_mask |= mask; 77 cached_irq_mask |= mask;
73 if (irq & 8) 78 if (irq & 8)
74 outb(cached_slave_mask, PIC_SLAVE_IMR); 79 outb(cached_slave_mask, PIC_SLAVE_IMR);
75 else 80 else
76 outb(cached_master_mask, PIC_MASTER_IMR); 81 outb(cached_master_mask, PIC_MASTER_IMR);
77 spin_unlock_irqrestore(&i8259A_lock, flags); 82 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
78} 83}
79 84
80void enable_8259A_irq(unsigned int irq) 85static void enable_8259A_irq(unsigned int irq)
81{ 86{
82 unsigned int mask = ~(1 << irq); 87 unsigned int mask = ~(1 << irq);
83 unsigned long flags; 88 unsigned long flags;
84 89
85 spin_lock_irqsave(&i8259A_lock, flags); 90 raw_spin_lock_irqsave(&i8259A_lock, flags);
86 cached_irq_mask &= mask; 91 cached_irq_mask &= mask;
87 if (irq & 8) 92 if (irq & 8)
88 outb(cached_slave_mask, PIC_SLAVE_IMR); 93 outb(cached_slave_mask, PIC_SLAVE_IMR);
89 else 94 else
90 outb(cached_master_mask, PIC_MASTER_IMR); 95 outb(cached_master_mask, PIC_MASTER_IMR);
91 spin_unlock_irqrestore(&i8259A_lock, flags); 96 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
92} 97}
93 98
94int i8259A_irq_pending(unsigned int irq) 99static int i8259A_irq_pending(unsigned int irq)
95{ 100{
96 unsigned int mask = 1<<irq; 101 unsigned int mask = 1<<irq;
97 unsigned long flags; 102 unsigned long flags;
98 int ret; 103 int ret;
99 104
100 spin_lock_irqsave(&i8259A_lock, flags); 105 raw_spin_lock_irqsave(&i8259A_lock, flags);
101 if (irq < 8) 106 if (irq < 8)
102 ret = inb(PIC_MASTER_CMD) & mask; 107 ret = inb(PIC_MASTER_CMD) & mask;
103 else 108 else
104 ret = inb(PIC_SLAVE_CMD) & (mask >> 8); 109 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
105 spin_unlock_irqrestore(&i8259A_lock, flags); 110 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
106 111
107 return ret; 112 return ret;
108} 113}
109 114
110void make_8259A_irq(unsigned int irq) 115static void make_8259A_irq(unsigned int irq)
111{ 116{
112 disable_irq_nosync(irq); 117 disable_irq_nosync(irq);
113 io_apic_irqs &= ~(1<<irq); 118 io_apic_irqs &= ~(1<<irq);
@@ -150,7 +155,7 @@ static void mask_and_ack_8259A(unsigned int irq)
150 unsigned int irqmask = 1 << irq; 155 unsigned int irqmask = 1 << irq;
151 unsigned long flags; 156 unsigned long flags;
152 157
153 spin_lock_irqsave(&i8259A_lock, flags); 158 raw_spin_lock_irqsave(&i8259A_lock, flags);
154 /* 159 /*
155 * Lightweight spurious IRQ detection. We do not want 160 * Lightweight spurious IRQ detection. We do not want
156 * to overdo spurious IRQ handling - it's usually a sign 161 * to overdo spurious IRQ handling - it's usually a sign
@@ -183,7 +188,7 @@ handle_real_irq:
183 outb(cached_master_mask, PIC_MASTER_IMR); 188 outb(cached_master_mask, PIC_MASTER_IMR);
184 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ 189 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
185 } 190 }
186 spin_unlock_irqrestore(&i8259A_lock, flags); 191 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
187 return; 192 return;
188 193
189spurious_8259A_irq: 194spurious_8259A_irq:
@@ -281,37 +286,37 @@ static int __init i8259A_init_sysfs(void)
281 286
282device_initcall(i8259A_init_sysfs); 287device_initcall(i8259A_init_sysfs);
283 288
284void mask_8259A(void) 289static void mask_8259A(void)
285{ 290{
286 unsigned long flags; 291 unsigned long flags;
287 292
288 spin_lock_irqsave(&i8259A_lock, flags); 293 raw_spin_lock_irqsave(&i8259A_lock, flags);
289 294
290 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 295 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
291 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 296 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
292 297
293 spin_unlock_irqrestore(&i8259A_lock, flags); 298 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
294} 299}
295 300
296void unmask_8259A(void) 301static void unmask_8259A(void)
297{ 302{
298 unsigned long flags; 303 unsigned long flags;
299 304
300 spin_lock_irqsave(&i8259A_lock, flags); 305 raw_spin_lock_irqsave(&i8259A_lock, flags);
301 306
302 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 307 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
303 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 308 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
304 309
305 spin_unlock_irqrestore(&i8259A_lock, flags); 310 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
306} 311}
307 312
308void init_8259A(int auto_eoi) 313static void init_8259A(int auto_eoi)
309{ 314{
310 unsigned long flags; 315 unsigned long flags;
311 316
312 i8259A_auto_eoi = auto_eoi; 317 i8259A_auto_eoi = auto_eoi;
313 318
314 spin_lock_irqsave(&i8259A_lock, flags); 319 raw_spin_lock_irqsave(&i8259A_lock, flags);
315 320
316 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 321 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
317 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 322 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
@@ -356,5 +361,49 @@ void init_8259A(int auto_eoi)
356 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 361 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
357 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 362 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
358 363
359 spin_unlock_irqrestore(&i8259A_lock, flags); 364 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
360} 365}
366
367/*
368 * make i8259 a driver so that we can select pic functions at run time. the goal
369 * is to make x86 binary compatible among pc compatible and non-pc compatible
370 * platforms, such as x86 MID.
371 */
372
373static void legacy_pic_noop(void) { };
374static void legacy_pic_uint_noop(unsigned int unused) { };
375static void legacy_pic_int_noop(int unused) { };
376
377static struct irq_chip dummy_pic_chip = {
378 .name = "dummy pic",
379 .mask = legacy_pic_uint_noop,
380 .unmask = legacy_pic_uint_noop,
381 .disable = legacy_pic_uint_noop,
382 .mask_ack = legacy_pic_uint_noop,
383};
384static int legacy_pic_irq_pending_noop(unsigned int irq)
385{
386 return 0;
387}
388
389struct legacy_pic null_legacy_pic = {
390 .nr_legacy_irqs = 0,
391 .chip = &dummy_pic_chip,
392 .mask_all = legacy_pic_noop,
393 .restore_mask = legacy_pic_noop,
394 .init = legacy_pic_int_noop,
395 .irq_pending = legacy_pic_irq_pending_noop,
396 .make_irq = legacy_pic_uint_noop,
397};
398
399struct legacy_pic default_legacy_pic = {
400 .nr_legacy_irqs = NR_IRQS_LEGACY,
401 .chip = &i8259A_chip,
402 .mask_all = mask_8259A,
403 .restore_mask = unmask_8259A,
404 .init = init_8259A,
405 .irq_pending = i8259A_irq_pending,
406 .make_irq = make_8259A_irq,
407};
408
409struct legacy_pic *legacy_pic = &default_legacy_pic;
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index d5932226614f..0ed2d300cd46 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -5,7 +5,6 @@
5#include <linux/ioport.h> 5#include <linux/ioport.h>
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/timex.h> 7#include <linux/timex.h>
8#include <linux/slab.h>
9#include <linux/random.h> 8#include <linux/random.h>
10#include <linux/kprobes.h> 9#include <linux/kprobes.h>
11#include <linux/init.h> 10#include <linux/init.h>
@@ -84,24 +83,7 @@ static struct irqaction irq2 = {
84}; 83};
85 84
86DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 85DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
87 [0 ... IRQ0_VECTOR - 1] = -1, 86 [0 ... NR_VECTORS - 1] = -1,
88 [IRQ0_VECTOR] = 0,
89 [IRQ1_VECTOR] = 1,
90 [IRQ2_VECTOR] = 2,
91 [IRQ3_VECTOR] = 3,
92 [IRQ4_VECTOR] = 4,
93 [IRQ5_VECTOR] = 5,
94 [IRQ6_VECTOR] = 6,
95 [IRQ7_VECTOR] = 7,
96 [IRQ8_VECTOR] = 8,
97 [IRQ9_VECTOR] = 9,
98 [IRQ10_VECTOR] = 10,
99 [IRQ11_VECTOR] = 11,
100 [IRQ12_VECTOR] = 12,
101 [IRQ13_VECTOR] = 13,
102 [IRQ14_VECTOR] = 14,
103 [IRQ15_VECTOR] = 15,
104 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
105}; 87};
106 88
107int vector_used_by_percpu_irq(unsigned int vector) 89int vector_used_by_percpu_irq(unsigned int vector)
@@ -123,12 +105,12 @@ void __init init_ISA_irqs(void)
123#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 105#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
124 init_bsp_APIC(); 106 init_bsp_APIC();
125#endif 107#endif
126 init_8259A(0); 108 legacy_pic->init(0);
127 109
128 /* 110 /*
129 * 16 old-style INTA-cycle interrupts: 111 * 16 old-style INTA-cycle interrupts:
130 */ 112 */
131 for (i = 0; i < NR_IRQS_LEGACY; i++) { 113 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
132 struct irq_desc *desc = irq_to_desc(i); 114 struct irq_desc *desc = irq_to_desc(i);
133 115
134 desc->status = IRQ_DISABLED; 116 desc->status = IRQ_DISABLED;
@@ -142,9 +124,44 @@ void __init init_ISA_irqs(void)
142 124
143void __init init_IRQ(void) 125void __init init_IRQ(void)
144{ 126{
127 int i;
128
129 /*
130 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
131 * If these IRQ's are handled by legacy interrupt-controllers like PIC,
132 * then this configuration will likely be static after the boot. If
133 * these IRQ's are handled by more modern controllers like IO-APIC,
134 * then this vector space can be freed and re-used dynamically as the
135 * irq's migrate etc.
136 */
137 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
138 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
139
145 x86_init.irqs.intr_init(); 140 x86_init.irqs.intr_init();
146} 141}
147 142
143/*
144 * Setup the vector to irq mappings.
145 */
146void setup_vector_irq(int cpu)
147{
148#ifndef CONFIG_X86_IO_APIC
149 int irq;
150
151 /*
152 * On most of the platforms, legacy PIC delivers the interrupts on the
153 * boot cpu. But there are certain platforms where PIC interrupts are
154 * delivered to multiple cpu's. If the legacy IRQ is handled by the
155 * legacy PIC, for the new cpu that is coming online, setup the static
156 * legacy vector to irq mapping:
157 */
158 for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++)
159 per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
160#endif
161
162 __setup_vector_irq(cpu);
163}
164
148static void __init smp_intr_init(void) 165static void __init smp_intr_init(void)
149{ 166{
150#ifdef CONFIG_SMP 167#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c
index cbc4332a77b2..0f7bc20cfcde 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/k8.c
@@ -2,8 +2,8 @@
2 * Shared support code for AMD K8 northbridges and derivatives. 2 * Shared support code for AMD K8 northbridges and derivatives.
3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. 3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
4 */ 4 */
5#include <linux/gfp.h>
6#include <linux/types.h> 5#include <linux/types.h>
6#include <linux/slab.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/module.h> 9#include <linux/module.h>
@@ -121,3 +121,17 @@ void k8_flush_garts(void)
121} 121}
122EXPORT_SYMBOL_GPL(k8_flush_garts); 122EXPORT_SYMBOL_GPL(k8_flush_garts);
123 123
124static __init int init_k8_nbs(void)
125{
126 int err = 0;
127
128 err = cache_k8_northbridges();
129
130 if (err < 0)
131 printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
132
133 return err;
134}
135
136/* This has to go after the PCI subsystem */
137fs_initcall(init_k8_nbs);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index e444357375ce..8afd9f321f10 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
12#include <linux/init.h> 13#include <linux/init.h>
13#include <linux/stat.h> 14#include <linux/stat.h>
14#include <linux/io.h> 15#include <linux/io.h>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index bfba6019d762..b2258ca91003 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -618,8 +618,8 @@ int kgdb_arch_init(void)
618 * portion of kgdb because this operation requires mutexes to 618 * portion of kgdb because this operation requires mutexes to
619 * complete. 619 * complete.
620 */ 620 */
621 hw_breakpoint_init(&attr);
621 attr.bp_addr = (unsigned long)kgdb_arch_init; 622 attr.bp_addr = (unsigned long)kgdb_arch_init;
622 attr.type = PERF_TYPE_BREAKPOINT;
623 attr.bp_len = HW_BREAKPOINT_LEN_1; 623 attr.bp_len = HW_BREAKPOINT_LEN_1;
624 attr.bp_type = HW_BREAKPOINT_W; 624 attr.bp_type = HW_BREAKPOINT_W;
625 attr.disabled = 1; 625 attr.disabled = 1;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5de9f4a9c3fd..b43bbaebe2c0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h> 51#include <linux/kallsyms.h>
52#include <linux/ftrace.h>
52 53
53#include <asm/cacheflush.h> 54#include <asm/cacheflush.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
106}; 107};
107const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 108const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
108 109
109/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 110static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
110static void __kprobes set_jmp_op(void *from, void *to)
111{ 111{
112 struct __arch_jmp_op { 112 struct __arch_relative_insn {
113 char op; 113 u8 op;
114 s32 raddr; 114 s32 raddr;
115 } __attribute__((packed)) * jop; 115 } __attribute__((packed)) *insn;
116 jop = (struct __arch_jmp_op *)from; 116
117 jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); 117 insn = (struct __arch_relative_insn *)from;
118 jop->op = RELATIVEJUMP_INSTRUCTION; 118 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
119 insn->op = op;
120}
121
122/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
123static void __kprobes synthesize_reljump(void *from, void *to)
124{
125 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
119} 126}
120 127
121/* 128/*
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
202 /* 209 /*
203 * Basically, kp->ainsn.insn has an original instruction. 210 * Basically, kp->ainsn.insn has an original instruction.
204 * However, RIP-relative instruction can not do single-stepping 211 * However, RIP-relative instruction can not do single-stepping
205 * at different place, fix_riprel() tweaks the displacement of 212 * at different place, __copy_instruction() tweaks the displacement of
206 * that instruction. In that case, we can't recover the instruction 213 * that instruction. In that case, we can't recover the instruction
207 * from the kp->ainsn.insn. 214 * from the kp->ainsn.insn.
208 * 215 *
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
284} 291}
285 292
286/* 293/*
287 * Adjust the displacement if the instruction uses the %rip-relative 294 * Copy an instruction and adjust the displacement if the instruction
288 * addressing mode. 295 * uses the %rip-relative addressing mode.
289 * If it does, Return the address of the 32-bit displacement word. 296 * If it does, Return the address of the 32-bit displacement word.
290 * If not, return null. 297 * If not, return null.
291 * Only applicable to 64-bit x86. 298 * Only applicable to 64-bit x86.
292 */ 299 */
293static void __kprobes fix_riprel(struct kprobe *p) 300static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
294{ 301{
295#ifdef CONFIG_X86_64
296 struct insn insn; 302 struct insn insn;
297 kernel_insn_init(&insn, p->ainsn.insn); 303 int ret;
304 kprobe_opcode_t buf[MAX_INSN_SIZE];
298 305
306 kernel_insn_init(&insn, src);
307 if (recover) {
308 insn_get_opcode(&insn);
309 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
310 ret = recover_probed_instruction(buf,
311 (unsigned long)src);
312 if (ret)
313 return 0;
314 kernel_insn_init(&insn, buf);
315 }
316 }
317 insn_get_length(&insn);
318 memcpy(dest, insn.kaddr, insn.length);
319
320#ifdef CONFIG_X86_64
299 if (insn_rip_relative(&insn)) { 321 if (insn_rip_relative(&insn)) {
300 s64 newdisp; 322 s64 newdisp;
301 u8 *disp; 323 u8 *disp;
324 kernel_insn_init(&insn, dest);
302 insn_get_displacement(&insn); 325 insn_get_displacement(&insn);
303 /* 326 /*
304 * The copied instruction uses the %rip-relative addressing 327 * The copied instruction uses the %rip-relative addressing
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
312 * extension of the original signed 32-bit displacement would 335 * extension of the original signed 32-bit displacement would
313 * have given. 336 * have given.
314 */ 337 */
315 newdisp = (u8 *) p->addr + (s64) insn.displacement.value - 338 newdisp = (u8 *) src + (s64) insn.displacement.value -
316 (u8 *) p->ainsn.insn; 339 (u8 *) dest;
317 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 340 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
318 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); 341 disp = (u8 *) dest + insn_offset_displacement(&insn);
319 *(s32 *) disp = (s32) newdisp; 342 *(s32 *) disp = (s32) newdisp;
320 } 343 }
321#endif 344#endif
345 return insn.length;
322} 346}
323 347
324static void __kprobes arch_copy_kprobe(struct kprobe *p) 348static void __kprobes arch_copy_kprobe(struct kprobe *p)
325{ 349{
326 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 350 /*
327 351 * Copy an instruction without recovering int3, because it will be
328 fix_riprel(p); 352 * put by another subsystem.
353 */
354 __copy_instruction(p->ainsn.insn, p->addr, 0);
329 355
330 if (can_boost(p->addr)) 356 if (can_boost(p->addr))
331 p->ainsn.boostable = 0; 357 p->ainsn.boostable = 0;
@@ -406,18 +432,6 @@ static void __kprobes restore_btf(void)
406 update_debugctlmsr(current->thread.debugctlmsr); 432 update_debugctlmsr(current->thread.debugctlmsr);
407} 433}
408 434
409static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
410{
411 clear_btf();
412 regs->flags |= X86_EFLAGS_TF;
413 regs->flags &= ~X86_EFLAGS_IF;
414 /* single step inline if the instruction is an int3 */
415 if (p->opcode == BREAKPOINT_INSTRUCTION)
416 regs->ip = (unsigned long)p->addr;
417 else
418 regs->ip = (unsigned long)p->ainsn.insn;
419}
420
421void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
422 struct pt_regs *regs) 436 struct pt_regs *regs)
423{ 437{
@@ -429,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429 *sara = (unsigned long) &kretprobe_trampoline; 443 *sara = (unsigned long) &kretprobe_trampoline;
430} 444}
431 445
446#ifdef CONFIG_OPTPROBES
447static int __kprobes setup_detour_execution(struct kprobe *p,
448 struct pt_regs *regs,
449 int reenter);
450#else
451#define setup_detour_execution(p, regs, reenter) (0)
452#endif
453
432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 454static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
433 struct kprobe_ctlblk *kcb) 455 struct kprobe_ctlblk *kcb, int reenter)
434{ 456{
457 if (setup_detour_execution(p, regs, reenter))
458 return;
459
435#if !defined(CONFIG_PREEMPT) 460#if !defined(CONFIG_PREEMPT)
436 if (p->ainsn.boostable == 1 && !p->post_handler) { 461 if (p->ainsn.boostable == 1 && !p->post_handler) {
437 /* Boost up -- we can execute copied instructions directly */ 462 /* Boost up -- we can execute copied instructions directly */
438 reset_current_kprobe(); 463 if (!reenter)
464 reset_current_kprobe();
465 /*
466 * Reentering boosted probe doesn't reset current_kprobe,
467 * nor set current_kprobe, because it doesn't use single
468 * stepping.
469 */
439 regs->ip = (unsigned long)p->ainsn.insn; 470 regs->ip = (unsigned long)p->ainsn.insn;
440 preempt_enable_no_resched(); 471 preempt_enable_no_resched();
441 return; 472 return;
442 } 473 }
443#endif 474#endif
444 prepare_singlestep(p, regs); 475 if (reenter) {
445 kcb->kprobe_status = KPROBE_HIT_SS; 476 save_previous_kprobe(kcb);
477 set_current_kprobe(p, regs, kcb);
478 kcb->kprobe_status = KPROBE_REENTER;
479 } else
480 kcb->kprobe_status = KPROBE_HIT_SS;
481 /* Prepare real single stepping */
482 clear_btf();
483 regs->flags |= X86_EFLAGS_TF;
484 regs->flags &= ~X86_EFLAGS_IF;
485 /* single step inline if the instruction is an int3 */
486 if (p->opcode == BREAKPOINT_INSTRUCTION)
487 regs->ip = (unsigned long)p->addr;
488 else
489 regs->ip = (unsigned long)p->ainsn.insn;
446} 490}
447 491
448/* 492/*
@@ -456,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
456 switch (kcb->kprobe_status) { 500 switch (kcb->kprobe_status) {
457 case KPROBE_HIT_SSDONE: 501 case KPROBE_HIT_SSDONE:
458 case KPROBE_HIT_ACTIVE: 502 case KPROBE_HIT_ACTIVE:
459 save_previous_kprobe(kcb);
460 set_current_kprobe(p, regs, kcb);
461 kprobes_inc_nmissed_count(p); 503 kprobes_inc_nmissed_count(p);
462 prepare_singlestep(p, regs); 504 setup_singlestep(p, regs, kcb, 1);
463 kcb->kprobe_status = KPROBE_REENTER;
464 break; 505 break;
465 case KPROBE_HIT_SS: 506 case KPROBE_HIT_SS:
466 /* A probe has been hit in the codepath leading up to, or just 507 /* A probe has been hit in the codepath leading up to, or just
@@ -535,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
535 * more here. 576 * more here.
536 */ 577 */
537 if (!p->pre_handler || !p->pre_handler(p, regs)) 578 if (!p->pre_handler || !p->pre_handler(p, regs))
538 setup_singlestep(p, regs, kcb); 579 setup_singlestep(p, regs, kcb, 0);
539 return 1; 580 return 1;
540 } 581 }
541 } else if (kprobe_running()) { 582 } else if (kprobe_running()) {
542 p = __get_cpu_var(current_kprobe); 583 p = __get_cpu_var(current_kprobe);
543 if (p->break_handler && p->break_handler(p, regs)) { 584 if (p->break_handler && p->break_handler(p, regs)) {
544 setup_singlestep(p, regs, kcb); 585 setup_singlestep(p, regs, kcb, 0);
545 return 1; 586 return 1;
546 } 587 }
547 } /* else: not a kprobe fault; let the kernel handle it */ 588 } /* else: not a kprobe fault; let the kernel handle it */
@@ -550,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
550 return 0; 591 return 0;
551} 592}
552 593
594#ifdef CONFIG_X86_64
595#define SAVE_REGS_STRING \
596 /* Skip cs, ip, orig_ax. */ \
597 " subq $24, %rsp\n" \
598 " pushq %rdi\n" \
599 " pushq %rsi\n" \
600 " pushq %rdx\n" \
601 " pushq %rcx\n" \
602 " pushq %rax\n" \
603 " pushq %r8\n" \
604 " pushq %r9\n" \
605 " pushq %r10\n" \
606 " pushq %r11\n" \
607 " pushq %rbx\n" \
608 " pushq %rbp\n" \
609 " pushq %r12\n" \
610 " pushq %r13\n" \
611 " pushq %r14\n" \
612 " pushq %r15\n"
613#define RESTORE_REGS_STRING \
614 " popq %r15\n" \
615 " popq %r14\n" \
616 " popq %r13\n" \
617 " popq %r12\n" \
618 " popq %rbp\n" \
619 " popq %rbx\n" \
620 " popq %r11\n" \
621 " popq %r10\n" \
622 " popq %r9\n" \
623 " popq %r8\n" \
624 " popq %rax\n" \
625 " popq %rcx\n" \
626 " popq %rdx\n" \
627 " popq %rsi\n" \
628 " popq %rdi\n" \
629 /* Skip orig_ax, ip, cs */ \
630 " addq $24, %rsp\n"
631#else
632#define SAVE_REGS_STRING \
633 /* Skip cs, ip, orig_ax and gs. */ \
634 " subl $16, %esp\n" \
635 " pushl %fs\n" \
636 " pushl %ds\n" \
637 " pushl %es\n" \
638 " pushl %eax\n" \
639 " pushl %ebp\n" \
640 " pushl %edi\n" \
641 " pushl %esi\n" \
642 " pushl %edx\n" \
643 " pushl %ecx\n" \
644 " pushl %ebx\n"
645#define RESTORE_REGS_STRING \
646 " popl %ebx\n" \
647 " popl %ecx\n" \
648 " popl %edx\n" \
649 " popl %esi\n" \
650 " popl %edi\n" \
651 " popl %ebp\n" \
652 " popl %eax\n" \
653 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
654 " addl $24, %esp\n"
655#endif
656
553/* 657/*
554 * When a retprobed function returns, this code saves registers and 658 * When a retprobed function returns, this code saves registers and
555 * calls trampoline_handler(), which calls the kretprobe's handler. 659 * calls trampoline_handler(), which calls the kretprobe's handler.
@@ -563,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
563 /* We don't bother saving the ss register */ 667 /* We don't bother saving the ss register */
564 " pushq %rsp\n" 668 " pushq %rsp\n"
565 " pushfq\n" 669 " pushfq\n"
566 /* 670 SAVE_REGS_STRING
567 * Skip cs, ip, orig_ax.
568 * trampoline_handler() will plug in these values
569 */
570 " subq $24, %rsp\n"
571 " pushq %rdi\n"
572 " pushq %rsi\n"
573 " pushq %rdx\n"
574 " pushq %rcx\n"
575 " pushq %rax\n"
576 " pushq %r8\n"
577 " pushq %r9\n"
578 " pushq %r10\n"
579 " pushq %r11\n"
580 " pushq %rbx\n"
581 " pushq %rbp\n"
582 " pushq %r12\n"
583 " pushq %r13\n"
584 " pushq %r14\n"
585 " pushq %r15\n"
586 " movq %rsp, %rdi\n" 671 " movq %rsp, %rdi\n"
587 " call trampoline_handler\n" 672 " call trampoline_handler\n"
588 /* Replace saved sp with true return address. */ 673 /* Replace saved sp with true return address. */
589 " movq %rax, 152(%rsp)\n" 674 " movq %rax, 152(%rsp)\n"
590 " popq %r15\n" 675 RESTORE_REGS_STRING
591 " popq %r14\n"
592 " popq %r13\n"
593 " popq %r12\n"
594 " popq %rbp\n"
595 " popq %rbx\n"
596 " popq %r11\n"
597 " popq %r10\n"
598 " popq %r9\n"
599 " popq %r8\n"
600 " popq %rax\n"
601 " popq %rcx\n"
602 " popq %rdx\n"
603 " popq %rsi\n"
604 " popq %rdi\n"
605 /* Skip orig_ax, ip, cs */
606 " addq $24, %rsp\n"
607 " popfq\n" 676 " popfq\n"
608#else 677#else
609 " pushf\n" 678 " pushf\n"
610 /* 679 SAVE_REGS_STRING
611 * Skip cs, ip, orig_ax and gs.
612 * trampoline_handler() will plug in these values
613 */
614 " subl $16, %esp\n"
615 " pushl %fs\n"
616 " pushl %es\n"
617 " pushl %ds\n"
618 " pushl %eax\n"
619 " pushl %ebp\n"
620 " pushl %edi\n"
621 " pushl %esi\n"
622 " pushl %edx\n"
623 " pushl %ecx\n"
624 " pushl %ebx\n"
625 " movl %esp, %eax\n" 680 " movl %esp, %eax\n"
626 " call trampoline_handler\n" 681 " call trampoline_handler\n"
627 /* Move flags to cs */ 682 /* Move flags to cs */
@@ -629,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
629 " movl %edx, 52(%esp)\n" 684 " movl %edx, 52(%esp)\n"
630 /* Replace saved flags with true return address. */ 685 /* Replace saved flags with true return address. */
631 " movl %eax, 56(%esp)\n" 686 " movl %eax, 56(%esp)\n"
632 " popl %ebx\n" 687 RESTORE_REGS_STRING
633 " popl %ecx\n"
634 " popl %edx\n"
635 " popl %esi\n"
636 " popl %edi\n"
637 " popl %ebp\n"
638 " popl %eax\n"
639 /* Skip ds, es, fs, gs, orig_ax and ip */
640 " addl $24, %esp\n"
641 " popf\n" 688 " popf\n"
642#endif 689#endif
643 " ret\n"); 690 " ret\n");
@@ -805,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p,
805 * These instructions can be executed directly if it 852 * These instructions can be executed directly if it
806 * jumps back to correct address. 853 * jumps back to correct address.
807 */ 854 */
808 set_jmp_op((void *)regs->ip, 855 synthesize_reljump((void *)regs->ip,
809 (void *)orig_ip + (regs->ip - copy_ip)); 856 (void *)orig_ip + (regs->ip - copy_ip));
810 p->ainsn.boostable = 1; 857 p->ainsn.boostable = 1;
811 } else { 858 } else {
812 p->ainsn.boostable = -1; 859 p->ainsn.boostable = -1;
@@ -1033,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1033 return 0; 1080 return 0;
1034} 1081}
1035 1082
1083
1084#ifdef CONFIG_OPTPROBES
1085
1086/* Insert a call instruction at address 'from', which calls address 'to'.*/
1087static void __kprobes synthesize_relcall(void *from, void *to)
1088{
1089 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1090}
1091
1092/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1093static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1094 unsigned long val)
1095{
1096#ifdef CONFIG_X86_64
1097 *addr++ = 0x48;
1098 *addr++ = 0xbf;
1099#else
1100 *addr++ = 0xb8;
1101#endif
1102 *(unsigned long *)addr = val;
1103}
1104
1105void __kprobes kprobes_optinsn_template_holder(void)
1106{
1107 asm volatile (
1108 ".global optprobe_template_entry\n"
1109 "optprobe_template_entry: \n"
1110#ifdef CONFIG_X86_64
1111 /* We don't bother saving the ss register */
1112 " pushq %rsp\n"
1113 " pushfq\n"
1114 SAVE_REGS_STRING
1115 " movq %rsp, %rsi\n"
1116 ".global optprobe_template_val\n"
1117 "optprobe_template_val: \n"
1118 ASM_NOP5
1119 ASM_NOP5
1120 ".global optprobe_template_call\n"
1121 "optprobe_template_call: \n"
1122 ASM_NOP5
1123 /* Move flags to rsp */
1124 " movq 144(%rsp), %rdx\n"
1125 " movq %rdx, 152(%rsp)\n"
1126 RESTORE_REGS_STRING
1127 /* Skip flags entry */
1128 " addq $8, %rsp\n"
1129 " popfq\n"
1130#else /* CONFIG_X86_32 */
1131 " pushf\n"
1132 SAVE_REGS_STRING
1133 " movl %esp, %edx\n"
1134 ".global optprobe_template_val\n"
1135 "optprobe_template_val: \n"
1136 ASM_NOP5
1137 ".global optprobe_template_call\n"
1138 "optprobe_template_call: \n"
1139 ASM_NOP5
1140 RESTORE_REGS_STRING
1141 " addl $4, %esp\n" /* skip cs */
1142 " popf\n"
1143#endif
1144 ".global optprobe_template_end\n"
1145 "optprobe_template_end: \n");
1146}
1147
1148#define TMPL_MOVE_IDX \
1149 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1150#define TMPL_CALL_IDX \
1151 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1152#define TMPL_END_IDX \
1153 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1154
1155#define INT3_SIZE sizeof(kprobe_opcode_t)
1156
1157/* Optimized kprobe call back function: called from optinsn */
1158static void __kprobes optimized_callback(struct optimized_kprobe *op,
1159 struct pt_regs *regs)
1160{
1161 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1162
1163 preempt_disable();
1164 if (kprobe_running()) {
1165 kprobes_inc_nmissed_count(&op->kp);
1166 } else {
1167 /* Save skipped registers */
1168#ifdef CONFIG_X86_64
1169 regs->cs = __KERNEL_CS;
1170#else
1171 regs->cs = __KERNEL_CS | get_kernel_rpl();
1172 regs->gs = 0;
1173#endif
1174 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1175 regs->orig_ax = ~0UL;
1176
1177 __get_cpu_var(current_kprobe) = &op->kp;
1178 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1179 opt_pre_handler(&op->kp, regs);
1180 __get_cpu_var(current_kprobe) = NULL;
1181 }
1182 preempt_enable_no_resched();
1183}
1184
1185static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1186{
1187 int len = 0, ret;
1188
1189 while (len < RELATIVEJUMP_SIZE) {
1190 ret = __copy_instruction(dest + len, src + len, 1);
1191 if (!ret || !can_boost(dest + len))
1192 return -EINVAL;
1193 len += ret;
1194 }
1195 /* Check whether the address range is reserved */
1196 if (ftrace_text_reserved(src, src + len - 1) ||
1197 alternatives_text_reserved(src, src + len - 1))
1198 return -EBUSY;
1199
1200 return len;
1201}
1202
1203/* Check whether insn is indirect jump */
1204static int __kprobes insn_is_indirect_jump(struct insn *insn)
1205{
1206 return ((insn->opcode.bytes[0] == 0xff &&
1207 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1208 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1209}
1210
1211/* Check whether insn jumps into specified address range */
1212static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1213{
1214 unsigned long target = 0;
1215
1216 switch (insn->opcode.bytes[0]) {
1217 case 0xe0: /* loopne */
1218 case 0xe1: /* loope */
1219 case 0xe2: /* loop */
1220 case 0xe3: /* jcxz */
1221 case 0xe9: /* near relative jump */
1222 case 0xeb: /* short relative jump */
1223 break;
1224 case 0x0f:
1225 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1226 break;
1227 return 0;
1228 default:
1229 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1230 break;
1231 return 0;
1232 }
1233 target = (unsigned long)insn->next_byte + insn->immediate.value;
1234
1235 return (start <= target && target <= start + len);
1236}
1237
1238/* Decode the whole function to ensure that no instruction jumps into the target */
1239static int __kprobes can_optimize(unsigned long paddr)
1240{
1241 int ret;
1242 unsigned long addr, size = 0, offset = 0;
1243 struct insn insn;
1244 kprobe_opcode_t buf[MAX_INSN_SIZE];
1245 /* Dummy buffers for lookup_symbol_attrs */
1246 static char __dummy_buf[KSYM_NAME_LEN];
1247
1248 /* Lookup symbol including addr */
1249 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
1250 return 0;
1251
1252 /* Check there is enough space for a relative jump. */
1253 if (size - offset < RELATIVEJUMP_SIZE)
1254 return 0;
1255
1256 /* Decode instructions */
1257 addr = paddr - offset;
1258 while (addr < paddr - offset + size) { /* Decode until function end */
1259 if (search_exception_tables(addr))
1260 /*
1261 * Since some fixup code will jump into this function,
1262 * we can't optimize kprobe in this function.
1263 */
1264 return 0;
1265 kernel_insn_init(&insn, (void *)addr);
1266 insn_get_opcode(&insn);
1267 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1268 ret = recover_probed_instruction(buf, addr);
1269 if (ret)
1270 return 0;
1271 kernel_insn_init(&insn, buf);
1272 }
1273 insn_get_length(&insn);
1274 /* Recover address */
1275 insn.kaddr = (void *)addr;
1276 insn.next_byte = (void *)(addr + insn.length);
1277 /* Check any instructions don't jump into target */
1278 if (insn_is_indirect_jump(&insn) ||
1279 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1280 RELATIVE_ADDR_SIZE))
1281 return 0;
1282 addr += insn.length;
1283 }
1284
1285 return 1;
1286}
1287
1288/* Check optimized_kprobe can actually be optimized. */
1289int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1290{
1291 int i;
1292 struct kprobe *p;
1293
1294 for (i = 1; i < op->optinsn.size; i++) {
1295 p = get_kprobe(op->kp.addr + i);
1296 if (p && !kprobe_disabled(p))
1297 return -EEXIST;
1298 }
1299
1300 return 0;
1301}
1302
1303/* Check the addr is within the optimized instructions. */
1304int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1305 unsigned long addr)
1306{
1307 return ((unsigned long)op->kp.addr <= addr &&
1308 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1309}
1310
1311/* Free optimized instruction slot */
1312static __kprobes
1313void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1314{
1315 if (op->optinsn.insn) {
1316 free_optinsn_slot(op->optinsn.insn, dirty);
1317 op->optinsn.insn = NULL;
1318 op->optinsn.size = 0;
1319 }
1320}
1321
1322void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1323{
1324 __arch_remove_optimized_kprobe(op, 1);
1325}
1326
1327/*
1328 * Copy replacing target instructions
1329 * Target instructions MUST be relocatable (checked inside)
1330 */
1331int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1332{
1333 u8 *buf;
1334 int ret;
1335 long rel;
1336
1337 if (!can_optimize((unsigned long)op->kp.addr))
1338 return -EILSEQ;
1339
1340 op->optinsn.insn = get_optinsn_slot();
1341 if (!op->optinsn.insn)
1342 return -ENOMEM;
1343
1344 /*
1345 * Verify if the address gap is in 2GB range, because this uses
1346 * a relative jump.
1347 */
1348 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1349 if (abs(rel) > 0x7fffffff)
1350 return -ERANGE;
1351
1352 buf = (u8 *)op->optinsn.insn;
1353
1354 /* Copy instructions into the out-of-line buffer */
1355 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1356 if (ret < 0) {
1357 __arch_remove_optimized_kprobe(op, 0);
1358 return ret;
1359 }
1360 op->optinsn.size = ret;
1361
1362 /* Copy arch-dep-instance from template */
1363 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1364
1365 /* Set probe information */
1366 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1367
1368 /* Set probe function call */
1369 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1370
1371 /* Set returning jmp instruction at the tail of out-of-line buffer */
1372 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1373 (u8 *)op->kp.addr + op->optinsn.size);
1374
1375 flush_icache_range((unsigned long) buf,
1376 (unsigned long) buf + TMPL_END_IDX +
1377 op->optinsn.size + RELATIVEJUMP_SIZE);
1378 return 0;
1379}
1380
1381/* Replace a breakpoint (int3) with a relative jump. */
1382int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1383{
1384 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1385 s32 rel = (s32)((long)op->optinsn.insn -
1386 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1387
1388 /* Backup instructions which will be replaced by jump address */
1389 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1390 RELATIVE_ADDR_SIZE);
1391
1392 jmp_code[0] = RELATIVEJUMP_OPCODE;
1393 *(s32 *)(&jmp_code[1]) = rel;
1394
1395 /*
1396 * text_poke_smp doesn't support NMI/MCE code modifying.
1397 * However, since kprobes itself also doesn't support NMI/MCE
1398 * code probing, it's not a problem.
1399 */
1400 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
1401 return 0;
1402}
1403
1404/* Replace a relative jump with a breakpoint (int3). */
1405void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1406{
1407 u8 buf[RELATIVEJUMP_SIZE];
1408
1409 /* Set int3 to first byte for kprobes */
1410 buf[0] = BREAKPOINT_INSTRUCTION;
1411 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1412 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1413}
1414
1415static int __kprobes setup_detour_execution(struct kprobe *p,
1416 struct pt_regs *regs,
1417 int reenter)
1418{
1419 struct optimized_kprobe *op;
1420
1421 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1422 /* This kprobe is really able to run optimized path. */
1423 op = container_of(p, struct optimized_kprobe, kp);
1424 /* Detour through copied instructions */
1425 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1426 if (!reenter)
1427 reset_current_kprobe();
1428 preempt_enable_no_resched();
1429 return 1;
1430 }
1431 return 0;
1432}
1433#endif
1434
1036int __init arch_init_kprobes(void) 1435int __init arch_init_kprobes(void)
1037{ 1436{
1038 return 0; 1437 return 0;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ec6ef60cbd17..ea697263b373 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/gfp.h>
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <linux/string.h> 12#include <linux/string.h>
12#include <linux/mm.h> 13#include <linux/mm.h>
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4a8bb82248ae..035c8c529181 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/kexec.h> 10#include <linux/kexec.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/gfp.h>
12#include <linux/reboot.h> 13#include <linux/reboot.h>
13#include <linux/numa.h> 14#include <linux/numa.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 845d80ce1ef1..63eaf6596233 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -42,6 +42,7 @@
42#include <linux/kernel.h> 42#include <linux/kernel.h>
43#include <linux/mca.h> 43#include <linux/mca.h>
44#include <linux/kprobes.h> 44#include <linux/kprobes.h>
45#include <linux/slab.h>
45#include <asm/system.h> 46#include <asm/system.h>
46#include <asm/io.h> 47#include <asm/io.h>
47#include <linux/proc_fs.h> 48#include <linux/proc_fs.h>
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 712d15fdc416..71825806cd44 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -7,6 +7,8 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/dmi.h> 9#include <linux/dmi.h>
10#include <linux/range.h>
11
10#include <asm/pci-direct.h> 12#include <asm/pci-direct.h>
11#include <linux/sort.h> 13#include <linux/sort.h>
12#include <asm/io.h> 14#include <asm/io.h>
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
30 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, 32 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
31}; 33};
32 34
33struct range {
34 u64 start;
35 u64 end;
36};
37
38static int __cpuinit cmp_range(const void *x1, const void *x2) 35static int __cpuinit cmp_range(const void *x1, const void *x2)
39{ 36{
40 const struct range *r1 = x1; 37 const struct range *r1 = x1;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 89f386f044e4..e0bc186d7501 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -23,6 +23,7 @@
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/bug.h> 24#include <linux/bug.h>
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/gfp.h>
26 27
27#include <asm/system.h> 28#include <asm/system.h>
28#include <asm/page.h> 29#include <asm/page.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a2c1edd2d3ac..e81030f71a8f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -664,7 +664,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)
664{ 664{
665 unsigned long size = get_mpc_size(mpf->physptr); 665 unsigned long size = get_mpc_size(mpf->physptr);
666 666
667 reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); 667 reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
668} 668}
669 669
670static int __init smp_scan_config(unsigned long base, unsigned long length) 670static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -693,7 +693,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
693 mpf, (u64)virt_to_phys(mpf)); 693 mpf, (u64)virt_to_phys(mpf));
694 694
695 mem = virt_to_phys(mpf); 695 mem = virt_to_phys(mpf);
696 reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); 696 reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
697 if (mpf->physptr) 697 if (mpf->physptr)
698 smp_reserve_memory(mpf); 698 smp_reserve_memory(mpf);
699 699
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index 3b7078abc871..0aad8670858e 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -10,8 +10,211 @@
10 * of the License. 10 * of the License.
11 */ 11 */
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sfi.h>
15#include <linux/irq.h>
16#include <linux/module.h>
13 17
14#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/mpspec_def.h>
20#include <asm/hw_irq.h>
21#include <asm/apic.h>
22#include <asm/io_apic.h>
23#include <asm/mrst.h>
24#include <asm/io.h>
25#include <asm/i8259.h>
26#include <asm/apb_timer.h>
27
28static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
29static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
30int sfi_mtimer_num;
31
32struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
33EXPORT_SYMBOL_GPL(sfi_mrtc_array);
34int sfi_mrtc_num;
35
36static inline void assign_to_mp_irq(struct mpc_intsrc *m,
37 struct mpc_intsrc *mp_irq)
38{
39 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
40}
41
42static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
43 struct mpc_intsrc *m)
44{
45 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
46}
47
48static void save_mp_irq(struct mpc_intsrc *m)
49{
50 int i;
51
52 for (i = 0; i < mp_irq_entries; i++) {
53 if (!mp_irq_cmp(&mp_irqs[i], m))
54 return;
55 }
56
57 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
58 if (++mp_irq_entries == MAX_IRQ_SOURCES)
59 panic("Max # of irq sources exceeded!!\n");
60}
61
62/* parse all the mtimer info to a static mtimer array */
63static int __init sfi_parse_mtmr(struct sfi_table_header *table)
64{
65 struct sfi_table_simple *sb;
66 struct sfi_timer_table_entry *pentry;
67 struct mpc_intsrc mp_irq;
68 int totallen;
69
70 sb = (struct sfi_table_simple *)table;
71 if (!sfi_mtimer_num) {
72 sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
73 struct sfi_timer_table_entry);
74 pentry = (struct sfi_timer_table_entry *) sb->pentry;
75 totallen = sfi_mtimer_num * sizeof(*pentry);
76 memcpy(sfi_mtimer_array, pentry, totallen);
77 }
78
79 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
80 pentry = sfi_mtimer_array;
81 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
82 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
83 " irq = %d\n", totallen, (u32)pentry->phys_addr,
84 pentry->freq_hz, pentry->irq);
85 if (!pentry->irq)
86 continue;
87 mp_irq.type = MP_IOAPIC;
88 mp_irq.irqtype = mp_INT;
89/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
90 mp_irq.irqflag = 5;
91 mp_irq.srcbus = 0;
92 mp_irq.srcbusirq = pentry->irq; /* IRQ */
93 mp_irq.dstapic = MP_APIC_ALL;
94 mp_irq.dstirq = pentry->irq;
95 save_mp_irq(&mp_irq);
96 }
97
98 return 0;
99}
100
101struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
102{
103 int i;
104 if (hint < sfi_mtimer_num) {
105 if (!sfi_mtimer_usage[hint]) {
106 pr_debug("hint taken for timer %d irq %d\n",\
107 hint, sfi_mtimer_array[hint].irq);
108 sfi_mtimer_usage[hint] = 1;
109 return &sfi_mtimer_array[hint];
110 }
111 }
112 /* take the first timer available */
113 for (i = 0; i < sfi_mtimer_num;) {
114 if (!sfi_mtimer_usage[i]) {
115 sfi_mtimer_usage[i] = 1;
116 return &sfi_mtimer_array[i];
117 }
118 i++;
119 }
120 return NULL;
121}
122
123void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
124{
125 int i;
126 for (i = 0; i < sfi_mtimer_num;) {
127 if (mtmr->irq == sfi_mtimer_array[i].irq) {
128 sfi_mtimer_usage[i] = 0;
129 return;
130 }
131 i++;
132 }
133}
134
135/* parse all the mrtc info to a global mrtc array */
136int __init sfi_parse_mrtc(struct sfi_table_header *table)
137{
138 struct sfi_table_simple *sb;
139 struct sfi_rtc_table_entry *pentry;
140 struct mpc_intsrc mp_irq;
141
142 int totallen;
143
144 sb = (struct sfi_table_simple *)table;
145 if (!sfi_mrtc_num) {
146 sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
147 struct sfi_rtc_table_entry);
148 pentry = (struct sfi_rtc_table_entry *)sb->pentry;
149 totallen = sfi_mrtc_num * sizeof(*pentry);
150 memcpy(sfi_mrtc_array, pentry, totallen);
151 }
152
153 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
154 pentry = sfi_mrtc_array;
155 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
156 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
157 totallen, (u32)pentry->phys_addr, pentry->irq);
158 mp_irq.type = MP_IOAPIC;
159 mp_irq.irqtype = mp_INT;
160 mp_irq.irqflag = 0;
161 mp_irq.srcbus = 0;
162 mp_irq.srcbusirq = pentry->irq; /* IRQ */
163 mp_irq.dstapic = MP_APIC_ALL;
164 mp_irq.dstirq = pentry->irq;
165 save_mp_irq(&mp_irq);
166 }
167 return 0;
168}
169
170/*
171 * the secondary clock in Moorestown can be APBT or LAPIC clock, default to
172 * APBT but cmdline option can also override it.
173 */
174static void __cpuinit mrst_setup_secondary_clock(void)
175{
176 /* restore default lapic clock if disabled by cmdline */
177 if (disable_apbt_percpu)
178 return setup_secondary_APIC_clock();
179 apbt_setup_secondary_clock();
180}
181
182static unsigned long __init mrst_calibrate_tsc(void)
183{
184 unsigned long flags, fast_calibrate;
185
186 local_irq_save(flags);
187 fast_calibrate = apbt_quick_calibrate();
188 local_irq_restore(flags);
189
190 if (fast_calibrate)
191 return fast_calibrate;
192
193 return 0;
194}
195
196void __init mrst_time_init(void)
197{
198 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
199 pre_init_apic_IRQ0();
200 apbt_time_init();
201}
202
203void __init mrst_rtc_init(void)
204{
205 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
206}
207
208/*
209 * if we use per cpu apb timer, the bootclock already setup. if we use lapic
210 * timer and one apbt timer for broadcast, we need to set up lapic boot clock.
211 */
212static void __init mrst_setup_boot_clock(void)
213{
214 pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu);
215 if (disable_apbt_percpu)
216 setup_boot_APIC_clock();
217};
15 218
16/* 219/*
17 * Moorestown specific x86_init function overrides and early setup 220 * Moorestown specific x86_init function overrides and early setup
@@ -21,4 +224,17 @@ void __init x86_mrst_early_setup(void)
21{ 224{
22 x86_init.resources.probe_roms = x86_init_noop; 225 x86_init.resources.probe_roms = x86_init_noop;
23 x86_init.resources.reserve_resources = x86_init_noop; 226 x86_init.resources.reserve_resources = x86_init_noop;
227
228 x86_init.timers.timer_init = mrst_time_init;
229 x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock;
230
231 x86_init.irqs.pre_vector_init = x86_init_noop;
232
233 x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
234
235 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
236 x86_init.pci.init = pci_mrst_init;
237 x86_init.pci.fixup_irqs = x86_init_noop;
238
239 legacy_pic = &null_legacy_pic;
24} 240}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 206735ac8cbd..4d4468e9f47c 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -37,6 +37,7 @@
37#include <linux/cpu.h> 37#include <linux/cpu.h>
38#include <linux/notifier.h> 38#include <linux/notifier.h>
39#include <linux/uaccess.h> 39#include <linux/uaccess.h>
40#include <linux/gfp.h>
40 41
41#include <asm/processor.h> 42#include <asm/processor.h>
42#include <asm/msr.h> 43#include <asm/msr.h>
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 9d1d263f786f..8297160c41b3 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -17,7 +17,9 @@
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20
20#include <asm/geode.h> 21#include <asm/geode.h>
22#include <asm/setup.h>
21#include <asm/olpc.h> 23#include <asm/olpc.h>
22 24
23#ifdef CONFIG_OPEN_FIRMWARE 25#ifdef CONFIG_OPEN_FIRMWARE
@@ -243,9 +245,11 @@ static int __init olpc_init(void)
243 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, 245 olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
244 (unsigned char *) &olpc_platform_info.ecver, 1); 246 (unsigned char *) &olpc_platform_info.ecver, 1);
245 247
246 /* check to see if the VSA exists */ 248#ifdef CONFIG_PCI_OLPC
247 if (cs5535_has_vsa2()) 249 /* If the VSA exists let it emulate PCI, if not emulate in kernel */
248 olpc_platform_info.flags |= OLPC_F_VSA; 250 if (!cs5535_has_vsa2())
251 x86_init.pci.arch_init = pci_olpc_init;
252#endif
249 253
250 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", 254 printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
251 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", 255 ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b1739d16310..1db183ed7c01 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = {
428 .ptep_modify_prot_start = __ptep_modify_prot_start, 428 .ptep_modify_prot_start = __ptep_modify_prot_start,
429 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 429 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
430 430
431#ifdef CONFIG_HIGHPTE
432 .kmap_atomic_pte = kmap_atomic,
433#endif
434
435#if PAGETABLE_LEVELS >= 3 431#if PAGETABLE_LEVELS >= 3
436#ifdef CONFIG_X86_PAE 432#ifdef CONFIG_X86_PAE
437 .set_pte_atomic = native_set_pte_atomic, 433 .set_pte_atomic = native_set_pte_atomic,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2bbde6078143..fb99f7edb341 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1309,7 +1309,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
1309/* 1309/*
1310 * get_tce_space_from_tar(): 1310 * get_tce_space_from_tar():
1311 * Function for kdump case. Get the tce tables from first kernel 1311 * Function for kdump case. Get the tce tables from first kernel
1312 * by reading the contents of the base adress register of calgary iommu 1312 * by reading the contents of the base address register of calgary iommu
1313 */ 1313 */
1314static void __init get_tce_space_from_tar(void) 1314static void __init get_tce_space_from_tar(void)
1315{ 1315{
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 75e14e21f61a..4b7e3d8b01dd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -2,6 +2,7 @@
2#include <linux/dma-debug.h> 2#include <linux/dma-debug.h>
3#include <linux/dmar.h> 3#include <linux/dmar.h>
4#include <linux/bootmem.h> 4#include <linux/bootmem.h>
5#include <linux/gfp.h>
5#include <linux/pci.h> 6#include <linux/pci.h>
6#include <linux/kmemleak.h> 7#include <linux/kmemleak.h>
7 8
@@ -38,7 +39,7 @@ int iommu_detected __read_mostly = 0;
38 * This variable becomes 1 if iommu=pt is passed on the kernel command line. 39 * This variable becomes 1 if iommu=pt is passed on the kernel command line.
39 * If this variable is 1, IOMMU implementations do no DMA translation for 40 * If this variable is 1, IOMMU implementations do no DMA translation for
40 * devices and allow every device to access to whole physical memory. This is 41 * devices and allow every device to access to whole physical memory. This is
41 * useful if a user want to use an IOMMU only for KVM device assignment to 42 * useful if a user wants to use an IOMMU only for KVM device assignment to
42 * guests and not for driver dma translation. 43 * guests and not for driver dma translation.
43 */ 44 */
44int iommu_pass_through __read_mostly; 45int iommu_pass_through __read_mostly;
@@ -65,7 +66,7 @@ int dma_set_mask(struct device *dev, u64 mask)
65} 66}
66EXPORT_SYMBOL(dma_set_mask); 67EXPORT_SYMBOL(dma_set_mask);
67 68
68#ifdef CONFIG_X86_64 69#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
69static __initdata void *dma32_bootmem_ptr; 70static __initdata void *dma32_bootmem_ptr;
70static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); 71static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
71 72
@@ -116,14 +117,21 @@ static void __init dma32_free_bootmem(void)
116 dma32_bootmem_ptr = NULL; 117 dma32_bootmem_ptr = NULL;
117 dma32_bootmem_size = 0; 118 dma32_bootmem_size = 0;
118} 119}
120#else
121void __init dma32_reserve_bootmem(void)
122{
123}
124static void __init dma32_free_bootmem(void)
125{
126}
127
119#endif 128#endif
120 129
121void __init pci_iommu_alloc(void) 130void __init pci_iommu_alloc(void)
122{ 131{
123#ifdef CONFIG_X86_64
124 /* free the range so iommu could get some range less than 4G */ 132 /* free the range so iommu could get some range less than 4G */
125 dma32_free_bootmem(); 133 dma32_free_bootmem();
126#endif 134
127 if (pci_swiotlb_detect()) 135 if (pci_swiotlb_detect())
128 goto out; 136 goto out;
129 137
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 34de53b46f87..0f7f130caa67 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -29,6 +29,7 @@
29#include <linux/iommu-helper.h> 29#include <linux/iommu-helper.h>
30#include <linux/sysdev.h> 30#include <linux/sysdev.h>
31#include <linux/io.h> 31#include <linux/io.h>
32#include <linux/gfp.h>
32#include <asm/atomic.h> 33#include <asm/atomic.h>
33#include <asm/mtrr.h> 34#include <asm/mtrr.h>
34#include <asm/pgtable.h> 35#include <asm/pgtable.h>
@@ -564,6 +565,9 @@ static void enable_gart_translations(void)
564 565
565 enable_gart_translation(dev, __pa(agp_gatt_table)); 566 enable_gart_translation(dev, __pa(agp_gatt_table));
566 } 567 }
568
569 /* Flush the GART-TLB to remove stale entries */
570 k8_flush_garts();
567} 571}
568 572
569/* 573/*
@@ -735,7 +739,7 @@ int __init gart_iommu_init(void)
735 unsigned long scratch; 739 unsigned long scratch;
736 long i; 740 long i;
737 741
738 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) 742 if (num_k8_northbridges == 0)
739 return 0; 743 return 0;
740 744
741#ifndef CONFIG_AGP_AMD64 745#ifndef CONFIG_AGP_AMD64
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 22be12b60a8f..3af4af810c07 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -4,6 +4,7 @@
4#include <linux/scatterlist.h> 4#include <linux/scatterlist.h>
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/gfp.h>
7#include <linux/pci.h> 8#include <linux/pci.h>
8#include <linux/mm.h> 9#include <linux/mm.h>
9 10
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 02d678065d7d..28ad9f4d8b94 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -526,21 +526,37 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
526} 526}
527 527
528/* 528/*
529 * Check for AMD CPUs, which have potentially C1E support 529 * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e.
530 * For more information see
531 * - Erratum #400 for NPT family 0xf and family 0x10 CPUs
532 * - Erratum #365 for family 0x11 (not affected because C1e not in use)
530 */ 533 */
531static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) 534static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
532{ 535{
536 u64 val;
533 if (c->x86_vendor != X86_VENDOR_AMD) 537 if (c->x86_vendor != X86_VENDOR_AMD)
534 return 0; 538 goto no_c1e_idle;
535
536 if (c->x86 < 0x0F)
537 return 0;
538 539
539 /* Family 0x0f models < rev F do not have C1E */ 540 /* Family 0x0f models < rev F do not have C1E */
540 if (c->x86 == 0x0f && c->x86_model < 0x40) 541 if (c->x86 == 0x0F && c->x86_model >= 0x40)
541 return 0; 542 return 1;
542 543
543 return 1; 544 if (c->x86 == 0x10) {
545 /*
546 * check OSVW bit for CPUs that are not affected
547 * by erratum #400
548 */
549 rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
550 if (val >= 2) {
551 rdmsrl(MSR_AMD64_OSVW_STATUS, val);
552 if (!(val & BIT(1)))
553 goto no_c1e_idle;
554 }
555 return 1;
556 }
557
558no_c1e_idle:
559 return 0;
544} 560}
545 561
546static cpumask_var_t c1e_mask; 562static cpumask_var_t c1e_mask;
@@ -607,7 +623,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
607{ 623{
608#ifdef CONFIG_SMP 624#ifdef CONFIG_SMP
609 if (pm_idle == poll_idle && smp_num_siblings > 1) { 625 if (pm_idle == poll_idle && smp_num_siblings > 1) {
610 printk(KERN_WARNING "WARNING: polling idle and HT enabled," 626 printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
611 " performance may degrade.\n"); 627 " performance may degrade.\n");
612 } 628 }
613#endif 629#endif
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dc9690b4c4cc..17cb3295cbf7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -276,12 +276,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
276 276
277 set_tsk_thread_flag(p, TIF_FORK); 277 set_tsk_thread_flag(p, TIF_FORK);
278 278
279 p->thread.fs = me->thread.fs;
280 p->thread.gs = me->thread.gs;
281 p->thread.io_bitmap_ptr = NULL; 279 p->thread.io_bitmap_ptr = NULL;
282 280
283 savesegment(gs, p->thread.gsindex); 281 savesegment(gs, p->thread.gsindex);
282 p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
284 savesegment(fs, p->thread.fsindex); 283 savesegment(fs, p->thread.fsindex);
284 p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
285 savesegment(es, p->thread.es); 285 savesegment(es, p->thread.es);
286 savesegment(ds, p->thread.ds); 286 savesegment(ds, p->thread.ds);
287 287
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2d96aab82a48..2e9b55027b7e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -12,6 +12,7 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <linux/slab.h>
15#include <linux/ptrace.h> 16#include <linux/ptrace.h>
16#include <linux/regset.h> 17#include <linux/regset.h>
17#include <linux/tracehook.h> 18#include <linux/tracehook.h>
@@ -581,7 +582,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
581 struct perf_event_attr attr; 582 struct perf_event_attr attr;
582 583
583 /* 584 /*
584 * We shoud have at least an inactive breakpoint at this 585 * We should have at least an inactive breakpoint at this
585 * slot. It means the user is writing dr7 without having 586 * slot. It means the user is writing dr7 without having
586 * written the address register first 587 * written the address register first
587 */ 588 */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 704bddcdf64d..8e1aac86b50c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
461 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), 461 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
462 }, 462 },
463 }, 463 },
464 { /* Handle problems with rebooting on the iMac9,1. */
465 .callback = set_pci_reboot,
466 .ident = "Apple iMac9,1",
467 .matches = {
468 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
469 DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
470 },
471 },
464 { } 472 { }
465}; 473};
466 474
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb42109a55b4..c4851eff57b3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -55,7 +55,6 @@
55#include <linux/stddef.h> 55#include <linux/stddef.h>
56#include <linux/unistd.h> 56#include <linux/unistd.h>
57#include <linux/ptrace.h> 57#include <linux/ptrace.h>
58#include <linux/slab.h>
59#include <linux/user.h> 58#include <linux/user.h>
60#include <linux/delay.h> 59#include <linux/delay.h>
61 60
@@ -314,16 +313,17 @@ static void __init reserve_brk(void)
314#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) 313#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
315static void __init relocate_initrd(void) 314static void __init relocate_initrd(void)
316{ 315{
317 316 /* Assume only end is not page aligned */
318 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 317 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
319 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 318 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
319 u64 area_size = PAGE_ALIGN(ramdisk_size);
320 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; 320 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
321 u64 ramdisk_here; 321 u64 ramdisk_here;
322 unsigned long slop, clen, mapaddr; 322 unsigned long slop, clen, mapaddr;
323 char *p, *q; 323 char *p, *q;
324 324
325 /* We need to move the initrd down into lowmem */ 325 /* We need to move the initrd down into lowmem */
326 ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, 326 ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
327 PAGE_SIZE); 327 PAGE_SIZE);
328 328
329 if (ramdisk_here == -1ULL) 329 if (ramdisk_here == -1ULL)
@@ -332,7 +332,7 @@ static void __init relocate_initrd(void)
332 332
333 /* Note: this includes all the lowmem currently occupied by 333 /* Note: this includes all the lowmem currently occupied by
334 the initrd, we rely on that fact to keep the data intact. */ 334 the initrd, we rely on that fact to keep the data intact. */
335 reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, 335 reserve_early(ramdisk_here, ramdisk_here + area_size,
336 "NEW RAMDISK"); 336 "NEW RAMDISK");
337 initrd_start = ramdisk_here + PAGE_OFFSET; 337 initrd_start = ramdisk_here + PAGE_OFFSET;
338 initrd_end = initrd_start + ramdisk_size; 338 initrd_end = initrd_start + ramdisk_size;
@@ -376,9 +376,10 @@ static void __init relocate_initrd(void)
376 376
377static void __init reserve_initrd(void) 377static void __init reserve_initrd(void)
378{ 378{
379 /* Assume only end is not page aligned */
379 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 380 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
380 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 381 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
381 u64 ramdisk_end = ramdisk_image + ramdisk_size; 382 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
382 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; 383 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
383 384
384 if (!boot_params.hdr.type_of_loader || 385 if (!boot_params.hdr.type_of_loader ||
@@ -606,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg)
606early_param("elfcorehdr", setup_elfcorehdr); 607early_param("elfcorehdr", setup_elfcorehdr);
607#endif 608#endif
608 609
610static __init void reserve_ibft_region(void)
611{
612 unsigned long addr, size = 0;
613
614 addr = find_ibft_region(&size);
615
616 if (size)
617 reserve_early_overlap_ok(addr, addr + size, "ibft");
618}
619
609#ifdef CONFIG_X86_RESERVE_LOW_64K 620#ifdef CONFIG_X86_RESERVE_LOW_64K
610static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) 621static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
611{ 622{
@@ -908,6 +919,8 @@ void __init setup_arch(char **cmdline_p)
908 */ 919 */
909 find_smp_config(); 920 find_smp_config();
910 921
922 reserve_ibft_region();
923
911 reserve_trampoline_memory(); 924 reserve_trampoline_memory();
912 925
913#ifdef CONFIG_ACPI_SLEEP 926#ifdef CONFIG_ACPI_SLEEP
@@ -969,17 +982,11 @@ void __init setup_arch(char **cmdline_p)
969#endif 982#endif
970 983
971 initmem_init(0, max_pfn, acpi, k8); 984 initmem_init(0, max_pfn, acpi, k8);
972 985#ifndef CONFIG_NO_BOOTMEM
973#ifdef CONFIG_X86_64 986 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
974 /*
975 * dma32_reserve_bootmem() allocates bootmem which may conflict
976 * with the crashkernel command line, so do that after
977 * reserve_crashkernel()
978 */
979 dma32_reserve_bootmem();
980#endif 987#endif
981 988
982 reserve_ibft_region(); 989 dma32_reserve_bootmem();
983 990
984#ifdef CONFIG_KVM_CLOCK 991#ifdef CONFIG_KVM_CLOCK
985 kvmclock_init(); 992 kvmclock_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 35abcb8b00e9..ef6370b00e70 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
137 137
138static void __init pcpu_fc_free(void *ptr, size_t size) 138static void __init pcpu_fc_free(void *ptr, size_t size)
139{ 139{
140#ifdef CONFIG_NO_BOOTMEM
141 u64 start = __pa(ptr);
142 u64 end = start + size;
143 free_early_partial(start, end);
144#else
140 free_bootmem(__pa(ptr), size); 145 free_bootmem(__pa(ptr), size);
146#endif
141} 147}
142 148
143static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) 149static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index ec1de97600e7..d801210945d6 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -21,6 +21,7 @@
21#include <linux/cache.h> 21#include <linux/cache.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/gfp.h>
24 25
25#include <asm/mtrr.h> 26#include <asm/mtrr.h>
26#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9b4401115ea1..763d815e27a0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -48,6 +48,8 @@
48#include <linux/err.h> 48#include <linux/err.h>
49#include <linux/nmi.h> 49#include <linux/nmi.h>
50#include <linux/tboot.h> 50#include <linux/tboot.h>
51#include <linux/stackprotector.h>
52#include <linux/gfp.h>
51 53
52#include <asm/acpi.h> 54#include <asm/acpi.h>
53#include <asm/desc.h> 55#include <asm/desc.h>
@@ -67,6 +69,7 @@
67#include <linux/mc146818rtc.h> 69#include <linux/mc146818rtc.h>
68 70
69#include <asm/smpboot_hooks.h> 71#include <asm/smpboot_hooks.h>
72#include <asm/i8259.h>
70 73
71#ifdef CONFIG_X86_32 74#ifdef CONFIG_X86_32
72u8 apicid_2_node[MAX_APICID]; 75u8 apicid_2_node[MAX_APICID];
@@ -240,7 +243,10 @@ static void __cpuinit smp_callin(void)
240 end_local_APIC_setup(); 243 end_local_APIC_setup();
241 map_cpu_to_logical_apicid(); 244 map_cpu_to_logical_apicid();
242 245
243 notify_cpu_starting(cpuid); 246 /*
247 * Need to setup vector mappings before we enable interrupts.
248 */
249 setup_vector_irq(smp_processor_id());
244 /* 250 /*
245 * Get our bogomips. 251 * Get our bogomips.
246 * 252 *
@@ -257,6 +263,8 @@ static void __cpuinit smp_callin(void)
257 */ 263 */
258 smp_store_cpu_info(cpuid); 264 smp_store_cpu_info(cpuid);
259 265
266 notify_cpu_starting(cpuid);
267
260 /* 268 /*
261 * Allow the master to continue. 269 * Allow the master to continue.
262 */ 270 */
@@ -286,9 +294,9 @@ notrace static void __cpuinit start_secondary(void *unused)
286 check_tsc_sync_target(); 294 check_tsc_sync_target();
287 295
288 if (nmi_watchdog == NMI_IO_APIC) { 296 if (nmi_watchdog == NMI_IO_APIC) {
289 disable_8259A_irq(0); 297 legacy_pic->chip->mask(0);
290 enable_NMI_through_LVT0(); 298 enable_NMI_through_LVT0();
291 enable_8259A_irq(0); 299 legacy_pic->chip->unmask(0);
292 } 300 }
293 301
294#ifdef CONFIG_X86_32 302#ifdef CONFIG_X86_32
@@ -315,7 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
315 */ 323 */
316 ipi_call_lock(); 324 ipi_call_lock();
317 lock_vector_lock(); 325 lock_vector_lock();
318 __setup_vector_irq(smp_processor_id());
319 set_cpu_online(smp_processor_id(), true); 326 set_cpu_online(smp_processor_id(), true);
320 unlock_vector_lock(); 327 unlock_vector_lock();
321 ipi_call_unlock(); 328 ipi_call_unlock();
@@ -325,6 +332,9 @@ notrace static void __cpuinit start_secondary(void *unused)
325 /* enable local interrupts */ 332 /* enable local interrupts */
326 local_irq_enable(); 333 local_irq_enable();
327 334
335 /* to prevent fake stack check failure in clock setup */
336 boot_init_stack_canary();
337
328 x86_cpuinit.setup_percpu_clockev(); 338 x86_cpuinit.setup_percpu_clockev();
329 339
330 wmb(); 340 wmb();
@@ -1212,11 +1222,12 @@ __init void prefill_possible_map(void)
1212 1222
1213 total_cpus = max_t(int, possible, num_processors + disabled_cpus); 1223 total_cpus = max_t(int, possible, num_processors + disabled_cpus);
1214 1224
1215 if (possible > CONFIG_NR_CPUS) { 1225 /* nr_cpu_ids could be reduced via nr_cpus= */
1226 if (possible > nr_cpu_ids) {
1216 printk(KERN_WARNING 1227 printk(KERN_WARNING
1217 "%d Processors exceeds NR_CPUS limit of %d\n", 1228 "%d Processors exceeds NR_CPUS limit of %d\n",
1218 possible, CONFIG_NR_CPUS); 1229 possible, nr_cpu_ids);
1219 possible = CONFIG_NR_CPUS; 1230 possible = nr_cpu_ids;
1220 } 1231 }
1221 1232
1222 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1233 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index dee1ff7cba58..196552bb412c 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -25,191 +25,6 @@
25#include <asm/syscalls.h> 25#include <asm/syscalls.h>
26 26
27/* 27/*
28 * Perform the select(nd, in, out, ex, tv) and mmap() system
29 * calls. Linux/i386 didn't use to be able to handle more than
30 * 4 system call parameters, so these system calls used a memory
31 * block for parameter passing..
32 */
33
34struct mmap_arg_struct {
35 unsigned long addr;
36 unsigned long len;
37 unsigned long prot;
38 unsigned long flags;
39 unsigned long fd;
40 unsigned long offset;
41};
42
43asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
44{
45 struct mmap_arg_struct a;
46 int err = -EFAULT;
47
48 if (copy_from_user(&a, arg, sizeof(a)))
49 goto out;
50
51 err = -EINVAL;
52 if (a.offset & ~PAGE_MASK)
53 goto out;
54
55 err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
56 a.fd, a.offset >> PAGE_SHIFT);
57out:
58 return err;
59}
60
61
62struct sel_arg_struct {
63 unsigned long n;
64 fd_set __user *inp, *outp, *exp;
65 struct timeval __user *tvp;
66};
67
68asmlinkage int old_select(struct sel_arg_struct __user *arg)
69{
70 struct sel_arg_struct a;
71
72 if (copy_from_user(&a, arg, sizeof(a)))
73 return -EFAULT;
74 /* sys_select() does the appropriate kernel locking */
75 return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
76}
77
78/*
79 * sys_ipc() is the de-multiplexer for the SysV IPC calls..
80 *
81 * This is really horribly ugly.
82 */
83asmlinkage int sys_ipc(uint call, int first, int second,
84 int third, void __user *ptr, long fifth)
85{
86 int version, ret;
87
88 version = call >> 16; /* hack for backward compatibility */
89 call &= 0xffff;
90
91 switch (call) {
92 case SEMOP:
93 return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL);
94 case SEMTIMEDOP:
95 return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
96 (const struct timespec __user *)fifth);
97
98 case SEMGET:
99 return sys_semget(first, second, third);
100 case SEMCTL: {
101 union semun fourth;
102 if (!ptr)
103 return -EINVAL;
104 if (get_user(fourth.__pad, (void __user * __user *) ptr))
105 return -EFAULT;
106 return sys_semctl(first, second, third, fourth);
107 }
108
109 case MSGSND:
110 return sys_msgsnd(first, (struct msgbuf __user *) ptr,
111 second, third);
112 case MSGRCV:
113 switch (version) {
114 case 0: {
115 struct ipc_kludge tmp;
116 if (!ptr)
117 return -EINVAL;
118
119 if (copy_from_user(&tmp,
120 (struct ipc_kludge __user *) ptr,
121 sizeof(tmp)))
122 return -EFAULT;
123 return sys_msgrcv(first, tmp.msgp, second,
124 tmp.msgtyp, third);
125 }
126 default:
127 return sys_msgrcv(first,
128 (struct msgbuf __user *) ptr,
129 second, fifth, third);
130 }
131 case MSGGET:
132 return sys_msgget((key_t) first, second);
133 case MSGCTL:
134 return sys_msgctl(first, second, (struct msqid_ds __user *) ptr);
135
136 case SHMAT:
137 switch (version) {
138 default: {
139 ulong raddr;
140 ret = do_shmat(first, (char __user *) ptr, second, &raddr);
141 if (ret)
142 return ret;
143 return put_user(raddr, (ulong __user *) third);
144 }
145 case 1: /* iBCS2 emulator entry point */
146 if (!segment_eq(get_fs(), get_ds()))
147 return -EINVAL;
148 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
149 return do_shmat(first, (char __user *) ptr, second, (ulong *) third);
150 }
151 case SHMDT:
152 return sys_shmdt((char __user *)ptr);
153 case SHMGET:
154 return sys_shmget(first, second, third);
155 case SHMCTL:
156 return sys_shmctl(first, second,
157 (struct shmid_ds __user *) ptr);
158 default:
159 return -ENOSYS;
160 }
161}
162
163/*
164 * Old cruft
165 */
166asmlinkage int sys_uname(struct old_utsname __user *name)
167{
168 int err;
169 if (!name)
170 return -EFAULT;
171 down_read(&uts_sem);
172 err = copy_to_user(name, utsname(), sizeof(*name));
173 up_read(&uts_sem);
174 return err? -EFAULT:0;
175}
176
177asmlinkage int sys_olduname(struct oldold_utsname __user *name)
178{
179 int error;
180
181 if (!name)
182 return -EFAULT;
183 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
184 return -EFAULT;
185
186 down_read(&uts_sem);
187
188 error = __copy_to_user(&name->sysname, &utsname()->sysname,
189 __OLD_UTS_LEN);
190 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
191 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
192 __OLD_UTS_LEN);
193 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
194 error |= __copy_to_user(&name->release, &utsname()->release,
195 __OLD_UTS_LEN);
196 error |= __put_user(0, name->release + __OLD_UTS_LEN);
197 error |= __copy_to_user(&name->version, &utsname()->version,
198 __OLD_UTS_LEN);
199 error |= __put_user(0, name->version + __OLD_UTS_LEN);
200 error |= __copy_to_user(&name->machine, &utsname()->machine,
201 __OLD_UTS_LEN);
202 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
203
204 up_read(&uts_sem);
205
206 error = error ? -EFAULT : 0;
207
208 return error;
209}
210
211
212/*
213 * Do a system call from kernel instead of calling sys_execve so we 28 * Do a system call from kernel instead of calling sys_execve so we
214 * end up with proper pt_regs. 29 * end up with proper pt_regs.
215 */ 30 */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 8aa2057efd12..ff14a5044ce6 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -209,15 +209,3 @@ bottomup:
209 209
210 return addr; 210 return addr;
211} 211}
212
213
214SYSCALL_DEFINE1(uname, struct new_utsname __user *, name)
215{
216 int err;
217 down_read(&uts_sem);
218 err = copy_to_user(name, utsname(), sizeof(*name));
219 up_read(&uts_sem);
220 if (personality(current->personality) == PER_LINUX32)
221 err |= copy_to_user(&name->machine, "i686", 5);
222 return err ? -EFAULT : 0;
223}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 15228b5d3eb7..8b3729341216 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -81,7 +81,7 @@ ENTRY(sys_call_table)
81 .long sys_settimeofday 81 .long sys_settimeofday
82 .long sys_getgroups16 /* 80 */ 82 .long sys_getgroups16 /* 80 */
83 .long sys_setgroups16 83 .long sys_setgroups16
84 .long old_select 84 .long sys_old_select
85 .long sys_symlink 85 .long sys_symlink
86 .long sys_lstat 86 .long sys_lstat
87 .long sys_readlink /* 85 */ 87 .long sys_readlink /* 85 */
@@ -89,7 +89,7 @@ ENTRY(sys_call_table)
89 .long sys_swapon 89 .long sys_swapon
90 .long sys_reboot 90 .long sys_reboot
91 .long sys_old_readdir 91 .long sys_old_readdir
92 .long old_mmap /* 90 */ 92 .long sys_old_mmap /* 90 */
93 .long sys_munmap 93 .long sys_munmap
94 .long sys_truncate 94 .long sys_truncate
95 .long sys_ftruncate 95 .long sys_ftruncate
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be2573448ed9..fb5cc5e14cfa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
70 * manually to deassert NMI lines for the watchdog if run 70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system. 71 * on an 82489DX-based system.
72 */ 72 */
73 spin_lock(&i8259A_lock); 73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3); 74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */ 75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL); 76 inb(PIC_MASTER_POLL);
77 spin_unlock(&i8259A_lock); 77 raw_spin_unlock(&i8259A_lock);
78 } 78 }
79 79
80 global_clock_event->event_handler(global_clock_event); 80 global_clock_event->event_handler(global_clock_event);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 364d015efebc..17b03dd3a6b5 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -9,6 +9,7 @@
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/proc_fs.h> 10#include <linux/proc_fs.h>
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/slab.h>
12 13
13#include <asm/mmu_context.h> 14#include <asm/mmu_context.h>
14#include <asm/uv/uv.h> 15#include <asm/uv/uv.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 23066ecf12fa..9faf91ae1841 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -50,7 +50,7 @@ u64 native_sched_clock(void)
50 * unstable. We do this because unlike Time Of Day, 50 * unstable. We do this because unlike Time Of Day,
51 * the scheduler clock tolerates small errors and it's 51 * the scheduler clock tolerates small errors and it's
52 * very important for it to be as fast as the platform 52 * very important for it to be as fast as the platform
53 * can achive it. ) 53 * can achieve it. )
54 */ 54 */
55 if (unlikely(tsc_disabled)) { 55 if (unlikely(tsc_disabled)) {
56 /* No locking but a rare wrong value is not a big deal: */ 56 /* No locking but a rare wrong value is not a big deal: */
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
740} 740}
741#endif 741#endif
742 742
743static void resume_tsc(void) 743static void resume_tsc(struct clocksource *cs)
744{ 744{
745 clocksource_tsc.cycle_last = 0; 745 clocksource_tsc.cycle_last = 0;
746} 746}
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index ece73d8e3240..1d40336b030a 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/slab.h>
13#include <linux/irq.h> 14#include <linux/irq.h>
14 15
15#include <asm/apic.h> 16#include <asm/apic.h>
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 2b75ef638dbc..56e421bc379b 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -19,6 +19,7 @@
19 * Copyright (c) Dimitri Sivanich 19 * Copyright (c) Dimitri Sivanich
20 */ 20 */
21#include <linux/clockchips.h> 21#include <linux/clockchips.h>
22#include <linux/slab.h>
22 23
23#include <asm/uv/uv_mmrs.h> 24#include <asm/uv/uv_mmrs.h>
24#include <asm/uv/uv_hub.h> 25#include <asm/uv/uv_hub.h>
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 34a279a7471d..e680ea52db9b 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -49,11 +49,6 @@ extern int no_broadcast;
49char visws_board_type = -1; 49char visws_board_type = -1;
50char visws_board_rev = -1; 50char visws_board_rev = -1;
51 51
52int is_visws_box(void)
53{
54 return visws_board_type >= 0;
55}
56
57static void __init visws_time_init(void) 52static void __init visws_time_init(void)
58{ 53{
59 printk(KERN_INFO "Starting Cobalt Timer system clock\n"); 54 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -242,6 +237,8 @@ void __init visws_early_detect(void)
242 x86_init.irqs.pre_vector_init = visws_pre_intr_init; 237 x86_init.irqs.pre_vector_init = visws_pre_intr_init;
243 x86_init.irqs.trap_init = visws_trap_init; 238 x86_init.irqs.trap_init = visws_trap_init;
244 x86_init.timers.timer_init = visws_time_init; 239 x86_init.timers.timer_init = visws_time_init;
240 x86_init.pci.init = pci_visws_init;
241 x86_init.pci.init_irq = x86_init_noop;
245 242
246 /* 243 /*
247 * Install reboot quirks: 244 * Install reboot quirks:
@@ -508,7 +505,7 @@ static struct irq_chip cobalt_irq_type = {
508 */ 505 */
509static unsigned int startup_piix4_master_irq(unsigned int irq) 506static unsigned int startup_piix4_master_irq(unsigned int irq)
510{ 507{
511 init_8259A(0); 508 legacy_pic->init(0);
512 509
513 return startup_cobalt_irq(irq); 510 return startup_cobalt_irq(irq);
514} 511}
@@ -532,9 +529,6 @@ static struct irq_chip piix4_master_irq_type = {
532 529
533static struct irq_chip piix4_virtual_irq_type = { 530static struct irq_chip piix4_virtual_irq_type = {
534 .name = "PIIX4-virtual", 531 .name = "PIIX4-virtual",
535 .shutdown = disable_8259A_irq,
536 .enable = enable_8259A_irq,
537 .disable = disable_8259A_irq,
538}; 532};
539 533
540 534
@@ -559,7 +553,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
559 struct irq_desc *desc; 553 struct irq_desc *desc;
560 unsigned long flags; 554 unsigned long flags;
561 555
562 spin_lock_irqsave(&i8259A_lock, flags); 556 raw_spin_lock_irqsave(&i8259A_lock, flags);
563 557
564 /* Find out what's interrupting in the PIIX4 master 8259 */ 558 /* Find out what's interrupting in the PIIX4 master 8259 */
565 outb(0x0c, 0x20); /* OCW3 Poll command */ 559 outb(0x0c, 0x20); /* OCW3 Poll command */
@@ -596,7 +590,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
596 outb(0x60 + realirq, 0x20); 590 outb(0x60 + realirq, 0x20);
597 } 591 }
598 592
599 spin_unlock_irqrestore(&i8259A_lock, flags); 593 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
600 594
601 desc = irq_to_desc(realirq); 595 desc = irq_to_desc(realirq);
602 596
@@ -609,12 +603,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
609 handle_IRQ_event(realirq, desc->action); 603 handle_IRQ_event(realirq, desc->action);
610 604
611 if (!(desc->status & IRQ_DISABLED)) 605 if (!(desc->status & IRQ_DISABLED))
612 enable_8259A_irq(realirq); 606 legacy_pic->chip->unmask(realirq);
613 607
614 return IRQ_HANDLED; 608 return IRQ_HANDLED;
615 609
616out_unlock: 610out_unlock:
617 spin_unlock_irqrestore(&i8259A_lock, flags); 611 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
618 return IRQ_NONE; 612 return IRQ_NONE;
619} 613}
620 614
@@ -628,6 +622,12 @@ static struct irqaction cascade_action = {
628 .name = "cascade", 622 .name = "cascade",
629}; 623};
630 624
625static inline void set_piix4_virtual_irq_type(void)
626{
627 piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
628 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
629 piix4_virtual_irq_type.disable = i8259A_chip.mask;
630}
631 631
632void init_VISWS_APIC_irqs(void) 632void init_VISWS_APIC_irqs(void)
633{ 633{
@@ -653,6 +653,7 @@ void init_VISWS_APIC_irqs(void)
653 desc->chip = &piix4_master_irq_type; 653 desc->chip = &piix4_master_irq_type;
654 } 654 }
655 else if (i < CO_IRQ_APIC0) { 655 else if (i < CO_IRQ_APIC0) {
656 set_piix4_virtual_irq_type();
656 desc->chip = &piix4_virtual_irq_type; 657 desc->chip = &piix4_virtual_irq_type;
657 } 658 }
658 else if (IS_CO_APIC(i)) { 659 else if (IS_CO_APIC(i)) {
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index d430e4c30193..ce9fbacb7526 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -28,11 +28,13 @@
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/gfp.h>
31#include <asm/vmi.h> 32#include <asm/vmi.h>
32#include <asm/io.h> 33#include <asm/io.h>
33#include <asm/fixmap.h> 34#include <asm/fixmap.h>
34#include <asm/apicdef.h> 35#include <asm/apicdef.h>
35#include <asm/apic.h> 36#include <asm/apic.h>
37#include <asm/pgalloc.h>
36#include <asm/processor.h> 38#include <asm/processor.h>
37#include <asm/timer.h> 39#include <asm/timer.h>
38#include <asm/vmi_time.h> 40#include <asm/vmi_time.h>
@@ -266,30 +268,6 @@ static void vmi_nop(void)
266{ 268{
267} 269}
268 270
269#ifdef CONFIG_HIGHPTE
270static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
271{
272 void *va = kmap_atomic(page, type);
273
274 /*
275 * Internally, the VMI ROM must map virtual addresses to physical
276 * addresses for processing MMU updates. By the time MMU updates
277 * are issued, this information is typically already lost.
278 * Fortunately, the VMI provides a cache of mapping slots for active
279 * page tables.
280 *
281 * We use slot zero for the linear mapping of physical memory, and
282 * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1.
283 *
284 * args: SLOT VA COUNT PFN
285 */
286 BUG_ON(type != KM_PTE0 && type != KM_PTE1);
287 vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
288
289 return va;
290}
291#endif
292
293static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) 271static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
294{ 272{
295 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 273 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -640,6 +618,12 @@ static inline int __init activate_vmi(void)
640 u64 reloc; 618 u64 reloc;
641 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; 619 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
642 620
621 /*
622 * Prevent page tables from being allocated in highmem, even if
623 * CONFIG_HIGHPTE is enabled.
624 */
625 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
626
643 if (call_vrom_func(vmi_rom, vmi_init) != 0) { 627 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
644 printk(KERN_ERR "VMI ROM failed to initialize!"); 628 printk(KERN_ERR "VMI ROM failed to initialize!");
645 return 0; 629 return 0;
@@ -778,10 +762,6 @@ static inline int __init activate_vmi(void)
778 762
779 /* Set linear is needed in all cases */ 763 /* Set linear is needed in all cases */
780 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); 764 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
781#ifdef CONFIG_HIGHPTE
782 if (vmi_ops.set_linear_mapping)
783 pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
784#endif
785 765
786 /* 766 /*
787 * These MUST always be patched. Don't support indirect jumps 767 * These MUST always be patched. Don't support indirect jumps
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 74c92bb194df..5e1ff66ecd73 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void)
79 79
80static inline unsigned int vmi_get_timer_vector(void) 80static inline unsigned int vmi_get_timer_vector(void)
81{ 81{
82#ifdef CONFIG_X86_IO_APIC 82 return IRQ0_VECTOR;
83 return FIRST_DEVICE_VECTOR;
84#else
85 return FIRST_EXTERNAL_VECTOR;
86#endif
87} 83}
88 84
89/** vmi clockchip */ 85/** vmi clockchip */
@@ -171,7 +167,7 @@ static int vmi_timer_next_event(unsigned long delta,
171{ 167{
172 /* Unfortunately, set_next_event interface only passes relative 168 /* Unfortunately, set_next_event interface only passes relative
173 * expiry, but we want absolute expiry. It'd be better if were 169 * expiry, but we want absolute expiry. It'd be better if were
174 * were passed an aboslute expiry, since a bunch of time may 170 * were passed an absolute expiry, since a bunch of time may
175 * have been stolen between the time the delta is computed and 171 * have been stolen between the time the delta is computed and
176 * when we set the alarm below. */ 172 * when we set the alarm below. */
177 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); 173 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f92a0da608cb..2cc249718c46 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -291,8 +291,8 @@ SECTIONS
291 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { 291 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
292 __smp_locks = .; 292 __smp_locks = .;
293 *(.smp_locks) 293 *(.smp_locks)
294 __smp_locks_end = .;
295 . = ALIGN(PAGE_SIZE); 294 . = ALIGN(PAGE_SIZE);
295 __smp_locks_end = .;
296 } 296 }
297 297
298#ifdef CONFIG_X86_64 298#ifdef CONFIG_X86_64
@@ -341,7 +341,7 @@ SECTIONS
341 * Per-cpu symbols which need to be offset from __per_cpu_load 341 * Per-cpu symbols which need to be offset from __per_cpu_load
342 * for the boot processor. 342 * for the boot processor.
343 */ 343 */
344#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load 344#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
345INIT_PER_CPU(gdt_page); 345INIT_PER_CPU(gdt_page);
346INIT_PER_CPU(irq_stack_union); 346INIT_PER_CPU(irq_stack_union);
347 347
@@ -352,7 +352,7 @@ INIT_PER_CPU(irq_stack_union);
352 "kernel image bigger than KERNEL_IMAGE_SIZE"); 352 "kernel image bigger than KERNEL_IMAGE_SIZE");
353 353
354#ifdef CONFIG_SMP 354#ifdef CONFIG_SMP
355. = ASSERT((per_cpu__irq_stack_union == 0), 355. = ASSERT((irq_stack_union == 0),
356 "irq_stack_union is not at start of per-cpu area"); 356 "irq_stack_union is not at start of per-cpu area");
357#endif 357#endif
358 358
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ee5746c94628..61a1e8c7e19f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -4,9 +4,11 @@
4 * For licencing details see kernel-base/COPYING 4 * For licencing details see kernel-base/COPYING
5 */ 5 */
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/ioport.h>
7 8
8#include <asm/bios_ebda.h> 9#include <asm/bios_ebda.h>
9#include <asm/paravirt.h> 10#include <asm/paravirt.h>
11#include <asm/pci_x86.h>
10#include <asm/mpspec.h> 12#include <asm/mpspec.h>
11#include <asm/setup.h> 13#include <asm/setup.h>
12#include <asm/apic.h> 14#include <asm/apic.h>
@@ -70,6 +72,12 @@ struct x86_init_ops x86_init __initdata = {
70 .iommu = { 72 .iommu = {
71 .iommu_init = iommu_init_noop, 73 .iommu_init = iommu_init_noop,
72 }, 74 },
75
76 .pci = {
77 .init = x86_default_pci_init,
78 .init_irq = x86_default_pci_init_irq,
79 .fixup_irqs = x86_default_pci_fixup_irqs,
80 },
73}; 81};
74 82
75struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 83struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 06871111bf54..970bbd479516 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -66,6 +66,7 @@ config KVM_AMD
66 66
67# OK, it's a little counter-intuitive to do this, but it puts it neatly under 67# OK, it's a little counter-intuitive to do this, but it puts it neatly under
68# the virtualization menu. 68# the virtualization menu.
69source drivers/vhost/Kconfig
69source drivers/lguest/Kconfig 70source drivers/lguest/Kconfig
70source drivers/virtio/Kconfig 71source drivers/virtio/Kconfig
71 72
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 294698b6daff..0150affad25d 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -32,6 +32,7 @@
32#define pr_fmt(fmt) "pit: " fmt 32#define pr_fmt(fmt) "pit: " fmt
33 33
34#include <linux/kvm_host.h> 34#include <linux/kvm_host.h>
35#include <linux/slab.h>
35 36
36#include "irq.h" 37#include "irq.h"
37#include "i8254.h" 38#include "i8254.h"
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 07771da85de5..a790fa128a9f 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,6 +26,7 @@
26 * Port from Qemu. 26 * Port from Qemu.
27 */ 27 */
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h>
29#include <linux/bitops.h> 30#include <linux/bitops.h>
30#include "irq.h" 31#include "irq.h"
31 32
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4b224f90087b..1eb7a4ae0c9c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -26,6 +26,7 @@
26#include <linux/io.h> 26#include <linux/io.h>
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/math64.h> 28#include <linux/math64.h>
29#include <linux/slab.h>
29#include <asm/processor.h> 30#include <asm/processor.h>
30#include <asm/msr.h> 31#include <asm/msr.h>
31#include <asm/page.h> 32#include <asm/page.h>
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 741373e8ca77..19a8906bcaa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -31,6 +31,7 @@
31#include <linux/hugetlb.h> 31#include <linux/hugetlb.h>
32#include <linux/compiler.h> 32#include <linux/compiler.h>
33#include <linux/srcu.h> 33#include <linux/srcu.h>
34#include <linux/slab.h>
34 35
35#include <asm/page.h> 36#include <asm/page.h>
36#include <asm/cmpxchg.h> 37#include <asm/cmpxchg.h>
@@ -1489,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
1489 for_each_sp(pages, sp, parents, i) { 1490 for_each_sp(pages, sp, parents, i) {
1490 kvm_mmu_zap_page(kvm, sp); 1491 kvm_mmu_zap_page(kvm, sp);
1491 mmu_pages_clear_parents(&parents); 1492 mmu_pages_clear_parents(&parents);
1493 zapped++;
1492 } 1494 }
1493 zapped += pages.nr;
1494 kvm_mmu_pages_init(parent, &parents, &pages); 1495 kvm_mmu_pages_init(parent, &parents, &pages);
1495 } 1496 }
1496 1497
@@ -1541,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1541 */ 1542 */
1542 1543
1543 if (used_pages > kvm_nr_mmu_pages) { 1544 if (used_pages > kvm_nr_mmu_pages) {
1544 while (used_pages > kvm_nr_mmu_pages) { 1545 while (used_pages > kvm_nr_mmu_pages &&
1546 !list_empty(&kvm->arch.active_mmu_pages)) {
1545 struct kvm_mmu_page *page; 1547 struct kvm_mmu_page *page;
1546 1548
1547 page = container_of(kvm->arch.active_mmu_pages.prev, 1549 page = container_of(kvm->arch.active_mmu_pages.prev,
1548 struct kvm_mmu_page, link); 1550 struct kvm_mmu_page, link);
1549 kvm_mmu_zap_page(kvm, page); 1551 used_pages -= kvm_mmu_zap_page(kvm, page);
1550 used_pages--; 1552 used_pages--;
1551 } 1553 }
1554 kvm_nr_mmu_pages = used_pages;
1552 kvm->arch.n_free_mmu_pages = 0; 1555 kvm->arch.n_free_mmu_pages = 0;
1553 } 1556 }
1554 else 1557 else
@@ -1595,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1595 && !sp->role.invalid) { 1598 && !sp->role.invalid) {
1596 pgprintk("%s: zap %lx %x\n", 1599 pgprintk("%s: zap %lx %x\n",
1597 __func__, gfn, sp->role.word); 1600 __func__, gfn, sp->role.word);
1598 kvm_mmu_zap_page(kvm, sp); 1601 if (kvm_mmu_zap_page(kvm, sp))
1602 nn = bucket->first;
1599 } 1603 }
1600 } 1604 }
1601} 1605}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 52f78dd03010..2ba58206812a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -26,6 +26,7 @@
26#include <linux/highmem.h> 26#include <linux/highmem.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/ftrace_event.h> 28#include <linux/ftrace_event.h>
29#include <linux/slab.h>
29 30
30#include <asm/desc.h> 31#include <asm/desc.h>
31 32
@@ -705,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
705 if (err) 706 if (err)
706 goto free_svm; 707 goto free_svm;
707 708
709 err = -ENOMEM;
708 page = alloc_page(GFP_KERNEL); 710 page = alloc_page(GFP_KERNEL);
709 if (!page) { 711 if (!page)
710 err = -ENOMEM;
711 goto uninit; 712 goto uninit;
712 }
713 713
714 err = -ENOMEM;
715 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 714 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
716 if (!msrpm_pages) 715 if (!msrpm_pages)
717 goto uninit; 716 goto free_page1;
718 717
719 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 718 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
720 if (!nested_msrpm_pages) 719 if (!nested_msrpm_pages)
721 goto uninit; 720 goto free_page2;
722
723 svm->msrpm = page_address(msrpm_pages);
724 svm_vcpu_init_msrpm(svm->msrpm);
725 721
726 hsave_page = alloc_page(GFP_KERNEL); 722 hsave_page = alloc_page(GFP_KERNEL);
727 if (!hsave_page) 723 if (!hsave_page)
728 goto uninit; 724 goto free_page3;
725
729 svm->nested.hsave = page_address(hsave_page); 726 svm->nested.hsave = page_address(hsave_page);
730 727
728 svm->msrpm = page_address(msrpm_pages);
729 svm_vcpu_init_msrpm(svm->msrpm);
730
731 svm->nested.msrpm = page_address(nested_msrpm_pages); 731 svm->nested.msrpm = page_address(nested_msrpm_pages);
732 732
733 svm->vmcb = page_address(page); 733 svm->vmcb = page_address(page);
@@ -743,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
743 743
744 return &svm->vcpu; 744 return &svm->vcpu;
745 745
746free_page3:
747 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
748free_page2:
749 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
750free_page1:
751 __free_page(page);
746uninit: 752uninit:
747 kvm_vcpu_uninit(&svm->vcpu); 753 kvm_vcpu_uninit(&svm->vcpu);
748free_svm: 754free_svm:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 14873b9f8430..bc933cfb4e66 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,6 +26,7 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
28#include <linux/ftrace_event.h> 28#include <linux/ftrace_event.h>
29#include <linux/slab.h>
29#include "kvm_cache_regs.h" 30#include "kvm_cache_regs.h"
30#include "x86.h" 31#include "x86.h"
31 32
@@ -76,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
76#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 77#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
77#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 78#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
78 79
80#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
81
79/* 82/*
80 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 83 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
81 * ple_gap: upper bound on the amount of time between two successive 84 * ple_gap: upper bound on the amount of time between two successive
@@ -130,7 +133,7 @@ struct vcpu_vmx {
130 } host_state; 133 } host_state;
131 struct { 134 struct {
132 int vm86_active; 135 int vm86_active;
133 u8 save_iopl; 136 ulong save_rflags;
134 struct kvm_save_segment { 137 struct kvm_save_segment {
135 u16 selector; 138 u16 selector;
136 unsigned long base; 139 unsigned long base;
@@ -817,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
817 820
818static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 821static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
819{ 822{
820 unsigned long rflags; 823 unsigned long rflags, save_rflags;
821 824
822 rflags = vmcs_readl(GUEST_RFLAGS); 825 rflags = vmcs_readl(GUEST_RFLAGS);
823 if (to_vmx(vcpu)->rmode.vm86_active) 826 if (to_vmx(vcpu)->rmode.vm86_active) {
824 rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 827 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
828 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
829 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
830 }
825 return rflags; 831 return rflags;
826} 832}
827 833
828static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 834static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
829{ 835{
830 if (to_vmx(vcpu)->rmode.vm86_active) 836 if (to_vmx(vcpu)->rmode.vm86_active) {
837 to_vmx(vcpu)->rmode.save_rflags = rflags;
831 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 838 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
839 }
832 vmcs_writel(GUEST_RFLAGS, rflags); 840 vmcs_writel(GUEST_RFLAGS, rflags);
833} 841}
834 842
@@ -1482,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1482 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); 1490 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
1483 1491
1484 flags = vmcs_readl(GUEST_RFLAGS); 1492 flags = vmcs_readl(GUEST_RFLAGS);
1485 flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 1493 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1486 flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); 1494 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1487 vmcs_writel(GUEST_RFLAGS, flags); 1495 vmcs_writel(GUEST_RFLAGS, flags);
1488 1496
1489 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 1497 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1556,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1556 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 1564 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1557 1565
1558 flags = vmcs_readl(GUEST_RFLAGS); 1566 flags = vmcs_readl(GUEST_RFLAGS);
1559 vmx->rmode.save_iopl 1567 vmx->rmode.save_rflags = flags;
1560 = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1561 1568
1562 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 1569 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1563 1570
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e46282a56565..3c4ca98ad27f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <linux/user-return-notifier.h> 40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h> 41#include <linux/srcu.h>
42#include <linux/slab.h>
42#include <trace/events/kvm.h> 43#include <trace/events/kvm.h>
43#undef TRACE_INCLUDE_FILE 44#undef TRACE_INCLUDE_FILE
44#define CREATE_TRACE_POINTS 45#define CREATE_TRACE_POINTS
@@ -432,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
432 433
433#ifdef CONFIG_X86_64 434#ifdef CONFIG_X86_64
434 if (cr0 & 0xffffffff00000000UL) { 435 if (cr0 & 0xffffffff00000000UL) {
435 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
436 cr0, kvm_read_cr0(vcpu));
437 kvm_inject_gp(vcpu, 0); 436 kvm_inject_gp(vcpu, 0);
438 return; 437 return;
439 } 438 }
@@ -442,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
442 cr0 &= ~CR0_RESERVED_BITS; 441 cr0 &= ~CR0_RESERVED_BITS;
443 442
444 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 443 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
445 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
446 kvm_inject_gp(vcpu, 0); 444 kvm_inject_gp(vcpu, 0);
447 return; 445 return;
448 } 446 }
449 447
450 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 448 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
451 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
452 "and a clear PE flag\n");
453 kvm_inject_gp(vcpu, 0); 449 kvm_inject_gp(vcpu, 0);
454 return; 450 return;
455 } 451 }
@@ -460,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
460 int cs_db, cs_l; 456 int cs_db, cs_l;
461 457
462 if (!is_pae(vcpu)) { 458 if (!is_pae(vcpu)) {
463 printk(KERN_DEBUG "set_cr0: #GP, start paging "
464 "in long mode while PAE is disabled\n");
465 kvm_inject_gp(vcpu, 0); 459 kvm_inject_gp(vcpu, 0);
466 return; 460 return;
467 } 461 }
468 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 462 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
469 if (cs_l) { 463 if (cs_l) {
470 printk(KERN_DEBUG "set_cr0: #GP, start paging "
471 "in long mode while CS.L == 1\n");
472 kvm_inject_gp(vcpu, 0); 464 kvm_inject_gp(vcpu, 0);
473 return; 465 return;
474 466
@@ -476,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
476 } else 468 } else
477#endif 469#endif
478 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 470 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
479 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
480 "reserved bits\n");
481 kvm_inject_gp(vcpu, 0); 471 kvm_inject_gp(vcpu, 0);
482 return; 472 return;
483 } 473 }
@@ -504,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
504 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 494 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
505 495
506 if (cr4 & CR4_RESERVED_BITS) { 496 if (cr4 & CR4_RESERVED_BITS) {
507 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
508 kvm_inject_gp(vcpu, 0); 497 kvm_inject_gp(vcpu, 0);
509 return; 498 return;
510 } 499 }
511 500
512 if (is_long_mode(vcpu)) { 501 if (is_long_mode(vcpu)) {
513 if (!(cr4 & X86_CR4_PAE)) { 502 if (!(cr4 & X86_CR4_PAE)) {
514 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
515 "in long mode\n");
516 kvm_inject_gp(vcpu, 0); 503 kvm_inject_gp(vcpu, 0);
517 return; 504 return;
518 } 505 }
519 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 506 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
520 && ((cr4 ^ old_cr4) & pdptr_bits) 507 && ((cr4 ^ old_cr4) & pdptr_bits)
521 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 508 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
522 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
523 kvm_inject_gp(vcpu, 0); 509 kvm_inject_gp(vcpu, 0);
524 return; 510 return;
525 } 511 }
526 512
527 if (cr4 & X86_CR4_VMXE) { 513 if (cr4 & X86_CR4_VMXE) {
528 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
529 kvm_inject_gp(vcpu, 0); 514 kvm_inject_gp(vcpu, 0);
530 return; 515 return;
531 } 516 }
@@ -546,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
546 531
547 if (is_long_mode(vcpu)) { 532 if (is_long_mode(vcpu)) {
548 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 533 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
549 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
550 kvm_inject_gp(vcpu, 0); 534 kvm_inject_gp(vcpu, 0);
551 return; 535 return;
552 } 536 }
553 } else { 537 } else {
554 if (is_pae(vcpu)) { 538 if (is_pae(vcpu)) {
555 if (cr3 & CR3_PAE_RESERVED_BITS) { 539 if (cr3 & CR3_PAE_RESERVED_BITS) {
556 printk(KERN_DEBUG
557 "set_cr3: #GP, reserved bits\n");
558 kvm_inject_gp(vcpu, 0); 540 kvm_inject_gp(vcpu, 0);
559 return; 541 return;
560 } 542 }
561 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 543 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
562 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
563 "reserved bits\n");
564 kvm_inject_gp(vcpu, 0); 544 kvm_inject_gp(vcpu, 0);
565 return; 545 return;
566 } 546 }
@@ -592,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
592void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 572void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
593{ 573{
594 if (cr8 & CR8_RESERVED_BITS) { 574 if (cr8 & CR8_RESERVED_BITS) {
595 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
596 kvm_inject_gp(vcpu, 0); 575 kvm_inject_gp(vcpu, 0);
597 return; 576 return;
598 } 577 }
@@ -648,15 +627,12 @@ static u32 emulated_msrs[] = {
648static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 627static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
649{ 628{
650 if (efer & efer_reserved_bits) { 629 if (efer & efer_reserved_bits) {
651 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
652 efer);
653 kvm_inject_gp(vcpu, 0); 630 kvm_inject_gp(vcpu, 0);
654 return; 631 return;
655 } 632 }
656 633
657 if (is_paging(vcpu) 634 if (is_paging(vcpu)
658 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 635 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
659 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
660 kvm_inject_gp(vcpu, 0); 636 kvm_inject_gp(vcpu, 0);
661 return; 637 return;
662 } 638 }
@@ -666,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
666 642
667 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 643 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
668 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 644 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
669 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
670 kvm_inject_gp(vcpu, 0); 645 kvm_inject_gp(vcpu, 0);
671 return; 646 return;
672 } 647 }
@@ -677,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
677 652
678 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 653 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
679 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 654 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
680 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
681 kvm_inject_gp(vcpu, 0); 655 kvm_inject_gp(vcpu, 0);
682 return; 656 return;
683 } 657 }
@@ -966,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
966 if (msr >= MSR_IA32_MC0_CTL && 940 if (msr >= MSR_IA32_MC0_CTL &&
967 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 941 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
968 u32 offset = msr - MSR_IA32_MC0_CTL; 942 u32 offset = msr - MSR_IA32_MC0_CTL;
969 /* only 0 or all 1s can be written to IA32_MCi_CTL */ 943 /* only 0 or all 1s can be written to IA32_MCi_CTL
944 * some Linux kernels though clear bit 10 in bank 4 to
945 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
946 * this to avoid an uncatched #GP in the guest
947 */
970 if ((offset & 0x3) == 0 && 948 if ((offset & 0x3) == 0 &&
971 data != 0 && data != ~(u64)0) 949 data != 0 && (data | (1 << 10)) != ~(u64)0)
972 return -1; 950 return -1;
973 vcpu->arch.mce_banks[offset] = data; 951 vcpu->arch.mce_banks[offset] = data;
974 break; 952 break;
@@ -2634,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2634int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2612int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2635 struct kvm_dirty_log *log) 2613 struct kvm_dirty_log *log)
2636{ 2614{
2637 int r, n, i; 2615 int r, i;
2638 struct kvm_memory_slot *memslot; 2616 struct kvm_memory_slot *memslot;
2617 unsigned long n;
2639 unsigned long is_dirty = 0; 2618 unsigned long is_dirty = 0;
2640 unsigned long *dirty_bitmap = NULL; 2619 unsigned long *dirty_bitmap = NULL;
2641 2620
@@ -2650,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2650 if (!memslot->dirty_bitmap) 2629 if (!memslot->dirty_bitmap)
2651 goto out; 2630 goto out;
2652 2631
2653 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2632 n = kvm_dirty_bitmap_bytes(memslot);
2654 2633
2655 r = -ENOMEM; 2634 r = -ENOMEM;
2656 dirty_bitmap = vmalloc(n); 2635 dirty_bitmap = vmalloc(n);
@@ -4482,7 +4461,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4482 kvm_set_cr8(vcpu, kvm_run->cr8); 4461 kvm_set_cr8(vcpu, kvm_run->cr8);
4483 4462
4484 if (vcpu->arch.pio.cur_count) { 4463 if (vcpu->arch.pio.cur_count) {
4464 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4485 r = complete_pio(vcpu); 4465 r = complete_pio(vcpu);
4466 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4486 if (r) 4467 if (r)
4487 goto out; 4468 goto out;
4488 } 4469 }
@@ -5145,6 +5126,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5145 int ret = 0; 5126 int ret = 0;
5146 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5127 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5147 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5128 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5129 u32 desc_limit;
5148 5130
5149 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); 5131 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5150 5132
@@ -5167,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5167 } 5149 }
5168 } 5150 }
5169 5151
5170 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { 5152 desc_limit = get_desc_limit(&nseg_desc);
5153 if (!nseg_desc.p ||
5154 ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
5155 desc_limit < 0x2b)) {
5171 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 5156 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5172 return 1; 5157 return 1;
5173 } 5158 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7e59dc1d3fc2..2bdf628066bd 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
115 local_irq_save(flags); 115 local_irq_save(flags);
116 if (lguest_data.hcall_status[next_call] != 0xFF) { 116 if (lguest_data.hcall_status[next_call] != 0xFF) {
117 /* Table full, so do normal hcall which will flush table. */ 117 /* Table full, so do normal hcall which will flush table. */
118 kvm_hypercall4(call, arg1, arg2, arg3, arg4); 118 hcall(call, arg1, arg2, arg3, arg4);
119 } else { 119 } else {
120 lguest_data.hcalls[next_call].arg0 = call; 120 lguest_data.hcalls[next_call].arg0 = call;
121 lguest_data.hcalls[next_call].arg1 = arg1; 121 lguest_data.hcalls[next_call].arg1 = arg1;
@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1,
145 * So, when we're in lazy mode, we call async_hcall() to store the call for 145 * So, when we're in lazy mode, we call async_hcall() to store the call for
146 * future processing: 146 * future processing:
147 */ 147 */
148static void lazy_hcall1(unsigned long call, 148static void lazy_hcall1(unsigned long call, unsigned long arg1)
149 unsigned long arg1)
150{ 149{
151 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 150 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
152 kvm_hypercall1(call, arg1); 151 hcall(call, arg1, 0, 0, 0);
153 else 152 else
154 async_hcall(call, arg1, 0, 0, 0); 153 async_hcall(call, arg1, 0, 0, 0);
155} 154}
156 155
157/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ 156/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
158static void lazy_hcall2(unsigned long call, 157static void lazy_hcall2(unsigned long call,
159 unsigned long arg1, 158 unsigned long arg1,
160 unsigned long arg2) 159 unsigned long arg2)
161{ 160{
162 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 161 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
163 kvm_hypercall2(call, arg1, arg2); 162 hcall(call, arg1, arg2, 0, 0);
164 else 163 else
165 async_hcall(call, arg1, arg2, 0, 0); 164 async_hcall(call, arg1, arg2, 0, 0);
166} 165}
167 166
168static void lazy_hcall3(unsigned long call, 167static void lazy_hcall3(unsigned long call,
169 unsigned long arg1, 168 unsigned long arg1,
170 unsigned long arg2, 169 unsigned long arg2,
171 unsigned long arg3) 170 unsigned long arg3)
172{ 171{
173 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 172 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
174 kvm_hypercall3(call, arg1, arg2, arg3); 173 hcall(call, arg1, arg2, arg3, 0);
175 else 174 else
176 async_hcall(call, arg1, arg2, arg3, 0); 175 async_hcall(call, arg1, arg2, arg3, 0);
177} 176}
178 177
179#ifdef CONFIG_X86_PAE 178#ifdef CONFIG_X86_PAE
180static void lazy_hcall4(unsigned long call, 179static void lazy_hcall4(unsigned long call,
181 unsigned long arg1, 180 unsigned long arg1,
182 unsigned long arg2, 181 unsigned long arg2,
183 unsigned long arg3, 182 unsigned long arg3,
184 unsigned long arg4) 183 unsigned long arg4)
185{ 184{
186 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 185 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
187 kvm_hypercall4(call, arg1, arg2, arg3, arg4); 186 hcall(call, arg1, arg2, arg3, arg4);
188 else 187 else
189 async_hcall(call, arg1, arg2, arg3, arg4); 188 async_hcall(call, arg1, arg2, arg3, arg4);
190} 189}
@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call,
196:*/ 195:*/
197static void lguest_leave_lazy_mmu_mode(void) 196static void lguest_leave_lazy_mmu_mode(void)
198{ 197{
199 kvm_hypercall0(LHCALL_FLUSH_ASYNC); 198 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
200 paravirt_leave_lazy_mmu(); 199 paravirt_leave_lazy_mmu();
201} 200}
202 201
203static void lguest_end_context_switch(struct task_struct *next) 202static void lguest_end_context_switch(struct task_struct *next)
204{ 203{
205 kvm_hypercall0(LHCALL_FLUSH_ASYNC); 204 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
206 paravirt_end_context_switch(next); 205 paravirt_end_context_switch(next);
207} 206}
208 207
@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt,
286 /* Keep the local copy up to date. */ 285 /* Keep the local copy up to date. */
287 native_write_idt_entry(dt, entrynum, g); 286 native_write_idt_entry(dt, entrynum, g);
288 /* Tell Host about this new entry. */ 287 /* Tell Host about this new entry. */
289 kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); 288 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
290} 289}
291 290
292/* 291/*
@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)
300 struct desc_struct *idt = (void *)desc->address; 299 struct desc_struct *idt = (void *)desc->address;
301 300
302 for (i = 0; i < (desc->size+1)/8; i++) 301 for (i = 0; i < (desc->size+1)/8; i++)
303 kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); 302 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
304} 303}
305 304
306/* 305/*
@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
321 struct desc_struct *gdt = (void *)desc->address; 320 struct desc_struct *gdt = (void *)desc->address;
322 321
323 for (i = 0; i < (desc->size+1)/8; i++) 322 for (i = 0; i < (desc->size+1)/8; i++)
324 kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); 323 hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
325} 324}
326 325
327/* 326/*
@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
334{ 333{
335 native_write_gdt_entry(dt, entrynum, desc, type); 334 native_write_gdt_entry(dt, entrynum, desc, type);
336 /* Tell Host about this new entry. */ 335 /* Tell Host about this new entry. */
337 kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, 336 hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
338 dt[entrynum].a, dt[entrynum].b); 337 dt[entrynum].a, dt[entrynum].b, 0);
339} 338}
340 339
341/* 340/*
@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
931 } 930 }
932 931
933 /* Please wake us this far in the future. */ 932 /* Please wake us this far in the future. */
934 kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); 933 hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
935 return 0; 934 return 0;
936} 935}
937 936
@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,
942 case CLOCK_EVT_MODE_UNUSED: 941 case CLOCK_EVT_MODE_UNUSED:
943 case CLOCK_EVT_MODE_SHUTDOWN: 942 case CLOCK_EVT_MODE_SHUTDOWN:
944 /* A 0 argument shuts the clock down. */ 943 /* A 0 argument shuts the clock down. */
945 kvm_hypercall0(LHCALL_SET_CLOCKEVENT); 944 hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
946 break; 945 break;
947 case CLOCK_EVT_MODE_ONESHOT: 946 case CLOCK_EVT_MODE_ONESHOT:
948 /* This is what we expect. */ 947 /* This is what we expect. */
@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void)
1100/* STOP! Until an interrupt comes in. */ 1099/* STOP! Until an interrupt comes in. */
1101static void lguest_safe_halt(void) 1100static void lguest_safe_halt(void)
1102{ 1101{
1103 kvm_hypercall0(LHCALL_HALT); 1102 hcall(LHCALL_HALT, 0, 0, 0, 0);
1104} 1103}
1105 1104
1106/* 1105/*
@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void)
1112 */ 1111 */
1113static void lguest_power_off(void) 1112static void lguest_power_off(void)
1114{ 1113{
1115 kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), 1114 hcall(LHCALL_SHUTDOWN, __pa("Power down"),
1116 LGUEST_SHUTDOWN_POWEROFF); 1115 LGUEST_SHUTDOWN_POWEROFF, 0, 0);
1117} 1116}
1118 1117
1119/* 1118/*
@@ -1123,7 +1122,7 @@ static void lguest_power_off(void)
1123 */ 1122 */
1124static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) 1123static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
1125{ 1124{
1126 kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); 1125 hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
1127 /* The hcall won't return, but to keep gcc happy, we're "done". */ 1126 /* The hcall won't return, but to keep gcc happy, we're "done". */
1128 return NOTIFY_DONE; 1127 return NOTIFY_DONE;
1129} 1128}
@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
1162 len = sizeof(scratch) - 1; 1161 len = sizeof(scratch) - 1;
1163 scratch[len] = '\0'; 1162 scratch[len] = '\0';
1164 memcpy(scratch, buf, len); 1163 memcpy(scratch, buf, len);
1165 kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); 1164 hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);
1166 1165
1167 /* This routine returns the number of bytes actually written. */ 1166 /* This routine returns the number of bytes actually written. */
1168 return len; 1167 return len;
@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
1174 */ 1173 */
1175static void lguest_restart(char *reason) 1174static void lguest_restart(char *reason)
1176{ 1175{
1177 kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); 1176 hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
1178} 1177}
1179 1178
1180/*G:050 1179/*G:050
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 27eac0faee48..4f420c2f2d55 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -32,7 +32,7 @@ ENTRY(lguest_entry)
32 */ 32 */
33 movl $LHCALL_LGUEST_INIT, %eax 33 movl $LHCALL_LGUEST_INIT, %eax
34 movl $lguest_data - __PAGE_OFFSET, %ebx 34 movl $lguest_data - __PAGE_OFFSET, %ebx
35 .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ 35 int $LGUEST_TRAP_ENTRY
36 36
37 /* Set up the initial stack so we can run C code. */ 37 /* Set up the initial stack so we can run C code. */
38 movl $(init_thread_union+THREAD_SIZE),%esp 38 movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 15acecf0d7aa..41fcf00e49df 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -60,7 +60,7 @@ ENTRY(call_rwsem_down_write_failed)
60 ENDPROC(call_rwsem_down_write_failed) 60 ENDPROC(call_rwsem_down_write_failed)
61 61
62ENTRY(call_rwsem_wake) 62ENTRY(call_rwsem_wake)
63 decw %dx /* do nothing if still outstanding active readers */ 63 decl %edx /* do nothing if still outstanding active readers */
64 jnz 1f 64 jnz 1f
65 save_common_regs 65 save_common_regs
66 movq %rax,%rdi 66 movq %rax,%rdi
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f46c340727b8..069ce7c37c01 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -9,7 +9,6 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/hugetlb.h> 10#include <linux/hugetlb.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/slab.h>
13#include <linux/err.h> 12#include <linux/err.h>
14#include <linux/sysctl.h> 13#include <linux/sysctl.h>
15#include <asm/mman.h> 14#include <asm/mman.h>
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e71c5cbc8f35..b278535b14aa 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
1#include <linux/gfp.h>
1#include <linux/initrd.h> 2#include <linux/initrd.h>
2#include <linux/ioport.h> 3#include <linux/ioport.h>
3#include <linux/swap.h> 4#include <linux/swap.h>
@@ -331,11 +332,23 @@ int devmem_is_allowed(unsigned long pagenr)
331 332
332void free_init_pages(char *what, unsigned long begin, unsigned long end) 333void free_init_pages(char *what, unsigned long begin, unsigned long end)
333{ 334{
334 unsigned long addr = begin; 335 unsigned long addr;
336 unsigned long begin_aligned, end_aligned;
335 337
336 if (addr >= end) 338 /* Make sure boundaries are page aligned */
339 begin_aligned = PAGE_ALIGN(begin);
340 end_aligned = end & PAGE_MASK;
341
342 if (WARN_ON(begin_aligned != begin || end_aligned != end)) {
343 begin = begin_aligned;
344 end = end_aligned;
345 }
346
347 if (begin >= end)
337 return; 348 return;
338 349
350 addr = begin;
351
339 /* 352 /*
340 * If debugging page accesses then do not free this memory but 353 * If debugging page accesses then do not free this memory but
341 * mark them not present - any buggy init-section access will 354 * mark them not present - any buggy init-section access will
@@ -343,7 +356,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
343 */ 356 */
344#ifdef CONFIG_DEBUG_PAGEALLOC 357#ifdef CONFIG_DEBUG_PAGEALLOC
345 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", 358 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
346 begin, PAGE_ALIGN(end)); 359 begin, end);
347 set_memory_np(begin, (end - begin) >> PAGE_SHIFT); 360 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
348#else 361#else
349 /* 362 /*
@@ -358,8 +371,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
358 for (; addr < end; addr += PAGE_SIZE) { 371 for (; addr < end; addr += PAGE_SIZE) {
359 ClearPageReserved(virt_to_page(addr)); 372 ClearPageReserved(virt_to_page(addr));
360 init_page_count(virt_to_page(addr)); 373 init_page_count(virt_to_page(addr));
361 memset((void *)(addr & ~(PAGE_SIZE-1)), 374 memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
362 POISON_FREE_INITMEM, PAGE_SIZE);
363 free_page(addr); 375 free_page(addr);
364 totalram_pages++; 376 totalram_pages++;
365 } 377 }
@@ -376,6 +388,15 @@ void free_initmem(void)
376#ifdef CONFIG_BLK_DEV_INITRD 388#ifdef CONFIG_BLK_DEV_INITRD
377void free_initrd_mem(unsigned long start, unsigned long end) 389void free_initrd_mem(unsigned long start, unsigned long end)
378{ 390{
379 free_init_pages("initrd memory", start, end); 391 /*
392 * end could be not aligned, and We can not align that,
393 * decompresser could be confused by aligned initrd_end
394 * We already reserve the end partial page before in
395 * - i386_start_kernel()
396 * - x86_64_start_kernel()
397 * - relocate_initrd()
398 * So here We can do PAGE_ALIGN() safely to get partial page to be freed
399 */
400 free_init_pages("initrd memory", start, PAGE_ALIGN(end));
380} 401}
381#endif 402#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2226f2c70ea3..bca79091b9d6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -25,11 +25,11 @@
25#include <linux/pfn.h> 25#include <linux/pfn.h>
26#include <linux/poison.h> 26#include <linux/poison.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/slab.h>
29#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
30#include <linux/memory_hotplug.h> 29#include <linux/memory_hotplug.h>
31#include <linux/initrd.h> 30#include <linux/initrd.h>
32#include <linux/cpumask.h> 31#include <linux/cpumask.h>
32#include <linux/gfp.h>
33 33
34#include <asm/asm.h> 34#include <asm/asm.h>
35#include <asm/bios_ebda.h> 35#include <asm/bios_ebda.h>
@@ -750,6 +750,7 @@ static void __init zone_sizes_init(void)
750 free_area_init_nodes(max_zone_pfns); 750 free_area_init_nodes(max_zone_pfns);
751} 751}
752 752
753#ifndef CONFIG_NO_BOOTMEM
753static unsigned long __init setup_node_bootmem(int nodeid, 754static unsigned long __init setup_node_bootmem(int nodeid,
754 unsigned long start_pfn, 755 unsigned long start_pfn,
755 unsigned long end_pfn, 756 unsigned long end_pfn,
@@ -766,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
766 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", 767 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
767 nodeid, bootmap, bootmap + bootmap_size); 768 nodeid, bootmap, bootmap + bootmap_size);
768 free_bootmem_with_active_regions(nodeid, end_pfn); 769 free_bootmem_with_active_regions(nodeid, end_pfn);
769 early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
770 770
771 return bootmap + bootmap_size; 771 return bootmap + bootmap_size;
772} 772}
773#endif
773 774
774void __init setup_bootmem_allocator(void) 775void __init setup_bootmem_allocator(void)
775{ 776{
777#ifndef CONFIG_NO_BOOTMEM
776 int nodeid; 778 int nodeid;
777 unsigned long bootmap_size, bootmap; 779 unsigned long bootmap_size, bootmap;
778 /* 780 /*
@@ -784,11 +786,13 @@ void __init setup_bootmem_allocator(void)
784 if (bootmap == -1L) 786 if (bootmap == -1L)
785 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 787 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
786 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); 788 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
789#endif
787 790
788 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 791 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
789 max_pfn_mapped<<PAGE_SHIFT); 792 max_pfn_mapped<<PAGE_SHIFT);
790 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); 793 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
791 794
795#ifndef CONFIG_NO_BOOTMEM
792 for_each_online_node(nodeid) { 796 for_each_online_node(nodeid) {
793 unsigned long start_pfn, end_pfn; 797 unsigned long start_pfn, end_pfn;
794 798
@@ -806,6 +810,7 @@ void __init setup_bootmem_allocator(void)
806 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, 810 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
807 bootmap); 811 bootmap);
808 } 812 }
813#endif
809 814
810 after_bootmem = 1; 815 after_bootmem = 1;
811} 816}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 69ddfbd91135..ee41bba315d1 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/memory_hotplug.h> 30#include <linux/memory_hotplug.h>
31#include <linux/nmi.h> 31#include <linux/nmi.h>
32#include <linux/gfp.h>
32 33
33#include <asm/processor.h> 34#include <asm/processor.h>
34#include <asm/bios_ebda.h> 35#include <asm/bios_ebda.h>
@@ -572,6 +573,7 @@ kernel_physical_mapping_init(unsigned long start,
572void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 573void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
573 int acpi, int k8) 574 int acpi, int k8)
574{ 575{
576#ifndef CONFIG_NO_BOOTMEM
575 unsigned long bootmap_size, bootmap; 577 unsigned long bootmap_size, bootmap;
576 578
577 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 579 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -579,13 +581,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
579 PAGE_SIZE); 581 PAGE_SIZE);
580 if (bootmap == -1L) 582 if (bootmap == -1L)
581 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 583 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
584 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
582 /* don't touch min_low_pfn */ 585 /* don't touch min_low_pfn */
583 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, 586 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
584 0, end_pfn); 587 0, end_pfn);
585 e820_register_active_regions(0, start_pfn, end_pfn); 588 e820_register_active_regions(0, start_pfn, end_pfn);
586 free_bootmem_with_active_regions(0, end_pfn); 589 free_bootmem_with_active_regions(0, end_pfn);
587 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); 590#else
588 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); 591 e820_register_active_regions(0, start_pfn, end_pfn);
592#endif
589} 593}
590#endif 594#endif
591 595
@@ -974,7 +978,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
974 if (pmd_none(*pmd)) { 978 if (pmd_none(*pmd)) {
975 pte_t entry; 979 pte_t entry;
976 980
977 p = vmemmap_alloc_block(PMD_SIZE, node); 981 p = vmemmap_alloc_block_buf(PMD_SIZE, node);
978 if (!p) 982 if (!p)
979 return -ENOMEM; 983 return -ENOMEM;
980 984
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5eb1ba74a3a9..12e4d2d3c110 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -448,6 +448,20 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
448static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; 448static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
449static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; 449static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
450 450
451void __init fixup_early_ioremap(void)
452{
453 int i;
454
455 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
456 if (prev_map[i]) {
457 WARN_ON(1);
458 break;
459 }
460 }
461
462 early_ioremap_init();
463}
464
451static int __init check_early_ioremap_leak(void) 465static int __init check_early_ioremap_leak(void)
452{ 466{
453 int count = 0; 467 int count = 0;
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 536fb6823366..5d0e67fff1a6 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -21,6 +21,7 @@
21#include <linux/kdebug.h> 21#include <linux/kdebug.h>
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/io.h> 23#include <linux/io.h>
24#include <linux/slab.h>
24#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
25#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
26#include <linux/errno.h> 27#include <linux/errno.h>
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 34a3291ca103..3adff7dcc148 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -26,6 +26,7 @@
26 26
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/debugfs.h> 28#include <linux/debugfs.h>
29#include <linux/slab.h>
29#include <linux/uaccess.h> 30#include <linux/uaccess.h>
30#include <linux/io.h> 31#include <linux/io.h>
31#include <linux/version.h> 32#include <linux/version.h>
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index b20760ca7244..809baaaf48b1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
418 418
419 for_each_online_node(nid) { 419 for_each_online_node(nid) {
420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
421 NODE_DATA(nid)->node_id = nid;
422#ifndef CONFIG_NO_BOOTMEM
421 NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; 423 NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
424#endif
422 } 425 }
423 426
424 setup_bootmem_allocator(); 427 setup_bootmem_allocator();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3307ea8bd43a..8948f47fde05 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
163 unsigned long end, unsigned long size, 163 unsigned long end, unsigned long size,
164 unsigned long align) 164 unsigned long align)
165{ 165{
166 unsigned long mem = find_e820_area(start, end, size, align); 166 unsigned long mem;
167 void *ptr;
168 167
168 /*
169 * put it on high as possible
170 * something will go with NODE_DATA
171 */
172 if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
173 start = MAX_DMA_PFN<<PAGE_SHIFT;
174 if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
175 end > (MAX_DMA32_PFN<<PAGE_SHIFT))
176 start = MAX_DMA32_PFN<<PAGE_SHIFT;
177 mem = find_e820_area(start, end, size, align);
178 if (mem != -1L)
179 return __va(mem);
180
181 /* extend the search scope */
182 end = max_pfn_mapped << PAGE_SHIFT;
183 if (end > (MAX_DMA32_PFN<<PAGE_SHIFT))
184 start = MAX_DMA32_PFN<<PAGE_SHIFT;
185 else
186 start = MAX_DMA_PFN<<PAGE_SHIFT;
187 mem = find_e820_area(start, end, size, align);
169 if (mem != -1L) 188 if (mem != -1L)
170 return __va(mem); 189 return __va(mem);
171 190
172 ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); 191 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
173 if (ptr == NULL) {
174 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
175 size, nodeid); 192 size, nodeid);
176 return NULL; 193
177 } 194 return NULL;
178 return ptr;
179} 195}
180 196
181/* Initialize bootmem allocator for a node */ 197/* Initialize bootmem allocator for a node */
182void __init 198void __init
183setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 199setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
184{ 200{
185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 201 unsigned long start_pfn, last_pfn, nodedata_phys;
186 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
187 unsigned long bootmap_start, nodedata_phys;
188 void *bootmap;
189 int nid; 203 int nid;
204#ifndef CONFIG_NO_BOOTMEM
205 unsigned long bootmap_start, bootmap_pages, bootmap_size;
206 void *bootmap;
207#endif
190 208
191 if (!end) 209 if (!end)
192 return; 210 return;
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
200 218
201 start = roundup(start, ZONE_ALIGN); 219 start = roundup(start, ZONE_ALIGN);
202 220
203 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 221 printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
204 start, end); 222 start, end);
205 223
206 start_pfn = start >> PAGE_SHIFT; 224 start_pfn = start >> PAGE_SHIFT;
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
211 if (node_data[nodeid] == NULL) 229 if (node_data[nodeid] == NULL)
212 return; 230 return;
213 nodedata_phys = __pa(node_data[nodeid]); 231 nodedata_phys = __pa(node_data[nodeid]);
232 reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
214 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, 233 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
215 nodedata_phys + pgdat_size - 1); 234 nodedata_phys + pgdat_size - 1);
235 nid = phys_to_nid(nodedata_phys);
236 if (nid != nodeid)
237 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
216 238
217 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 239 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
218 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; 240 NODE_DATA(nodeid)->node_id = nodeid;
219 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 241 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
220 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 242 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
221 243
244#ifndef CONFIG_NO_BOOTMEM
245 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
246
222 /* 247 /*
223 * Find a place for the bootmem map 248 * Find a place for the bootmem map
224 * nodedata_phys could be on other nodes by alloc_bootmem, 249 * nodedata_phys could be on other nodes by alloc_bootmem,
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
227 * of alloc_bootmem, that could clash with reserved range 252 * of alloc_bootmem, that could clash with reserved range
228 */ 253 */
229 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); 254 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
230 nid = phys_to_nid(nodedata_phys); 255 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
231 if (nid == nodeid)
232 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
233 else
234 bootmap_start = roundup(start, PAGE_SIZE);
235 /* 256 /*
236 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like 257 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
237 * to use that to align to PAGE_SIZE 258 * to use that to align to PAGE_SIZE
@@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
239 bootmap = early_node_mem(nodeid, bootmap_start, end, 260 bootmap = early_node_mem(nodeid, bootmap_start, end,
240 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); 261 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
241 if (bootmap == NULL) { 262 if (bootmap == NULL) {
242 if (nodedata_phys < start || nodedata_phys >= end) { 263 free_early(nodedata_phys, nodedata_phys + pgdat_size);
243 /*
244 * only need to free it if it is from other node
245 * bootmem
246 */
247 if (nid != nodeid)
248 free_bootmem(nodedata_phys, pgdat_size);
249 }
250 node_data[nodeid] = NULL; 264 node_data[nodeid] = NULL;
251 return; 265 return;
252 } 266 }
253 bootmap_start = __pa(bootmap); 267 bootmap_start = __pa(bootmap);
268 reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT),
269 "BOOTMAP");
254 270
255 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 271 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
256 bootmap_start >> PAGE_SHIFT, 272 bootmap_start >> PAGE_SHIFT,
@@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
259 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", 275 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
260 bootmap_start, bootmap_start + bootmap_size - 1, 276 bootmap_start, bootmap_start + bootmap_size - 1,
261 bootmap_pages); 277 bootmap_pages);
262
263 free_bootmem_with_active_regions(nodeid, end);
264
265 /*
266 * convert early reserve to bootmem reserve earlier
267 * otherwise early_node_mem could use early reserved mem
268 * on previous node
269 */
270 early_res_to_bootmem(start, end);
271
272 /*
273 * in some case early_node_mem could use alloc_bootmem
274 * to get range on other node, don't reserve that again
275 */
276 if (nid != nodeid)
277 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
278 else
279 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
280 pgdat_size, BOOTMEM_DEFAULT);
281 nid = phys_to_nid(bootmap_start); 278 nid = phys_to_nid(bootmap_start);
282 if (nid != nodeid) 279 if (nid != nodeid)
283 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); 280 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
284 else 281
285 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 282 free_bootmem_with_active_regions(nodeid, end);
286 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 283#endif
287 284
288 node_set_online(nodeid); 285 node_set_online(nodeid);
289} 286}
@@ -709,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void)
709 for_each_online_node(i) 706 for_each_online_node(i)
710 pages += free_all_bootmem_node(NODE_DATA(i)); 707 pages += free_all_bootmem_node(NODE_DATA(i));
711 708
709#ifdef CONFIG_NO_BOOTMEM
710 pages += free_all_memory_core_early(MAX_NUMNODES);
711#endif
712
712 return pages; 713 return pages;
713} 714}
714 715
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1d4eb93d333c..28195c350b97 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -6,13 +6,13 @@
6#include <linux/bootmem.h> 6#include <linux/bootmem.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/mm.h> 9#include <linux/mm.h>
11#include <linux/interrupt.h> 10#include <linux/interrupt.h>
12#include <linux/seq_file.h> 11#include <linux/seq_file.h>
13#include <linux/debugfs.h> 12#include <linux/debugfs.h>
14#include <linux/pfn.h> 13#include <linux/pfn.h>
15#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <linux/gfp.h>
16 16
17#include <asm/e820.h> 17#include <asm/e820.h>
18#include <asm/processor.h> 18#include <asm/processor.h>
@@ -291,8 +291,29 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
291 */ 291 */
292 if (kernel_set_to_readonly && 292 if (kernel_set_to_readonly &&
293 within(address, (unsigned long)_text, 293 within(address, (unsigned long)_text,
294 (unsigned long)__end_rodata_hpage_align)) 294 (unsigned long)__end_rodata_hpage_align)) {
295 pgprot_val(forbidden) |= _PAGE_RW; 295 unsigned int level;
296
297 /*
298 * Don't enforce the !RW mapping for the kernel text mapping,
299 * if the current mapping is already using small page mapping.
300 * No need to work hard to preserve large page mappings in this
301 * case.
302 *
303 * This also fixes the Linux Xen paravirt guest boot failure
304 * (because of unexpected read-only mappings for kernel identity
305 * mappings). In this paravirt guest case, the kernel text
306 * mapping and the kernel identity mapping share the same
307 * page-table pages. Thus we can't really use different
308 * protections for the kernel text and identity mappings. Also,
309 * these shared mappings are made of small page mappings.
310 * Thus this don't enforce !RW mapping for small page kernel
311 * text mapping logic will help Linux Xen parvirt guest boot
312 * aswell.
313 */
314 if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
315 pgprot_val(forbidden) |= _PAGE_RW;
316 }
296#endif 317#endif
297 318
298 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 319 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index ae9648eb1c7f..edc8b95afc1a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -12,7 +12,7 @@
12#include <linux/debugfs.h> 12#include <linux/debugfs.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/gfp.h> 15#include <linux/slab.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/rbtree.h> 18#include <linux/rbtree.h>
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index c9ba9deafe83..5c4ee422590e 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,4 +1,5 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/gfp.h>
2#include <asm/pgalloc.h> 3#include <asm/pgalloc.h>
3#include <asm/pgtable.h> 4#include <asm/pgtable.h>
4#include <asm/tlb.h> 5#include <asm/tlb.h>
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 46c8834aedc0..792854003ed3 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -6,7 +6,6 @@
6#include <linux/swap.h> 6#include <linux/swap.h>
7#include <linux/smp.h> 7#include <linux/smp.h>
8#include <linux/highmem.h> 8#include <linux/highmem.h>
9#include <linux/slab.h>
10#include <linux/pagemap.h> 9#include <linux/pagemap.h>
11#include <linux/spinlock.h> 10#include <linux/spinlock.h>
12#include <linux/module.h> 11#include <linux/module.h>
@@ -19,6 +18,7 @@
19#include <asm/e820.h> 18#include <asm/e820.h>
20#include <asm/tlb.h> 19#include <asm/tlb.h>
21#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm/io.h>
22 22
23unsigned int __VMALLOC_RESERVE = 128 << 20; 23unsigned int __VMALLOC_RESERVE = 128 << 20;
24 24
@@ -129,6 +129,7 @@ static int __init parse_reservetop(char *arg)
129 129
130 address = memparse(arg, &arg); 130 address = memparse(arg, &arg);
131 reserve_top_address(address); 131 reserve_top_address(address);
132 fixup_early_ioremap();
132 return 0; 133 return 0;
133} 134}
134early_param("reservetop", parse_reservetop); 135early_param("reservetop", parse_reservetop);
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 6a58256dce9f..090cbbec7dbd 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -46,17 +46,6 @@
46 46
47static unsigned long reset_value[NUM_VIRT_COUNTERS]; 47static unsigned long reset_value[NUM_VIRT_COUNTERS];
48 48
49/* IbsFetchCtl bits/masks */
50#define IBS_FETCH_RAND_EN (1ULL<<57)
51#define IBS_FETCH_VAL (1ULL<<49)
52#define IBS_FETCH_ENABLE (1ULL<<48)
53#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL
54
55/* IbsOpCtl bits */
56#define IBS_OP_CNT_CTL (1ULL<<19)
57#define IBS_OP_VAL (1ULL<<18)
58#define IBS_OP_ENABLE (1ULL<<17)
59
60#define IBS_FETCH_SIZE 6 49#define IBS_FETCH_SIZE 6
61#define IBS_OP_SIZE 12 50#define IBS_OP_SIZE 12
62 51
@@ -182,7 +171,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
182 continue; 171 continue;
183 } 172 }
184 rdmsrl(msrs->controls[i].addr, val); 173 rdmsrl(msrs->controls[i].addr, val);
185 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) 174 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
186 op_x86_warn_in_use(i); 175 op_x86_warn_in_use(i);
187 val &= model->reserved; 176 val &= model->reserved;
188 wrmsrl(msrs->controls[i].addr, val); 177 wrmsrl(msrs->controls[i].addr, val);
@@ -290,7 +279,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
290 oprofile_write_commit(&entry); 279 oprofile_write_commit(&entry);
291 280
292 /* reenable the IRQ */ 281 /* reenable the IRQ */
293 ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); 282 ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
294 ctl |= IBS_FETCH_ENABLE; 283 ctl |= IBS_FETCH_ENABLE;
295 wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); 284 wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
296 } 285 }
@@ -330,7 +319,7 @@ static inline void op_amd_start_ibs(void)
330 return; 319 return;
331 320
332 if (ibs_config.fetch_enabled) { 321 if (ibs_config.fetch_enabled) {
333 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 322 val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
334 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; 323 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
335 val |= IBS_FETCH_ENABLE; 324 val |= IBS_FETCH_ENABLE;
336 wrmsrl(MSR_AMD64_IBSFETCHCTL, val); 325 wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
@@ -352,7 +341,7 @@ static inline void op_amd_start_ibs(void)
352 * avoid underflows. 341 * avoid underflows.
353 */ 342 */
354 ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, 343 ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
355 0xFFFFULL); 344 IBS_OP_MAX_CNT);
356 } 345 }
357 if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) 346 if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
358 ibs_op_ctl |= IBS_OP_CNT_CTL; 347 ibs_op_ctl |= IBS_OP_CNT_CTL;
@@ -409,7 +398,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
409 if (!reset_value[op_x86_phys_to_virt(i)]) 398 if (!reset_value[op_x86_phys_to_virt(i)])
410 continue; 399 continue;
411 rdmsrl(msrs->controls[i].addr, val); 400 rdmsrl(msrs->controls[i].addr, val);
412 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 401 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
413 wrmsrl(msrs->controls[i].addr, val); 402 wrmsrl(msrs->controls[i].addr, val);
414 } 403 }
415 404
@@ -429,7 +418,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
429 if (!reset_value[op_x86_phys_to_virt(i)]) 418 if (!reset_value[op_x86_phys_to_virt(i)])
430 continue; 419 continue;
431 rdmsrl(msrs->controls[i].addr, val); 420 rdmsrl(msrs->controls[i].addr, val);
432 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 421 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
433 wrmsrl(msrs->controls[i].addr, val); 422 wrmsrl(msrs->controls[i].addr, val);
434 } 423 }
435 424
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 5d1727ba409e..2bf90fafa7b5 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -88,7 +88,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
88 continue; 88 continue;
89 } 89 }
90 rdmsrl(msrs->controls[i].addr, val); 90 rdmsrl(msrs->controls[i].addr, val);
91 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) 91 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
92 op_x86_warn_in_use(i); 92 op_x86_warn_in_use(i);
93 val &= model->reserved; 93 val &= model->reserved;
94 wrmsrl(msrs->controls[i].addr, val); 94 wrmsrl(msrs->controls[i].addr, val);
@@ -166,7 +166,7 @@ static void ppro_start(struct op_msrs const * const msrs)
166 for (i = 0; i < num_counters; ++i) { 166 for (i = 0; i < num_counters; ++i) {
167 if (reset_value[i]) { 167 if (reset_value[i]) {
168 rdmsrl(msrs->controls[i].addr, val); 168 rdmsrl(msrs->controls[i].addr, val);
169 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 169 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
170 wrmsrl(msrs->controls[i].addr, val); 170 wrmsrl(msrs->controls[i].addr, val);
171 } 171 }
172 } 172 }
@@ -184,7 +184,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
184 if (!reset_value[i]) 184 if (!reset_value[i])
185 continue; 185 continue;
186 rdmsrl(msrs->controls[i].addr, val); 186 rdmsrl(msrs->controls[i].addr, val);
187 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 187 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
188 wrmsrl(msrs->controls[i].addr, val); 188 wrmsrl(msrs->controls[i].addr, val);
189 } 189 }
190} 190}
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 39fba37f702f..b110d97fb925 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -13,9 +13,10 @@ obj-$(CONFIG_X86_VISWS) += visws.o
13 13
14obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 14obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
15 15
16obj-$(CONFIG_X86_MRST) += mrst.o
17
16obj-y += common.o early.o 18obj-y += common.o early.o
17obj-y += amd_bus.o 19obj-y += amd_bus.o bus_numa.o
18obj-$(CONFIG_X86_64) += bus_numa.o
19 20
20ifeq ($(CONFIG_PCI_DEBUG),y) 21ifeq ($(CONFIG_PCI_DEBUG),y)
21EXTRA_CFLAGS += -DDEBUG 22EXTRA_CFLAGS += -DDEBUG
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 5f11ff6f5389..31930fd30ea9 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -3,6 +3,7 @@
3#include <linux/init.h> 3#include <linux/init.h>
4#include <linux/irq.h> 4#include <linux/irq.h>
5#include <linux/dmi.h> 5#include <linux/dmi.h>
6#include <linux/slab.h>
6#include <asm/numa.h> 7#include <asm/numa.h>
7#include <asm/pci_x86.h> 8#include <asm/pci_x86.h>
8 9
@@ -65,14 +66,44 @@ resource_to_addr(struct acpi_resource *resource,
65 struct acpi_resource_address64 *addr) 66 struct acpi_resource_address64 *addr)
66{ 67{
67 acpi_status status; 68 acpi_status status;
68 69 struct acpi_resource_memory24 *memory24;
69 status = acpi_resource_to_address64(resource, addr); 70 struct acpi_resource_memory32 *memory32;
70 if (ACPI_SUCCESS(status) && 71 struct acpi_resource_fixed_memory32 *fixed_memory32;
71 (addr->resource_type == ACPI_MEMORY_RANGE || 72
72 addr->resource_type == ACPI_IO_RANGE) && 73 memset(addr, 0, sizeof(*addr));
73 addr->address_length > 0 && 74 switch (resource->type) {
74 addr->producer_consumer == ACPI_PRODUCER) { 75 case ACPI_RESOURCE_TYPE_MEMORY24:
76 memory24 = &resource->data.memory24;
77 addr->resource_type = ACPI_MEMORY_RANGE;
78 addr->minimum = memory24->minimum;
79 addr->address_length = memory24->address_length;
80 addr->maximum = addr->minimum + addr->address_length - 1;
81 return AE_OK;
82 case ACPI_RESOURCE_TYPE_MEMORY32:
83 memory32 = &resource->data.memory32;
84 addr->resource_type = ACPI_MEMORY_RANGE;
85 addr->minimum = memory32->minimum;
86 addr->address_length = memory32->address_length;
87 addr->maximum = addr->minimum + addr->address_length - 1;
75 return AE_OK; 88 return AE_OK;
89 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
90 fixed_memory32 = &resource->data.fixed_memory32;
91 addr->resource_type = ACPI_MEMORY_RANGE;
92 addr->minimum = fixed_memory32->address;
93 addr->address_length = fixed_memory32->address_length;
94 addr->maximum = addr->minimum + addr->address_length - 1;
95 return AE_OK;
96 case ACPI_RESOURCE_TYPE_ADDRESS16:
97 case ACPI_RESOURCE_TYPE_ADDRESS32:
98 case ACPI_RESOURCE_TYPE_ADDRESS64:
99 status = acpi_resource_to_address64(resource, addr);
100 if (ACPI_SUCCESS(status) &&
101 (addr->resource_type == ACPI_MEMORY_RANGE ||
102 addr->resource_type == ACPI_IO_RANGE) &&
103 addr->address_length > 0) {
104 return AE_OK;
105 }
106 break;
76 } 107 }
77 return AE_ERROR; 108 return AE_ERROR;
78} 109}
@@ -90,30 +121,6 @@ count_resource(struct acpi_resource *acpi_res, void *data)
90 return AE_OK; 121 return AE_OK;
91} 122}
92 123
93static void
94align_resource(struct acpi_device *bridge, struct resource *res)
95{
96 int align = (res->flags & IORESOURCE_MEM) ? 16 : 4;
97
98 /*
99 * Host bridge windows are not BARs, but the decoders on the PCI side
100 * that claim this address space have starting alignment and length
101 * constraints, so fix any obvious BIOS goofs.
102 */
103 if (!IS_ALIGNED(res->start, align)) {
104 dev_printk(KERN_DEBUG, &bridge->dev,
105 "host bridge window %pR invalid; "
106 "aligning start to %d-byte boundary\n", res, align);
107 res->start &= ~(align - 1);
108 }
109 if (!IS_ALIGNED(res->end + 1, align)) {
110 dev_printk(KERN_DEBUG, &bridge->dev,
111 "host bridge window %pR invalid; "
112 "aligning end to %d-byte boundary\n", res, align);
113 res->end = ALIGN(res->end, align) - 1;
114 }
115}
116
117static acpi_status 124static acpi_status
118setup_resource(struct acpi_resource *acpi_res, void *data) 125setup_resource(struct acpi_resource *acpi_res, void *data)
119{ 126{
@@ -122,7 +129,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
122 struct acpi_resource_address64 addr; 129 struct acpi_resource_address64 addr;
123 acpi_status status; 130 acpi_status status;
124 unsigned long flags; 131 unsigned long flags;
125 struct resource *root; 132 struct resource *root, *conflict;
126 u64 start, end; 133 u64 start, end;
127 134
128 status = resource_to_addr(acpi_res, &addr); 135 status = resource_to_addr(acpi_res, &addr);
@@ -141,7 +148,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
141 return AE_OK; 148 return AE_OK;
142 149
143 start = addr.minimum + addr.translation_offset; 150 start = addr.minimum + addr.translation_offset;
144 end = start + addr.address_length - 1; 151 end = addr.maximum + addr.translation_offset;
145 152
146 res = &info->res[info->res_num]; 153 res = &info->res[info->res_num];
147 res->name = info->name; 154 res->name = info->name;
@@ -149,7 +156,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
149 res->start = start; 156 res->start = start;
150 res->end = end; 157 res->end = end;
151 res->child = NULL; 158 res->child = NULL;
152 align_resource(info->bridge, res);
153 159
154 if (!pci_use_crs) { 160 if (!pci_use_crs) {
155 dev_printk(KERN_DEBUG, &info->bridge->dev, 161 dev_printk(KERN_DEBUG, &info->bridge->dev,
@@ -157,9 +163,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
157 return AE_OK; 163 return AE_OK;
158 } 164 }
159 165
160 if (insert_resource(root, res)) { 166 conflict = insert_resource_conflict(root, res);
167 if (conflict) {
161 dev_err(&info->bridge->dev, 168 dev_err(&info->bridge->dev,
162 "can't allocate host bridge window %pR\n", res); 169 "address space collision: host bridge window %pR "
170 "conflicts with %s %pR\n",
171 res, conflict->name, conflict);
163 } else { 172 } else {
164 pci_bus_add_resource(info->bus, res, 0); 173 pci_bus_add_resource(info->bus, res, 0);
165 info->res_num++; 174 info->res_num++;
@@ -298,17 +307,14 @@ int __init pci_acpi_init(void)
298{ 307{
299 struct pci_dev *dev = NULL; 308 struct pci_dev *dev = NULL;
300 309
301 if (pcibios_scanned)
302 return 0;
303
304 if (acpi_noirq) 310 if (acpi_noirq)
305 return 0; 311 return -ENODEV;
306 312
307 printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); 313 printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
308 acpi_irq_penalty_init(); 314 acpi_irq_penalty_init();
309 pcibios_scanned++;
310 pcibios_enable_irq = acpi_pci_irq_enable; 315 pcibios_enable_irq = acpi_pci_irq_enable;
311 pcibios_disable_irq = acpi_pci_irq_disable; 316 pcibios_disable_irq = acpi_pci_irq_disable;
317 x86_init.pci.init_irq = x86_init_noop;
312 318
313 if (pci_routeirq) { 319 if (pci_routeirq) {
314 /* 320 /*
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 95ecbd495955..fc1e8fe07e5c 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,11 +2,11 @@
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <linux/topology.h> 3#include <linux/topology.h>
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include <linux/range.h>
6
5#include <asm/pci_x86.h> 7#include <asm/pci_x86.h>
6 8
7#ifdef CONFIG_X86_64
8#include <asm/pci-direct.h> 9#include <asm/pci-direct.h>
9#endif
10 10
11#include "bus_numa.h" 11#include "bus_numa.h"
12 12
@@ -15,60 +15,6 @@
15 * also get peer root bus resource for io,mmio 15 * also get peer root bus resource for io,mmio
16 */ 16 */
17 17
18#ifdef CONFIG_X86_64
19
20#define RANGE_NUM 16
21
22struct res_range {
23 size_t start;
24 size_t end;
25};
26
27static void __init update_range(struct res_range *range, size_t start,
28 size_t end)
29{
30 int i;
31 int j;
32
33 for (j = 0; j < RANGE_NUM; j++) {
34 if (!range[j].end)
35 continue;
36
37 if (start <= range[j].start && end >= range[j].end) {
38 range[j].start = 0;
39 range[j].end = 0;
40 continue;
41 }
42
43 if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
44 range[j].start = end + 1;
45 continue;
46 }
47
48
49 if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
50 range[j].end = start - 1;
51 continue;
52 }
53
54 if (start > range[j].start && end < range[j].end) {
55 /* find the new spare */
56 for (i = 0; i < RANGE_NUM; i++) {
57 if (range[i].end == 0)
58 break;
59 }
60 if (i < RANGE_NUM) {
61 range[i].end = range[j].end;
62 range[i].start = end + 1;
63 } else {
64 printk(KERN_ERR "run of slot in ranges\n");
65 }
66 range[j].end = start - 1;
67 continue;
68 }
69 }
70}
71
72struct pci_hostbridge_probe { 18struct pci_hostbridge_probe {
73 u32 bus; 19 u32 bus;
74 u32 slot; 20 u32 slot;
@@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void)
111 fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; 57 fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
112} 58}
113 59
60#define RANGE_NUM 16
61
114/** 62/**
115 * early_fill_mp_bus_to_node() 63 * early_fill_mp_bus_to_node()
116 * called before pcibios_scan_root and pci_scan_bus 64 * called before pcibios_scan_root and pci_scan_bus
@@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void)
130 struct pci_root_info *info; 78 struct pci_root_info *info;
131 u32 reg; 79 u32 reg;
132 struct resource *res; 80 struct resource *res;
133 size_t start; 81 u64 start;
134 size_t end; 82 u64 end;
135 struct res_range range[RANGE_NUM]; 83 struct range range[RANGE_NUM];
136 u64 val; 84 u64 val;
137 u32 address; 85 u32 address;
86 bool found;
138 87
139 if (!early_pci_allowed()) 88 if (!early_pci_allowed())
140 return -1; 89 return -1;
141 90
142 found_all_numa_early = 0; 91 found = false;
143 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { 92 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
144 u32 id; 93 u32 id;
145 u16 device; 94 u16 device;
@@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void)
153 device = (id>>16) & 0xffff; 102 device = (id>>16) & 0xffff;
154 if (pci_probes[i].vendor == vendor && 103 if (pci_probes[i].vendor == vendor &&
155 pci_probes[i].device == device) { 104 pci_probes[i].device == device) {
156 found_all_numa_early = 1; 105 found = true;
157 break; 106 break;
158 } 107 }
159 } 108 }
160 109
161 if (!found_all_numa_early) 110 if (!found)
162 return 0; 111 return 0;
163 112
164 pci_root_num = 0; 113 pci_root_num = 0;
@@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void)
196 def_link = (reg >> 8) & 0x03; 145 def_link = (reg >> 8) & 0x03;
197 146
198 memset(range, 0, sizeof(range)); 147 memset(range, 0, sizeof(range));
199 range[0].end = 0xffff; 148 add_range(range, RANGE_NUM, 0, 0, 0xffff + 1);
200 /* io port resource */ 149 /* io port resource */
201 for (i = 0; i < 4; i++) { 150 for (i = 0; i < 4; i++) {
202 reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); 151 reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3));
@@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void)
220 169
221 info = &pci_root_info[j]; 170 info = &pci_root_info[j];
222 printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", 171 printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
223 node, link, (u64)start, (u64)end); 172 node, link, start, end);
224 173
225 /* kernel only handle 16 bit only */ 174 /* kernel only handle 16 bit only */
226 if (end > 0xffff) 175 if (end > 0xffff)
227 end = 0xffff; 176 end = 0xffff;
228 update_res(info, start, end, IORESOURCE_IO, 1); 177 update_res(info, start, end, IORESOURCE_IO, 1);
229 update_range(range, start, end); 178 subtract_range(range, RANGE_NUM, start, end + 1);
230 } 179 }
231 /* add left over io port range to def node/link, [0, 0xffff] */ 180 /* add left over io port range to def node/link, [0, 0xffff] */
232 /* find the position */ 181 /* find the position */
@@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void)
241 if (!range[i].end) 190 if (!range[i].end)
242 continue; 191 continue;
243 192
244 update_res(info, range[i].start, range[i].end, 193 update_res(info, range[i].start, range[i].end - 1,
245 IORESOURCE_IO, 1); 194 IORESOURCE_IO, 1);
246 } 195 }
247 } 196 }
248 197
249 memset(range, 0, sizeof(range)); 198 memset(range, 0, sizeof(range));
250 /* 0xfd00000000-0xffffffffff for HT */ 199 /* 0xfd00000000-0xffffffffff for HT */
251 range[0].end = (0xfdULL<<32) - 1; 200 end = cap_resource((0xfdULL<<32) - 1);
201 end++;
202 add_range(range, RANGE_NUM, 0, 0, end);
252 203
253 /* need to take out [0, TOM) for RAM*/ 204 /* need to take out [0, TOM) for RAM*/
254 address = MSR_K8_TOP_MEM1; 205 address = MSR_K8_TOP_MEM1;
255 rdmsrl(address, val); 206 rdmsrl(address, val);
256 end = (val & 0xffffff800000ULL); 207 end = (val & 0xffffff800000ULL);
257 printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); 208 printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20);
258 if (end < (1ULL<<32)) 209 if (end < (1ULL<<32))
259 update_range(range, 0, end - 1); 210 subtract_range(range, RANGE_NUM, 0, end);
260 211
261 /* get mmconfig */ 212 /* get mmconfig */
262 get_pci_mmcfg_amd_fam10h_range(); 213 get_pci_mmcfg_amd_fam10h_range();
263 /* need to take out mmconf range */ 214 /* need to take out mmconf range */
264 if (fam10h_mmconf_end) { 215 if (fam10h_mmconf_end) {
265 printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); 216 printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
266 update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); 217 subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
218 fam10h_mmconf_end + 1);
267 } 219 }
268 220
269 /* mmio resource */ 221 /* mmio resource */
@@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void)
293 info = &pci_root_info[j]; 245 info = &pci_root_info[j];
294 246
295 printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", 247 printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
296 node, link, (u64)start, (u64)end); 248 node, link, start, end);
297 /* 249 /*
298 * some sick allocation would have range overlap with fam10h 250 * some sick allocation would have range overlap with fam10h
299 * mmconf range, so need to update start and end. 251 * mmconf range, so need to update start and end.
@@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void)
318 /* we got a hole */ 270 /* we got a hole */
319 endx = fam10h_mmconf_start - 1; 271 endx = fam10h_mmconf_start - 1;
320 update_res(info, start, endx, IORESOURCE_MEM, 0); 272 update_res(info, start, endx, IORESOURCE_MEM, 0);
321 update_range(range, start, endx); 273 subtract_range(range, RANGE_NUM, start,
322 printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); 274 endx + 1);
275 printk(KERN_CONT " ==> [%llx, %llx]", start, endx);
323 start = fam10h_mmconf_end + 1; 276 start = fam10h_mmconf_end + 1;
324 changed = 1; 277 changed = 1;
325 } 278 }
326 if (changed) { 279 if (changed) {
327 if (start <= end) { 280 if (start <= end) {
328 printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); 281 printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end);
329 } else { 282 } else {
330 printk(KERN_CONT "%s\n", endx?"":" ==> none"); 283 printk(KERN_CONT "%s\n", endx?"":" ==> none");
331 continue; 284 continue;
@@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void)
333 } 286 }
334 } 287 }
335 288
336 update_res(info, start, end, IORESOURCE_MEM, 1); 289 update_res(info, cap_resource(start), cap_resource(end),
337 update_range(range, start, end); 290 IORESOURCE_MEM, 1);
291 subtract_range(range, RANGE_NUM, start, end + 1);
338 printk(KERN_CONT "\n"); 292 printk(KERN_CONT "\n");
339 } 293 }
340 294
@@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void)
348 address = MSR_K8_TOP_MEM2; 302 address = MSR_K8_TOP_MEM2;
349 rdmsrl(address, val); 303 rdmsrl(address, val);
350 end = (val & 0xffffff800000ULL); 304 end = (val & 0xffffff800000ULL);
351 printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); 305 printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20);
352 update_range(range, 1ULL<<32, end - 1); 306 subtract_range(range, RANGE_NUM, 1ULL<<32, end);
353 } 307 }
354 308
355 /* 309 /*
@@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void)
368 if (!range[i].end) 322 if (!range[i].end)
369 continue; 323 continue;
370 324
371 update_res(info, range[i].start, range[i].end, 325 update_res(info, cap_resource(range[i].start),
326 cap_resource(range[i].end - 1),
372 IORESOURCE_MEM, 1); 327 IORESOURCE_MEM, 1);
373 } 328 }
374 } 329 }
@@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void)
384 info->bus_min, info->bus_max, info->node, info->link); 339 info->bus_min, info->bus_max, info->node, info->link);
385 for (j = 0; j < res_num; j++) { 340 for (j = 0; j < res_num; j++) {
386 res = &info->res[j]; 341 res = &info->res[j];
387 printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", 342 printk(KERN_DEBUG "bus: %02x index %x %pR\n",
388 busnum, j, 343 busnum, j, res);
389 (res->flags & IORESOURCE_IO)?"io port":"mmio",
390 res->start, res->end);
391 } 344 }
392 } 345 }
393 346
394 return 0; 347 return 0;
395} 348}
396 349
397#else /* !CONFIG_X86_64 */
398
399static int __init early_fill_mp_bus_info(void) { return 0; }
400
401#endif /* !CONFIG_X86_64 */
402
403/* common 32/64 bit code */
404
405#define ENABLE_CF8_EXT_CFG (1ULL << 46) 350#define ENABLE_CF8_EXT_CFG (1ULL << 46)
406 351
407static void enable_pci_io_ecs(void *unused) 352static void enable_pci_io_ecs(void *unused)
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index 12d54ff3654d..64a122883896 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -1,11 +1,11 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <linux/range.h>
3 4
4#include "bus_numa.h" 5#include "bus_numa.h"
5 6
6int pci_root_num; 7int pci_root_num;
7struct pci_root_info pci_root_info[PCI_ROOT_NR]; 8struct pci_root_info pci_root_info[PCI_ROOT_NR];
8int found_all_numa_early;
9 9
10void x86_pci_root_bus_res_quirks(struct pci_bus *b) 10void x86_pci_root_bus_res_quirks(struct pci_bus *b)
11{ 11{
@@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
21 if (!pci_root_num) 21 if (!pci_root_num)
22 return; 22 return;
23 23
24 /* for amd, if only one root bus, don't need to do anything */
25 if (pci_root_num < 2 && found_all_numa_early)
26 return;
27
28 for (i = 0; i < pci_root_num; i++) { 24 for (i = 0; i < pci_root_num; i++) {
29 if (pci_root_info[i].bus_min == b->number) 25 if (pci_root_info[i].bus_min == b->number)
30 break; 26 break;
@@ -52,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
52 } 48 }
53} 49}
54 50
55void __devinit update_res(struct pci_root_info *info, size_t start, 51void __devinit update_res(struct pci_root_info *info, resource_size_t start,
56 size_t end, unsigned long flags, int merge) 52 resource_size_t end, unsigned long flags, int merge)
57{ 53{
58 int i; 54 int i;
59 struct resource *res; 55 struct resource *res;
@@ -61,25 +57,28 @@ void __devinit update_res(struct pci_root_info *info, size_t start,
61 if (start > end) 57 if (start > end)
62 return; 58 return;
63 59
60 if (start == MAX_RESOURCE)
61 return;
62
64 if (!merge) 63 if (!merge)
65 goto addit; 64 goto addit;
66 65
67 /* try to merge it with old one */ 66 /* try to merge it with old one */
68 for (i = 0; i < info->res_num; i++) { 67 for (i = 0; i < info->res_num; i++) {
69 size_t final_start, final_end; 68 resource_size_t final_start, final_end;
70 size_t common_start, common_end; 69 resource_size_t common_start, common_end;
71 70
72 res = &info->res[i]; 71 res = &info->res[i];
73 if (res->flags != flags) 72 if (res->flags != flags)
74 continue; 73 continue;
75 74
76 common_start = max((size_t)res->start, start); 75 common_start = max(res->start, start);
77 common_end = min((size_t)res->end, end); 76 common_end = min(res->end, end);
78 if (common_start > common_end + 1) 77 if (common_start > common_end + 1)
79 continue; 78 continue;
80 79
81 final_start = min((size_t)res->start, start); 80 final_start = min(res->start, start);
82 final_end = max((size_t)res->end, end); 81 final_end = max(res->end, end);
83 82
84 res->start = final_start; 83 res->start = final_start;
85 res->end = final_end; 84 res->end = final_end;
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index 731b64ee8d84..804a4b40c31a 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_64 1#ifndef __BUS_NUMA_H
2 2#define __BUS_NUMA_H
3/* 3/*
4 * sub bus (transparent) will use entres from 3 to store extra from 4 * sub bus (transparent) will use entres from 3 to store extra from
5 * root, so need to make sure we have enough slot there. 5 * root, so need to make sure we have enough slot there.
@@ -19,8 +19,7 @@ struct pci_root_info {
19#define PCI_ROOT_NR 4 19#define PCI_ROOT_NR 4
20extern int pci_root_num; 20extern int pci_root_num;
21extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; 21extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
22extern int found_all_numa_early;
23 22
24extern void update_res(struct pci_root_info *info, size_t start, 23extern void update_res(struct pci_root_info *info, resource_size_t start,
25 size_t end, unsigned long flags, int merge); 24 resource_size_t end, unsigned long flags, int merge);
26#endif 25#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3736176acaab..cf2e93869c48 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -9,6 +9,7 @@
9#include <linux/ioport.h> 9#include <linux/ioport.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/dmi.h> 11#include <linux/dmi.h>
12#include <linux/slab.h>
12 13
13#include <asm/acpi.h> 14#include <asm/acpi.h>
14#include <asm/segment.h> 15#include <asm/segment.h>
@@ -72,12 +73,6 @@ struct pci_ops pci_root_ops = {
72}; 73};
73 74
74/* 75/*
75 * legacy, numa, and acpi all want to call pcibios_scan_root
76 * from their initcalls. This flag prevents that.
77 */
78int pcibios_scanned;
79
80/*
81 * This interrupt-safe spinlock protects all accesses to PCI 76 * This interrupt-safe spinlock protects all accesses to PCI
82 * configuration space. 77 * configuration space.
83 */ 78 */
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5a8fbf8d4cac..97da2ba9344b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -72,6 +72,9 @@ pcibios_align_resource(void *data, const struct resource *res,
72 return start; 72 return start;
73 if (start & 0x300) 73 if (start & 0x300)
74 start = (start + 0x3ff) & ~0x3ff; 74 start = (start + 0x3ff) & ~0x3ff;
75 } else if (res->flags & IORESOURCE_MEM) {
76 if (start < BIOS_END)
77 start = BIOS_END;
75 } 78 }
76 return start; 79 return start;
77} 80}
@@ -127,9 +130,6 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
127 continue; 130 continue;
128 if (!r->start || 131 if (!r->start ||
129 pci_claim_resource(dev, idx) < 0) { 132 pci_claim_resource(dev, idx) < 0) {
130 dev_info(&dev->dev,
131 "can't reserve window %pR\n",
132 r);
133 /* 133 /*
134 * Something is wrong with the region. 134 * Something is wrong with the region.
135 * Invalidate the resource to prevent 135 * Invalidate the resource to prevent
@@ -181,8 +181,6 @@ static void __init pcibios_allocate_resources(int pass)
181 "BAR %d: reserving %pr (d=%d, p=%d)\n", 181 "BAR %d: reserving %pr (d=%d, p=%d)\n",
182 idx, r, disabled, pass); 182 idx, r, disabled, pass);
183 if (pci_claim_resource(dev, idx) < 0) { 183 if (pci_claim_resource(dev, idx) < 0) {
184 dev_info(&dev->dev,
185 "can't reserve %pR\n", r);
186 /* We'll assign a new address later */ 184 /* We'll assign a new address later */
187 r->end -= r->start; 185 r->end -= r->start;
188 r->start = 0; 186 r->start = 0;
@@ -255,10 +253,6 @@ void __init pcibios_resource_survey(void)
255 */ 253 */
256fs_initcall(pcibios_assign_resources); 254fs_initcall(pcibios_assign_resources);
257 255
258void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b)
259{
260}
261
262/* 256/*
263 * If we set up a device for bus mastering, we need to check the latency 257 * If we set up a device for bus mastering, we need to check the latency
264 * timer as certain crappy BIOSes forget to set it properly. 258 * timer as certain crappy BIOSes forget to set it properly.
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 25a1f8efed4a..adb62aaa7ecd 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,7 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/init.h> 2#include <linux/init.h>
3#include <asm/pci_x86.h> 3#include <asm/pci_x86.h>
4#include <asm/x86_init.h>
4 5
5/* arch_initcall has too random ordering, so call the initializers 6/* arch_initcall has too random ordering, so call the initializers
6 in the right sequence from here. */ 7 in the right sequence from here. */
@@ -15,10 +16,9 @@ static __init int pci_arch_init(void)
15 if (!(pci_probe & PCI_PROBE_NOEARLY)) 16 if (!(pci_probe & PCI_PROBE_NOEARLY))
16 pci_mmcfg_early_init(); 17 pci_mmcfg_early_init();
17 18
18#ifdef CONFIG_PCI_OLPC 19 if (x86_init.pci.arch_init && !x86_init.pci.arch_init())
19 if (!pci_olpc_init()) 20 return 0;
20 return 0; /* skip additional checks if it's an XO */ 21
21#endif
22#ifdef CONFIG_PCI_BIOS 22#ifdef CONFIG_PCI_BIOS
23 pci_pcbios_init(); 23 pci_pcbios_init();
24#endif 24#endif
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index b02f6d8ac922..5d362b5ba06f 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -8,7 +8,6 @@
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/pci.h> 9#include <linux/pci.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/slab.h>
12#include <linux/interrupt.h> 11#include <linux/interrupt.h>
13#include <linux/dmi.h> 12#include <linux/dmi.h>
14#include <linux/io.h> 13#include <linux/io.h>
@@ -53,7 +52,7 @@ struct irq_router_handler {
53 int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); 52 int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
54}; 53};
55 54
56int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; 55int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq;
57void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; 56void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL;
58 57
59/* 58/*
@@ -1018,7 +1017,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
1018 return 1; 1017 return 1;
1019} 1018}
1020 1019
1021static void __init pcibios_fixup_irqs(void) 1020void __init pcibios_fixup_irqs(void)
1022{ 1021{
1023 struct pci_dev *dev = NULL; 1022 struct pci_dev *dev = NULL;
1024 u8 pin; 1023 u8 pin;
@@ -1112,12 +1111,12 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
1112 { } 1111 { }
1113}; 1112};
1114 1113
1115int __init pcibios_irq_init(void) 1114void __init pcibios_irq_init(void)
1116{ 1115{
1117 DBG(KERN_DEBUG "PCI: IRQ init\n"); 1116 DBG(KERN_DEBUG "PCI: IRQ init\n");
1118 1117
1119 if (pcibios_enable_irq || raw_pci_ops == NULL) 1118 if (raw_pci_ops == NULL)
1120 return 0; 1119 return;
1121 1120
1122 dmi_check_system(pciirq_dmi_table); 1121 dmi_check_system(pciirq_dmi_table);
1123 1122
@@ -1144,9 +1143,7 @@ int __init pcibios_irq_init(void)
1144 pirq_table = NULL; 1143 pirq_table = NULL;
1145 } 1144 }
1146 1145
1147 pcibios_enable_irq = pirq_enable_irq; 1146 x86_init.pci.fixup_irqs();
1148
1149 pcibios_fixup_irqs();
1150 1147
1151 if (io_apic_assign_pci_irqs && pci_routeirq) { 1148 if (io_apic_assign_pci_irqs && pci_routeirq) {
1152 struct pci_dev *dev = NULL; 1149 struct pci_dev *dev = NULL;
@@ -1159,8 +1156,6 @@ int __init pcibios_irq_init(void)
1159 for_each_pci_dev(dev) 1156 for_each_pci_dev(dev)
1160 pirq_enable_irq(dev); 1157 pirq_enable_irq(dev);
1161 } 1158 }
1162
1163 return 0;
1164} 1159}
1165 1160
1166static void pirq_penalize_isa_irq(int irq, int active) 1161static void pirq_penalize_isa_irq(int irq, int active)
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 4061bb0f267d..0db5eaf54560 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -35,16 +35,13 @@ static void __devinit pcibios_fixup_peer_bridges(void)
35 } 35 }
36} 36}
37 37
38static int __init pci_legacy_init(void) 38int __init pci_legacy_init(void)
39{ 39{
40 if (!raw_pci_ops) { 40 if (!raw_pci_ops) {
41 printk("PCI: System does not support PCI\n"); 41 printk("PCI: System does not support PCI\n");
42 return 0; 42 return 0;
43 } 43 }
44 44
45 if (pcibios_scanned++)
46 return 0;
47
48 printk("PCI: Probing PCI hardware\n"); 45 printk("PCI: Probing PCI hardware\n");
49 pci_root_bus = pcibios_scan_root(0); 46 pci_root_bus = pcibios_scan_root(0);
50 if (pci_root_bus) 47 if (pci_root_bus)
@@ -55,18 +52,15 @@ static int __init pci_legacy_init(void)
55 52
56int __init pci_subsys_init(void) 53int __init pci_subsys_init(void)
57{ 54{
58#ifdef CONFIG_X86_NUMAQ 55 /*
59 pci_numaq_init(); 56 * The init function returns an non zero value when
60#endif 57 * pci_legacy_init should be invoked.
61#ifdef CONFIG_ACPI 58 */
62 pci_acpi_init(); 59 if (x86_init.pci.init())
63#endif 60 pci_legacy_init();
64#ifdef CONFIG_X86_VISWS 61
65 pci_visws_init();
66#endif
67 pci_legacy_init();
68 pcibios_fixup_peer_bridges(); 62 pcibios_fixup_peer_bridges();
69 pcibios_irq_init(); 63 x86_init.pci.init_irq();
70 pcibios_init(); 64 pcibios_init();
71 65
72 return 0; 66 return 0;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 8f3f9a50b1e0..39b9ebe8f886 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -16,6 +16,7 @@
16#include <linux/sfi_acpi.h> 16#include <linux/sfi_acpi.h>
17#include <linux/bitmap.h> 17#include <linux/bitmap.h>
18#include <linux/dmi.h> 18#include <linux/dmi.h>
19#include <linux/slab.h>
19#include <asm/e820.h> 20#include <asm/e820.h>
20#include <asm/pci_x86.h> 21#include <asm/pci_x86.h>
21#include <asm/acpi.h> 22#include <asm/acpi.h>
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
new file mode 100644
index 000000000000..8bf2fcb88d04
--- /dev/null
+++ b/arch/x86/pci/mrst.c
@@ -0,0 +1,262 @@
1/*
2 * Moorestown PCI support
3 * Copyright (c) 2008 Intel Corporation
4 * Jesse Barnes <jesse.barnes@intel.com>
5 *
6 * Moorestown has an interesting PCI implementation:
7 * - configuration space is memory mapped (as defined by MCFG)
8 * - Lincroft devices also have a real, type 1 configuration space
9 * - Early Lincroft silicon has a type 1 access bug that will cause
10 * a hang if non-existent devices are accessed
11 * - some devices have the "fixed BAR" capability, which means
12 * they can't be relocated or modified; check for that during
13 * BAR sizing
14 *
15 * So, we use the MCFG space for all reads and writes, but also send
16 * Lincroft writes to type 1 space. But only read/write if the device
17 * actually exists, otherwise return all 1s for reads and bit bucket
18 * the writes.
19 */
20
21#include <linux/sched.h>
22#include <linux/pci.h>
23#include <linux/ioport.h>
24#include <linux/init.h>
25#include <linux/dmi.h>
26
27#include <asm/acpi.h>
28#include <asm/segment.h>
29#include <asm/io.h>
30#include <asm/smp.h>
31#include <asm/pci_x86.h>
32#include <asm/hw_irq.h>
33#include <asm/io_apic.h>
34
35#define PCIE_CAP_OFFSET 0x100
36
37/* Fixed BAR fields */
38#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */
39#define PCI_FIXED_BAR_0_SIZE 0x04
40#define PCI_FIXED_BAR_1_SIZE 0x08
41#define PCI_FIXED_BAR_2_SIZE 0x0c
42#define PCI_FIXED_BAR_3_SIZE 0x10
43#define PCI_FIXED_BAR_4_SIZE 0x14
44#define PCI_FIXED_BAR_5_SIZE 0x1c
45
46/**
47 * fixed_bar_cap - return the offset of the fixed BAR cap if found
48 * @bus: PCI bus
49 * @devfn: device in question
50 *
51 * Look for the fixed BAR cap on @bus and @devfn, returning its offset
52 * if found or 0 otherwise.
53 */
54static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
55{
56 int pos;
57 u32 pcie_cap = 0, cap_data;
58
59 pos = PCIE_CAP_OFFSET;
60
61 if (!raw_pci_ext_ops)
62 return 0;
63
64 while (pos) {
65 if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
66 devfn, pos, 4, &pcie_cap))
67 return 0;
68
69 if (pcie_cap == 0xffffffff)
70 return 0;
71
72 if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
73 raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
74 devfn, pos + 4, 4, &cap_data);
75 if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR)
76 return pos;
77 }
78
79 pos = pcie_cap >> 20;
80 }
81
82 return 0;
83}
84
85static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
86 int reg, int len, u32 val, int offset)
87{
88 u32 size;
89 unsigned int domain, busnum;
90 int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
91
92 domain = pci_domain_nr(bus);
93 busnum = bus->number;
94
95 if (val == ~0 && len == 4) {
96 unsigned long decode;
97
98 raw_pci_ext_ops->read(domain, busnum, devfn,
99 offset + 8 + (bar * 4), 4, &size);
100
101 /* Turn the size into a decode pattern for the sizing code */
102 if (size) {
103 decode = size - 1;
104 decode |= decode >> 1;
105 decode |= decode >> 2;
106 decode |= decode >> 4;
107 decode |= decode >> 8;
108 decode |= decode >> 16;
109 decode++;
110 decode = ~(decode - 1);
111 } else {
112 decode = ~0;
113 }
114
115 /*
116 * If val is all ones, the core code is trying to size the reg,
117 * so update the mmconfig space with the real size.
118 *
119 * Note: this assumes the fixed size we got is a power of two.
120 */
121 return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4,
122 decode);
123 }
124
125 /* This is some other kind of BAR write, so just do it. */
126 return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val);
127}
128
129/**
130 * type1_access_ok - check whether to use type 1
131 * @bus: bus number
132 * @devfn: device & function in question
133 *
134 * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
135 * all, the we can go ahead with any reads & writes. If it's on a Lincroft,
136 * but doesn't exist, avoid the access altogether to keep the chip from
137 * hanging.
138 */
139static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
140{
141 /* This is a workaround for A0 LNC bug where PCI status register does
142 * not have new CAP bit set. can not be written by SW either.
143 *
144 * PCI header type in real LNC indicates a single function device, this
145 * will prevent probing other devices under the same function in PCI
146 * shim. Therefore, use the header type in shim instead.
147 */
148 if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
149 return 0;
150 if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
151 return 1;
152 return 0; /* langwell on others */
153}
154
155static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
156 int size, u32 *value)
157{
158 if (type1_access_ok(bus->number, devfn, where))
159 return pci_direct_conf1.read(pci_domain_nr(bus), bus->number,
160 devfn, where, size, value);
161 return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
162 devfn, where, size, value);
163}
164
165static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
166 int size, u32 value)
167{
168 int offset;
169
170 /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read
171 * to ROM BAR return 0 then being ignored.
172 */
173 if (where == PCI_ROM_ADDRESS)
174 return 0;
175
176 /*
177 * Devices with fixed BARs need special handling:
178 * - BAR sizing code will save, write ~0, read size, restore
179 * - so writes to fixed BARs need special handling
180 * - other writes to fixed BAR devices should go through mmconfig
181 */
182 offset = fixed_bar_cap(bus, devfn);
183 if (offset &&
184 (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) {
185 return pci_device_update_fixed(bus, devfn, where, size, value,
186 offset);
187 }
188
189 /*
190 * On Moorestown update both real & mmconfig space
191 * Note: early Lincroft silicon can't handle type 1 accesses to
192 * non-existent devices, so just eat the write in that case.
193 */
194 if (type1_access_ok(bus->number, devfn, where))
195 return pci_direct_conf1.write(pci_domain_nr(bus), bus->number,
196 devfn, where, size, value);
197 return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn,
198 where, size, value);
199}
200
201static int mrst_pci_irq_enable(struct pci_dev *dev)
202{
203 u8 pin;
204 struct io_apic_irq_attr irq_attr;
205
206 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
207
208 /* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
209 * IOAPIC RTE entries, so we just enable RTE for the device.
210 */
211 irq_attr.ioapic = mp_find_ioapic(dev->irq);
212 irq_attr.ioapic_pin = dev->irq;
213 irq_attr.trigger = 1; /* level */
214 irq_attr.polarity = 1; /* active low */
215 io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);
216
217 return 0;
218}
219
220struct pci_ops pci_mrst_ops = {
221 .read = pci_read,
222 .write = pci_write,
223};
224
225/**
226 * pci_mrst_init - installs pci_mrst_ops
227 *
228 * Moorestown has an interesting PCI implementation (see above).
229 * Called when the early platform detection installs it.
230 */
231int __init pci_mrst_init(void)
232{
233 printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
234 pci_mmcfg_late_init();
235 pcibios_enable_irq = mrst_pci_irq_enable;
236 pci_root_ops = pci_mrst_ops;
237 /* Continue with standard init */
238 return 1;
239}
240
241/*
242 * Langwell devices reside at fixed offsets, don't try to move them.
243 */
244static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
245{
246 unsigned long offset;
247 u32 size;
248 int i;
249
250 /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
251 offset = fixed_bar_cap(dev->bus, dev->devfn);
252 if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
253 PCI_DEVFN(2, 2) == dev->devfn)
254 return;
255
256 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
257 pci_read_config_dword(dev, offset + 8 + (i * 4), &size);
258 dev->resource[i].end = dev->resource[i].start + size - 1;
259 dev->resource[i].flags |= IORESOURCE_PCI_FIXED;
260 }
261}
262DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup);
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 8884a1c1ada6..8223738ad806 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -148,14 +148,8 @@ int __init pci_numaq_init(void)
148{ 148{
149 int quad; 149 int quad;
150 150
151 if (!found_numaq)
152 return 0;
153
154 raw_pci_ops = &pci_direct_conf1_mq; 151 raw_pci_ops = &pci_direct_conf1_mq;
155 152
156 if (pcibios_scanned++)
157 return 0;
158
159 pci_root_bus = pcibios_scan_root(0); 153 pci_root_bus = pcibios_scan_root(0);
160 if (pci_root_bus) 154 if (pci_root_bus)
161 pci_bus_add_devices(pci_root_bus); 155 pci_bus_add_devices(pci_root_bus);
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index b889d824f7c6..b34815408f58 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -304,9 +304,6 @@ static struct pci_raw_ops pci_olpc_conf = {
304 304
305int __init pci_olpc_init(void) 305int __init pci_olpc_init(void)
306{ 306{
307 if (!machine_is_olpc() || olpc_has_vsa())
308 return -ENODEV;
309
310 printk(KERN_INFO "PCI: Using configuration type OLPC\n"); 307 printk(KERN_INFO "PCI: Using configuration type OLPC\n");
311 raw_pci_ops = &pci_olpc_conf; 308 raw_pci_ops = &pci_olpc_conf;
312 is_lx = is_geode_lx(); 309 is_lx = is_geode_lx();
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 1c975cc9839e..59a225c17b84 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -4,6 +4,7 @@
4 4
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/slab.h>
7#include <linux/module.h> 8#include <linux/module.h>
8#include <linux/uaccess.h> 9#include <linux/uaccess.h>
9#include <asm/pci_x86.h> 10#include <asm/pci_x86.h>
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index bcead7a46871..03008f72eb04 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -69,9 +69,6 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
69 69
70int __init pci_visws_init(void) 70int __init pci_visws_init(void)
71{ 71{
72 if (!is_visws_box())
73 return -1;
74
75 pcibios_enable_irq = &pci_visws_enable_irq; 72 pcibios_enable_irq = &pci_visws_enable_irq;
76 pcibios_disable_irq = &pci_visws_disable_irq; 73 pcibios_disable_irq = &pci_visws_disable_irq;
77 74
@@ -90,5 +87,6 @@ int __init pci_visws_init(void)
90 pci_scan_bus_with_sysdata(pci_bus1); 87 pci_scan_bus_with_sysdata(pci_bus1);
91 pci_fixup_irqs(pci_common_swizzle, visws_map_irq); 88 pci_fixup_irqs(pci_common_swizzle, visws_map_irq);
92 pcibios_resource_survey(); 89 pcibios_resource_survey();
93 return 0; 90 /* Request bus scan */
91 return 1;
94} 92}
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 81197c62d5b3..3769079874d8 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -6,6 +6,7 @@
6 * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl> 6 * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl>
7 */ 7 */
8 8
9#include <linux/gfp.h>
9#include <linux/suspend.h> 10#include <linux/suspend.h>
10#include <linux/bootmem.h> 11#include <linux/bootmem.h>
11 12
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 65fdc86e923f..d24f983ba1e5 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -8,6 +8,7 @@
8 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> 8 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
9 */ 9 */
10 10
11#include <linux/gfp.h>
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/suspend.h> 13#include <linux/suspend.h>
13#include <asm/proto.h> 14#include <asm/proto.h>
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index b641388d8286..ad47daeafa4e 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -27,10 +27,17 @@ ENTRY(swsusp_arch_suspend)
27 ret 27 ret
28 28
29ENTRY(restore_image) 29ENTRY(restore_image)
30 movl mmu_cr4_features, %ecx
30 movl resume_pg_dir, %eax 31 movl resume_pg_dir, %eax
31 subl $__PAGE_OFFSET, %eax 32 subl $__PAGE_OFFSET, %eax
32 movl %eax, %cr3 33 movl %eax, %cr3
33 34
35 jecxz 1f # cr4 Pentium and higher, skip if zero
36 andl $~(X86_CR4_PGE), %ecx
37 movl %ecx, %cr4; # turn off PGE
38 movl %cr3, %eax; # flush TLB
39 movl %eax, %cr3
401:
34 movl restore_pblist, %edx 41 movl restore_pblist, %edx
35 .p2align 4,,7 42 .p2align 4,,7
36 43
@@ -54,16 +61,8 @@ done:
54 movl $swapper_pg_dir, %eax 61 movl $swapper_pg_dir, %eax
55 subl $__PAGE_OFFSET, %eax 62 subl $__PAGE_OFFSET, %eax
56 movl %eax, %cr3 63 movl %eax, %cr3
57 /* Flush TLB, including "global" things (vmalloc) */
58 movl mmu_cr4_features, %ecx 64 movl mmu_cr4_features, %ecx
59 jecxz 1f # cr4 Pentium and higher, skip if zero 65 jecxz 1f # cr4 Pentium and higher, skip if zero
60 movl %ecx, %edx
61 andl $~(X86_CR4_PGE), %edx
62 movl %edx, %cr4; # turn off PGE
631:
64 movl %cr3, %eax; # flush TLB
65 movl %eax, %cr3
66 jecxz 1f # cr4 Pentium and higher, skip if zero
67 movl %ecx, %cr4; # turn PGE back on 66 movl %ecx, %cr4; # turn PGE back on
681: 671:
69 68
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 21e1aeb9f3ea..ac74869b8140 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -6,6 +6,7 @@
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/err.h> 7#include <linux/err.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/slab.h>
9#include <linux/init.h> 10#include <linux/init.h>
10#include <linux/random.h> 11#include <linux/random.h>
11#include <linux/elf.h> 12#include <linux/elf.h>
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index e133ce25e290..1304bcec8ee5 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -1,5 +1,6 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/debugfs.h> 2#include <linux/debugfs.h>
3#include <linux/slab.h>
3#include <linux/module.h> 4#include <linux/module.h>
4 5
5#include "debugfs.h" 6#include "debugfs.h"
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 36daccb68642..65d8d79b46a8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -28,6 +28,7 @@
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/console.h> 29#include <linux/console.h>
30#include <linux/pci.h> 30#include <linux/pci.h>
31#include <linux/gfp.h>
31 32
32#include <xen/xen.h> 33#include <xen/xen.h>
33#include <xen/interface/xen.h> 34#include <xen/interface/xen.h>
@@ -50,6 +51,7 @@
50#include <asm/traps.h> 51#include <asm/traps.h>
51#include <asm/setup.h> 52#include <asm/setup.h>
52#include <asm/desc.h> 53#include <asm/desc.h>
54#include <asm/pgalloc.h>
53#include <asm/pgtable.h> 55#include <asm/pgtable.h>
54#include <asm/tlbflush.h> 56#include <asm/tlbflush.h>
55#include <asm/reboot.h> 57#include <asm/reboot.h>
@@ -1094,6 +1096,12 @@ asmlinkage void __init xen_start_kernel(void)
1094 1096
1095 __supported_pte_mask |= _PAGE_IOMAP; 1097 __supported_pte_mask |= _PAGE_IOMAP;
1096 1098
1099 /*
1100 * Prevent page tables from being allocated in highmem, even
1101 * if CONFIG_HIGHPTE is enabled.
1102 */
1103 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
1104
1097 /* Work out if we support NX */ 1105 /* Work out if we support NX */
1098 x86_configure_nx(); 1106 x86_configure_nx();
1099 1107
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index bf4cd6bfe959..914f04695ce5 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -43,6 +43,7 @@
43#include <linux/debugfs.h> 43#include <linux/debugfs.h>
44#include <linux/bug.h> 44#include <linux/bug.h>
45#include <linux/module.h> 45#include <linux/module.h>
46#include <linux/gfp.h>
46 47
47#include <asm/pgtable.h> 48#include <asm/pgtable.h>
48#include <asm/tlbflush.h> 49#include <asm/tlbflush.h>
@@ -1427,23 +1428,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1427#endif 1428#endif
1428} 1429}
1429 1430
1430#ifdef CONFIG_HIGHPTE
1431static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
1432{
1433 pgprot_t prot = PAGE_KERNEL;
1434
1435 if (PagePinned(page))
1436 prot = PAGE_KERNEL_RO;
1437
1438 if (0 && PageHighMem(page))
1439 printk("mapping highpte %lx type %d prot %s\n",
1440 page_to_pfn(page), type,
1441 (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
1442
1443 return kmap_atomic_prot(page, type, prot);
1444}
1445#endif
1446
1447#ifdef CONFIG_X86_32 1431#ifdef CONFIG_X86_32
1448static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 1432static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1449{ 1433{
@@ -1902,10 +1886,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1902 .alloc_pmd_clone = paravirt_nop, 1886 .alloc_pmd_clone = paravirt_nop,
1903 .release_pmd = xen_release_pmd_init, 1887 .release_pmd = xen_release_pmd_init,
1904 1888
1905#ifdef CONFIG_HIGHPTE
1906 .kmap_atomic_pte = xen_kmap_atomic_pte,
1907#endif
1908
1909#ifdef CONFIG_X86_64 1889#ifdef CONFIG_X86_64
1910 .set_pte = xen_set_pte, 1890 .set_pte = xen_set_pte,
1911#else 1891#else
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 563d20504988..a29693fd3138 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -14,6 +14,7 @@
14 */ 14 */
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/err.h> 16#include <linux/err.h>
17#include <linux/slab.h>
17#include <linux/smp.h> 18#include <linux/smp.h>
18 19
19#include <asm/paravirt.h> 20#include <asm/paravirt.h>
@@ -361,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu)
361 alternatives_smp_switch(0); 362 alternatives_smp_switch(0);
362} 363}
363 364
364static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */ 365static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
365{ 366{
366 play_dead_common(); 367 play_dead_common();
367 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); 368 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 24ded31b5aec..e0500646585d 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -6,6 +6,7 @@
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/debugfs.h> 7#include <linux/debugfs.h>
8#include <linux/log2.h> 8#include <linux/log2.h>
9#include <linux/gfp.h>
9 10
10#include <asm/paravirt.h> 11#include <asm/paravirt.h>
11 12
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0d3f07cd1b5f..32764b8880b5 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
13#include <linux/clockchips.h> 13#include <linux/clockchips.h>
14#include <linux/kernel_stat.h> 14#include <linux/kernel_stat.h>
15#include <linux/math64.h> 15#include <linux/math64.h>
16#include <linux/gfp.h>
16 17
17#include <asm/pvclock.h> 18#include <asm/pvclock.h>
18#include <asm/xen/hypervisor.h> 19#include <asm/xen/hypervisor.h>
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 88e15deb8b82..22a2093b5862 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -90,9 +90,9 @@ ENTRY(xen_iret)
90 GET_THREAD_INFO(%eax) 90 GET_THREAD_INFO(%eax)
91 movl TI_cpu(%eax), %eax 91 movl TI_cpu(%eax), %eax
92 movl __per_cpu_offset(,%eax,4), %eax 92 movl __per_cpu_offset(,%eax,4), %eax
93 mov per_cpu__xen_vcpu(%eax), %eax 93 mov xen_vcpu(%eax), %eax
94#else 94#else
95 movl per_cpu__xen_vcpu, %eax 95 movl xen_vcpu, %eax
96#endif 96#endif
97 97
98 /* check IF state we're restoring */ 98 /* check IF state we're restoring */